From 6f6c5470f6f9a4f0ba2c13521219ceece60a00e6 Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 08:53:29 +0800 Subject: [PATCH 1/3] feat(skills): add enhanced frontmatter schema with optional fields Define new optional frontmatter fields (allowed-tools, preamble-tier, version, model, voice-triggers, and 8 more) in docs/skill-anatomy.md with full field reference table. Update 3 core skills (plan, work, brainstorm) as examples. All new fields are optional for backward compatibility. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/skill-anatomy.md | 47 +++++++++++++++++++++++++++- skills/flow-code-brainstorm/SKILL.md | 15 +++++++++ skills/flow-code-plan/SKILL.md | 15 +++++++++ skills/flow-code-work/SKILL.md | 16 ++++++++++ 4 files changed, 92 insertions(+), 1 deletion(-) diff --git a/docs/skill-anatomy.md b/docs/skill-anatomy.md index 0a8e6b1c..62329918 100644 --- a/docs/skill-anatomy.md +++ b/docs/skill-anatomy.md @@ -21,10 +21,32 @@ Prefix all skill directories with `flow-code-`. Main file is always `SKILL.md` ( --- name: flow-code-<name> description: Use when [triggering conditions and symptoms only] +# --- Optional fields (all backward-compatible) --- +# allowed-tools: # Tool allowlist (not a denylist) +# - Bash +# - Read +# - Edit +# - Glob +# - Grep +# version: 1.0.0 # SemVer skill version +# model: sonnet # LLM model override (haiku|sonnet|opus|inherit) +# preamble-tier: 1 # Startup cost: 1=none, 2=light, 3=heavy +# voice-triggers: # Speech-to-text aliases for discovery +# - "flow plan" +# - "make a plan" +# user-invocable: false # Hide from / menu (background knowledge only) +# argument-hint: "[epic-id]" # Autocomplete hint shown in / menu +# context: fork # Run in isolated subagent context +# agent: Explore # Subagent type when context: fork +# effort: medium # Model reasoning effort (low|medium|high|max) +# hooks: { pre-tool-call: ... 
} # Lifecycle hooks scoped to this skill +# paths: "*.rs,*.toml" # Glob patterns limiting auto-activation +# shell: bash # Shell for DCI blocks (bash|powershell) --- ``` -**Rules:** +### Required Fields + - `name`: Lowercase, hyphen-separated. Must match directory name. Always starts with `flow-code-`. - `description`: Starts with "Use when...". Max 500 characters. Third person. - Include: triggering conditions, symptoms, contexts. @@ -32,6 +54,28 @@ description: Use when [triggering conditions and symptoms only] **Why:** Descriptions are injected into system prompts for skill discovery. If the description contains process steps, agents follow the summary and skip the actual skill content. +### Optional Fields Reference + +All optional fields are backward-compatible. Omitting them changes nothing for existing skills. + +| Field | Type | Description | +|-------|------|-------------| +| `allowed-tools` | list | Tool allowlist — tools permitted without prompts when skill is active. Valid: `Read`, `Write`, `Edit`, `Bash`, `Glob`, `Grep`, `WebFetch`, `WebSearch`, `Task`, `TodoWrite`, `NotebookEdit`, `AskUserQuestion`, `Skill`. Bash supports patterns: `Bash(cargo:*)` | +| `version` | string | SemVer version (e.g., `1.0.0`). Useful for tracking skill evolution and coordinating updates across plugins | +| `model` | string | LLM model override: `haiku`, `sonnet`, `opus`, or `inherit` (default). Use sparingly — most skills should inherit the session model | +| `preamble-tier` | integer | Startup cost indicator: `1` = no preamble (instant), `2` = light preamble (env detection), `3` = heavy preamble (network, builds). Helps agents estimate activation cost | +| `voice-triggers` | list | Speech-to-text aliases for discovery. Handles common STT misheard variants (e.g., `"flow plan"` for `/flow-code:plan`). Listed in the description at render time | +| `user-invocable` | boolean | Set `false` to hide from `/` menu. 
Skill becomes background knowledge only, intended for agent preloading | +| `argument-hint` | string | Autocomplete hint shown in the `/` menu (e.g., `[epic-id]`, `<description>`) | +| `context` | string | Set to `fork` to run the skill in an isolated subagent context | +| `agent` | string | Subagent type when `context: fork` is set (default: `general-purpose`) | +| `effort` | string | Override model reasoning effort: `low`, `medium`, `high`, `max` | +| `hooks` | object | Lifecycle hooks scoped to this skill (pre-tool-call, post-tool-call, etc.) | +| `paths` | string/list | Glob patterns limiting auto-activation. Accepts comma-separated string or YAML list | +| `shell` | string | Shell for DCI (`` !`command` ``) blocks: `bash` (default) or `powershell` | + +**Key difference from agents:** Skills use `allowed-tools` (allowlist) while agents use `disallowedTools` (denylist). The `effort` and `maxTurns` fields both originated as agent-only; `effort` is now available for skills too, while `maxTurns` remains agent-only. + ## Required Sections ```markdown @@ -161,6 +205,7 @@ The `flow-code-debug` skill (`skills/flow-code-debug/SKILL.md`) is the reference - [ ] Directory created as `skills/flow-code-<name>/` - [ ] SKILL.md has valid YAML frontmatter with `name` and `description` - [ ] Description starts with "Use when..." (no workflow summary) +- [ ] Optional fields (if used) are valid: `allowed-tools` lists real tools, `version` is SemVer, `model` is a known alias - [ ] All six required sections present (Overview, When to Use, Core Process, Common Rationalizations, Red Flags, Verification) - [ ] Rationalizations table has 3+ entries with factual rebuttals - [ ] Red flags list has observable symptoms (not vague advice) diff --git a/skills/flow-code-brainstorm/SKILL.md b/skills/flow-code-brainstorm/SKILL.md index 676bfe30..17bca786 100644 --- a/skills/flow-code-brainstorm/SKILL.md +++ b/skills/flow-code-brainstorm/SKILL.md @@ -2,6 +2,21 @@ name: flow-code-brainstorm description: "Use when exploring requirements before planning. 
Pressure-tests ideas, generates approaches, and outputs a requirements doc for /flow-code:plan." user-invocable: false +version: 1.0.0 +preamble-tier: 2 +allowed-tools: + - Bash + - Read + - Write + - Edit + - Glob + - Grep + - AskUserQuestion + - WebSearch +voice-triggers: + - "brainstorm" + - "explore this idea" + - "think through this" --- # Flow brainstorm diff --git a/skills/flow-code-plan/SKILL.md b/skills/flow-code-plan/SKILL.md index cb237b15..3d4c6aaf 100644 --- a/skills/flow-code-plan/SKILL.md +++ b/skills/flow-code-plan/SKILL.md @@ -2,6 +2,21 @@ name: flow-code-plan description: "Use when planning features or designing implementation. Triggers on /flow-code:plan with text descriptions or Flow IDs." user-invocable: false +version: 1.0.0 +preamble-tier: 2 +allowed-tools: + - Bash + - Read + - Write + - Edit + - Glob + - Grep + - AskUserQuestion + - Skill +voice-triggers: + - "flow plan" + - "make a plan" + - "plan this" --- # Flow plan diff --git a/skills/flow-code-work/SKILL.md b/skills/flow-code-work/SKILL.md index 3df07e43..7937b7a2 100644 --- a/skills/flow-code-work/SKILL.md +++ b/skills/flow-code-work/SKILL.md @@ -2,6 +2,22 @@ name: flow-code-work description: "Use when implementing a plan or working through a spec. Triggers on /flow-code:work with Flow IDs." 
user-invocable: false +version: 1.0.0 +preamble-tier: 2 +allowed-tools: + - Bash + - Read + - Write + - Edit + - Glob + - Grep + - AskUserQuestion + - Skill + - Task +voice-triggers: + - "flow work" + - "start working" + - "execute the plan" --- # Flow work From cb18cc404868bd8d3a7fe42cf2b6d5a6bc73750c Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 08:55:00 +0800 Subject: [PATCH 2/3] ci: add shell smoke tests, security audit, markdown lint, and dev tooling - Add smoke-test CI job (runs scripts/smoke_test.sh after cargo tests) - Add security-audit CI job (cargo-audit for dependency vulnerabilities) - Add markdown-lint CI job (markdownlint-cli2 for skills/**/*.md and agents/**/*.md) - Add .markdownlint.json tuned for skill files (long lines, inline HTML allowed) - Add .editorconfig for consistent formatting across Rust, Markdown, Bash, JSON - Add scripts/setup-hooks.sh for auto-installing pre-commit hook Co-Authored-By: Claude Opus 4.6 (1M context) --- .editorconfig | 27 ++++++++++++++++ .github/workflows/ci.yml | 68 ++++++++++++++++++++++++++++++++++++++++ .markdownlint.json | 15 +++++++++ scripts/setup-hooks.sh | 34 ++++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 .editorconfig create mode 100644 .markdownlint.json create mode 100755 scripts/setup-hooks.sh diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..3ec40c8f --- /dev/null +++ b/.editorconfig @@ -0,0 +1,27 @@ +# EditorConfig — consistent formatting across editors +# https://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.rs] +indent_size = 4 + +[*.md] +trim_trailing_whitespace = false + +[*.{yml,yaml,json,toml}] +indent_size = 2 + +[*.sh] +indent_size = 2 + +[Makefile] +indent_style = tab diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0becaa3..97c620e7 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,12 +5,20 @@ on: branches: [main] paths: - "flowctl/**" + - "scripts/**" + - "skills/**/*.md" + - "agents/**/*.md" - ".github/workflows/ci.yml" + - ".markdownlint.json" pull_request: branches: [main] paths: - "flowctl/**" + - "scripts/**" + - "skills/**/*.md" + - "agents/**/*.md" - ".github/workflows/ci.yml" + - ".markdownlint.json" env: CARGO_TERM_COLOR: always @@ -53,6 +61,66 @@ jobs: - name: Clippy run: cargo clippy --all-targets -- -D warnings + smoke-test: + name: Shell Smoke Tests + runs-on: ubuntu-latest + needs: [test] + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry & build + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + flowctl/target + key: ${{ runner.os }}-cargo-${{ hashFiles('flowctl/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Build flowctl + run: cd flowctl && cargo build --release + + - name: Run smoke tests + run: bash scripts/smoke_test.sh + + security-audit: + name: Dependency Security Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Install cargo-audit + run: cargo install cargo-audit --locked + + - name: Run cargo audit + working-directory: flowctl + run: cargo audit + + markdown-lint: + name: Markdown Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install markdownlint-cli2 + run: npm install -g markdownlint-cli2 + + - name: Lint skill and agent markdown + run: markdownlint-cli2 "skills/**/*.md" "agents/**/*.md" + coverage: name: Test Coverage runs-on: ubuntu-latest diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 00000000..3cc783d3 --- /dev/null +++ b/.markdownlint.json @@ -0,0 
+1,15 @@
+{
+  "default": true,
+  "MD013": {
+    "line_length": 300,
+    "code_blocks": false,
+    "tables": false,
+    "headings": false
+  },
+  "MD024": {
+    "siblings_only": true
+  },
+  "MD033": false,
+  "MD041": false,
+  "MD046": false
+}
diff --git a/scripts/setup-hooks.sh b/scripts/setup-hooks.sh
new file mode 100755
index 00000000..94db9b38
--- /dev/null
+++ b/scripts/setup-hooks.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+# setup-hooks.sh — Install git hooks by symlinking from scripts/ into the
+# repository's hooks directory.
+#
+# The hooks directory is resolved by git itself rather than assumed to be
+# $ROOT/.git/hooks, so the script also works in linked worktrees and
+# submodules, where .git is a file instead of a directory.
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+
+# -e, not -d: in a linked worktree or submodule .git is a regular file.
+if [[ ! -e "$ROOT/.git" ]]; then
+  echo "ERROR: not a git repository (no .git/ at $ROOT)" >&2
+  exit 1
+fi
+
+# The printed path may be relative to $ROOT (e.g. ".git/hooks"); anchor it.
+HOOKS_DIR="$(git -C "$ROOT" rev-parse --git-path hooks)"
+case "$HOOKS_DIR" in
+  /*) ;;
+  *) HOOKS_DIR="$ROOT/$HOOKS_DIR" ;;
+esac
+
+mkdir -p "$HOOKS_DIR"
+
+# Symlink pre-commit hook
+SOURCE="$ROOT/scripts/pre-commit.sh"
+TARGET="$HOOKS_DIR/pre-commit"
+
+if [[ ! -f "$SOURCE" ]]; then
+  echo "ERROR: $SOURCE not found" >&2
+  exit 1
+fi
+
+if [[ -L "$TARGET" ]]; then
+  echo "pre-commit hook already symlinked, updating..."
+  rm "$TARGET"
+elif [[ -f "$TARGET" ]]; then
+  echo "WARNING: existing pre-commit hook found, backing up to pre-commit.bak"
+  mv "$TARGET" "$TARGET.bak"
+fi
+
+ln -s "$SOURCE" "$TARGET"
+# chmod follows the symlink, so this marks the source script executable.
+chmod +x "$TARGET"
+echo "Installed pre-commit hook: $TARGET -> $SOURCE"
From 7a6363a11d0f2ba623ffdc09388d7e2ed5c00b25 Mon Sep 17 00:00:00 2001
From: z23cc
Date: Tue, 7 Apr 2026 08:59:54 +0800
Subject: [PATCH 3/3] feat: add skill validation, cross-platform gen, README overhaul

- scripts/validate-skills.sh: validates all skills against frontmatter schema (25/25 pass, 130 warnings for optional sections)
- scripts/gen-platform.sh: generates Codex (.codex/AGENTS.md) and Cursor (.cursor/rules/*.mdc) adapter files from plugin metadata
- README.md: complete overhaul with skill inventory table, architecture diagram, quick start guide, commands reference
- CHANGELOG.md: initial Keep a Changelog format entry

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 CHANGELOG.md | 23 +
 README.md | 1970
+++--------------------------------- scripts/gen-platform.sh | 455 +++++++++ scripts/validate-skills.sh | 142 +++ 4 files changed, 787 insertions(+), 1803 deletions(-) create mode 100644 CHANGELOG.md create mode 100755 scripts/gen-platform.sh create mode 100755 scripts/validate-skills.sh diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..b336ec12 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). + +## [0.1.31] - 2026-04-07 + +### Added +- Anti-rationalization sections to 4 core skills (plan, work, impl-review, epic-review) +- Adversarial review iteration 2 findings addressed +- Simplify-ignore hook improvements from adversarial review +- Borrow agent skills patterns and anti-patterns documentation +- README overhaul with architecture diagram, skill inventory table, and streamlined quick start +- CHANGELOG.md (this file) + +### Changed +- skill-create updated to enforce skill-anatomy.md template +- README.md restructured for clarity: badges, one-liner, install, quick start, full skill inventory, architecture, commands reference, contributing, and license sections + +## [0.1.30] and earlier + +See [git log](https://github.com/z23cc/flow-code/commits/main) for full history. diff --git a/README.md b/README.md index b79691ef..4209c7ee 100644 --- a/README.md +++ b/README.md @@ -1,1878 +1,242 @@
-**[English](README.md)** | **[中文](README_CN.md)** - # Flow-Code -[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](../../LICENSE) +[![Version](https://img.shields.io/badge/version-0.1.31-blue.svg)](CHANGELOG.md) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![Claude Code](https://img.shields.io/badge/Claude_Code-Plugin-blueviolet)](https://claude.ai/code) +[![Status](https://img.shields.io/badge/Status-Active_Development-brightgreen)](CHANGELOG.md) -[![Version](https://img.shields.io/badge/Version-0.1.27-green)](../../CHANGELOG.md) - -[![Status](https://img.shields.io/badge/Status-Active_Development-brightgreen)](../../CHANGELOG.md) - -**A production-grade harness for Claude Code. Full-auto development from idea to PR.** - -**Zero external dependencies. Zero questions asked.** +**Full-auto development from idea to PR. Zero dependencies. Zero questions asked.**
--- -### What is Harness Engineering? - -> *"The model is commodity; the harness is moat."* — [Anthropic](https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents), [OpenAI](https://openai.com/index/harness-engineering/), [Mitchell Hashimoto](https://mitchellh.com/writing/my-ai-adoption-journey) - -A **harness** wraps around an AI coding agent to handle everything the model can't do alone: state management, context bridging, quality gates, multi-agent coordination, and error recovery. Flow-Code is a complete harness for Claude Code. - -### How Flow-Code Compares - -| Capability | Flow-Code | [compound-engineering](https://github.com/EveryInc/compound-engineering-plugin) (12.5K⭐) | [claude-mem](https://github.com/thedotmack/claude-mem) (44K⭐) | [superpowers](https://github.com/anthropics/claude-plugins-official) | -|---|---|---|---|---| -| Task DAG + state machine | ✅ 37 commands, deps, split/skip | ❌ | ❌ | ❌ | -| Parallel Teams + file lock | ✅ Agent Teams, atomic locks | ❌ | ❌ | ✅ parallel agents (no locks) | -| Three-layer quality | ✅ guard + RP + Codex adversarial | ❌ | ❌ | ❌ | -| Runtime DAG mutation | ✅ split/skip/dep rm mid-execution | ❌ | ❌ | ❌ | -| Cross-model adversarial review | ✅ GPT tries to break Claude's code | ❌ | ❌ | ❌ | -| Full-auto (zero questions) | ✅ AI decides branch/review/depth | ❌ | ❌ | ❌ | -| Context preservation | ✅ PreCompact hook | ❌ | ✅ embedding + RAG | ❌ | -| Auto draft PR | ✅ | ❌ | ❌ | ❌ | -| Zero dependencies | ✅ single Rust binary + Bash skills | ❌ Node.js | ❌ ChromaDB | ❌ Node.js | - ---- - -## Table of Contents - -- [What Is This?](#what-is-this) -- [Why It Works](#why-it-works) -- [Quick Start](#quick-start) — Install, setup, use -- [When to Use What](#when-to-use-what) — Interview vs Plan vs Work -- [Agent Readiness Assessment](#agent-readiness-assessment) — `/flow-code:prime` -- [Troubleshooting](#troubleshooting) -- [Codebase Map](#codebase-map) — Architecture documentation via parallel subagents -- 
[Auto-Improve](#auto-improve-autonomous-optimization) — Autonomous code optimization -- [Ralph (Autonomous Mode)](#ralph-autonomous-mode) — Run overnight -- [Features](#features) — Re-anchoring, multi-user, reviews, dependencies -- [Commands](#commands) — All slash commands + flags - - [Command Reference](#command-reference) — Detailed input docs for each command -- [The Workflow](#the-workflow) — Planning and work phases -- [.flow/ Directory](#flow-directory) — File structure -- [flowctl CLI](#flowctl-cli) — Direct CLI usage - ---- - -## What Is This? - -Flow-Code is a **harness engineering framework** for Claude Code. One command goes from idea to draft PR — planning, parallel implementation, three-layer quality gates, and cross-model adversarial review, all fully automated. +Flow-Code is a harness engineering plugin for Claude Code. One command takes you from an idea to a draft PR -- planning, parallel implementation, three-layer quality gates, and cross-model adversarial review, all fully automated. ``` /flow-code:plan "Add OAuth login" - → AI research (adaptive scouts) - → RP plan-review (code-aware) - → Teams parallel workers (file locking) - → guard per-commit (Layer 1) - → Codex adversarial (Layer 3: GPT tries to break it) - → auto push + draft PR -``` - -Everything lives in your repo as `.flow/` state. No external services. No global config. Single Rust binary + Bash skills. Uninstall: delete `.flow/`. - - - - - - - - - - -
Planning PhaseImplementation Phase
Planning: dependency-ordered tasksExecution: fixes, evidence, review
- ---- - -## Epic-first task model - -Flow-Code does not support standalone tasks. - -Every unit of work belongs to an epic fn-N (even if it's a single task). - -Tasks are always fn-N.M and inherit context from the epic spec. - -Flow-Code always creates an epic container (even for one-offs) so every task has a durable home for context, re-anchoring, and automation. You never have to think about it. - -Rationale: keeps the system simple, improves re-anchoring, makes automation (Ralph) reliable. - -"One-off request" -> epic with one task. - ---- - -## Why It Works - -### Full-Auto by Default - -Say one sentence. Flow-Code plans, implements, tests, commits, and opens a draft PR — zero questions asked. AI reads git state and `.flow/` config to make all decisions (branch, review backend, research depth) autonomously. - -```bash -# Full auto: plan → implement → test → commit → draft PR -/flow-code:plan "add OAuth support" - -# Resume anytime — reads .flow state and continues from where it left off -/flow-code:work fn-1 - -# One task at a time for maximum control -/flow-code:work fn-1.1 + -> AI research (adaptive scouts) + -> RP plan-review (code-aware) + -> Teams parallel workers (file locking) + -> guard per-commit (Layer 1) + -> Codex adversarial (Layer 3: GPT tries to break it) + -> auto push + draft PR ``` -All modes get: re-anchoring before each task, evidence recording, file locking, cross-model review (if rp-cli available), and auto push + draft PR on completion. - -**Default: Teams mode** — Ready tasks (no unresolved dependencies) are automatically spawned as parallel Agent Team workers with file locking and SendMessage coordination. Single tasks run as a foreground worker with zero overhead. After each wave completes, a structured **Wave Checkpoint** runs: aggregate results, verify integration (guards + invariants), output a summary, then plan the next wave. Newly unblocked tasks become ready for the next batch. 
- -Workers also use **file-level Wave parallelism** within each task — when touching 3+ files, they issue parallel reads in one message, analyze dependencies at a checkpoint, then issue parallel edits. This achieves 3-4x speedup over sequential file I/O. - -**Three-layer review timing**: Layer 1 (guard) runs per-commit automatically. Layer 2 (RP plan-review) runs once during planning. Layer 3 (Codex adversarial) runs once when all tasks complete. No per-task review overhead — quality gates are at the right level. - -### No Context Length Worries - -- **Tasks sized at planning:** Every task is scoped to fit one work iteration -- **Re-anchor every task:** Fresh context from `.flow/` specs before each task -- **Survives compaction:** Re-anchors after conversation summarization too -- **Fresh context in Ralph:** Each iteration starts with a clean context window - -Never worry about 200K token limits again. - -### Three-Layer Quality System - -Each layer catches different types of problems. No overlap, no waste: - -| Layer | Tool | When | What it catches | -|-------|------|------|----------------| -| **1. Guard** | `flowctl guard` (lint/type/test) | Every commit | Syntax, types, test failures | -| **2. RP Plan-Review** | RepoPrompt context_builder | Plan phase | Spec-code inconsistency (RP sees full codebase) | -| **3. Codex Adversarial** | `flowctl codex adversarial` | Epic completion | Security, concurrency, edge cases (different model family) | - -Guard is deterministic. RP validates against existing code. Codex (GPT) tries to **break** what Claude built — different architectures have different blind spots. - ---- - -### Zero Friction - -- **Works in 30 seconds.** Install the plugin, run a command. No setup. -- **Non-invasive.** No CLAUDE.md edits. No daemons. (Ralph uses plugin hooks for enforcement.) -- **Clean uninstall.** Delete `.flow/` (and `scripts/ralph/` if enabled). -- **Multi-user safe.** Teams work parallel branches without coordination servers. 
- ---- +Everything lives in your repo as `.flow/` state. No external services. No global config. Single Rust binary + Markdown skills. -## Quick Start - -### 1. Install +## Install ```bash -# Add marketplace /plugin marketplace add https://github.com/z23cc/flow-code - -# Install flow-code /plugin install flow-code ``` -### 2. Setup (Recommended) - -```bash -/flow-code:setup -``` - -This is technically optional but **highly recommended**. It: -- **Configures review backend** (RepoPrompt, Codex, or none) — required for cross-model reviews -- Copies `flowctl` to `.flow/bin/` for direct CLI access -- Adds flow-code instructions to CLAUDE.md/AGENTS.md (helps other AI tools understand your project) -- Creates `.flow/usage.md` with full CLI reference +**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), git. Optional: [RepoPrompt](https://repoprompt.com) (macOS) or [Codex CLI](https://github.com/openai/codex) for cross-model reviews. -**Idempotent** - safe to re-run. Detects plugin updates and refreshes scripts automatically. - -After setup: -```bash -export PATH=".flow/bin:$PATH" -flowctl --help -flowctl epics # List all epics -flowctl tasks --epic fn-1 # List tasks for epic -flowctl ready --epic fn-1 # What's ready to work on -``` - -### 3. Use +## Quick Start ```bash -# Spec: "create a spec for X" — writes epic with structured requirements -# Then plan or interview to refine - -# Plan: research, create epic with tasks -/flow-code:plan Add a contact form with validation +# 1. Plan a feature (auto-executes by default) +/flow-code:plan "add a contact form with validation" -# Work: execute tasks in dependency order +# 2. Or plan-only, then work separately +/flow-code:plan "add OAuth support" --plan-only /flow-code:work fn-1 -# Or work directly from a spec file (creates epic automatically) -/flow-code:work docs/my-feature-spec.md -``` - -That's it. Flow-Code handles research, task ordering, reviews, and audit trails. 
- -### When to Use What - -Flow-next is flexible. There's no single "correct" order — the right sequence depends on how well-defined your spec already is. - -**The key question: How fleshed out is your idea?** - -#### Spec-driven (recommended for new features) - -``` -Create spec → Interview or Plan → Work -``` - -1. **Create spec** — ask Claude to "create a spec for X". This creates an epic with a structured spec (goal, architecture, API contracts, edge cases, acceptance criteria, boundaries, decision context) — no tasks yet -2. **Refine or plan**: - - `/flow-code:interview fn-1` — deep Q&A to pressure-test the spec, surface gaps - - `/flow-code:plan fn-1` — research best practices + break into tasks -3. **Work** — `/flow-code:work fn-1` executes with re-anchoring and reviews - -Best for: features where you want to nail down the WHAT/WHY before committing to HOW. The spec captures everything an implementer needs. - -#### Vague idea or rough concept - -``` -Interview → Plan → Work -``` - -1. **Interview first** — `/flow-code:interview "your rough idea"` asks 40+ deep questions to surface requirements, edge cases, and decisions you haven't thought about -2. **Plan** — `/flow-code:plan fn-1` takes the refined spec and researches best practices, current docs, repo patterns, then splits into properly-sized tasks -3. **Work** — `/flow-code:work fn-1` executes with re-anchoring and reviews - -#### Well-written spec or PRD - -``` -Plan → Interview → Work -``` - -1. **Plan first** — `/flow-code:plan specs/my-feature.md` researches best practices and current patterns, then breaks your spec into epic + tasks -2. **Interview after** — `/flow-code:interview fn-1` runs deep questions against the plan to catch edge cases, missing requirements, or assumptions -3. **Work** — `/flow-code:work fn-1` executes - -#### Minimal planning - -``` -Plan → Work -``` - -Skip interview entirely for well-understood changes. Plan still researches best practices and splits into tasks. 
- -#### Quick single-task (spec already complete) - -``` -Work directly -``` - -```bash -/flow-code:work specs/small-fix.md -``` - -For small, self-contained changes where you already have a complete spec. Creates an epic with **one task** and executes immediately. You get flow tracking, re-anchoring, and optional review — without full planning overhead. - -Best for: bug fixes, small features, well-scoped changes that don't need task splitting. - -**Note:** This does NOT split into multiple tasks. For detailed specs that need breakdown, use Plan first. - -**Summary:** - -| Starting point | Recommended sequence | -|----------------|---------------------| -| New feature, want solid spec first | Spec → Interview/Plan → Work | -| Vague idea, rough notes | Interview → Plan → Work | -| Detailed spec/PRD | Plan → Interview → Work | -| Well-understood, needs task splitting | Plan → Work | -| Small single-task, spec complete | Work directly (creates 1 epic + 1 task) | - -**Spec vs Interview vs Plan:** -- **Spec** (just ask "create a spec") creates an epic with structured requirements (goal, architecture, API contracts, edge cases, acceptance criteria, boundaries). No tasks, no codebase research. -- **Interview** refines an epic via deep Q&A (40+ questions). Writes back to the epic spec only — no tasks. -- **Plan** researches best practices, analyzes existing patterns, and creates sized tasks with dependencies. - -You can always run interview again after planning to catch anything missed. Interview writes back to the epic spec only — it won't modify existing tasks. - ---- - -## Agent Readiness Assessment - -> Inspired by [Factory.ai's Agent Readiness framework](https://factory.ai/news/agent-readiness) - -`/flow-code:prime` assesses your codebase for agent-readiness and proposes improvements. Works for greenfield and brownfield projects. 
- -### The Problem - -Agents waste cycles when codebases lack: -- **Pre-commit hooks** → waits 10min for CI instead of 5sec local feedback -- **Documented env vars** → guesses, fails, guesses again -- **CLAUDE.md** → doesn't know project conventions -- **Test commands** → can't verify changes work - -These are **environment problems**, not agent problems. Prime helps fix them. - -### Quick Start - -```bash -/flow-code:prime # Full assessment + interactive fixes -/flow-code:prime --report-only # Just show the report -/flow-code:prime --fix-all # Apply all fixes without asking -``` - -### The Eight Pillars - -Prime evaluates your codebase across eight pillars (48 criteria total): - -#### Agent Readiness (Pillars 1-5) — Scored, Fixes Offered - -| Pillar | What It Checks | -|--------|----------------| -| **1. Style & Validation** | Linters, formatters, type checking, pre-commit hooks | -| **2. Build System** | Build tool, commands, lock files, monorepo tooling | -| **3. Testing** | Test framework, commands, verification, coverage, E2E | -| **4. Documentation** | README, CLAUDE.md, setup docs, architecture | -| **5. Dev Environment** | .env.example, Docker, devcontainer, runtime version | - -#### Production Readiness (Pillars 6-8) — Reported Only - -| Pillar | What It Checks | -|--------|----------------| -| **6. Observability** | Structured logging, tracing, metrics, error tracking, health endpoints | -| **7. Security** | Branch protection, secret scanning, CODEOWNERS, Dependabot | -| **8. Workflow & Process** | CI/CD, PR templates, issue templates, release automation | - -**Two-tier approach**: Pillars 1-5 determine your agent maturity level and are eligible for fixes. Pillars 6-8 are reported for visibility but no fixes are offered — these are team/production decisions. 
- -### Maturity Levels - -| Level | Name | Description | Overall Score | -|-------|------|-------------|---------------| -| 1 | Minimal | Basic project structure only | <30% | -| 2 | Functional | Can build and run, limited docs | 30-49% | -| 3 | **Standardized** | Agent-ready for routine work | 50-69% | -| 4 | Optimized | Fast feedback loops, comprehensive docs | 70-84% | -| 5 | Autonomous | Full autonomous operation capable | 85%+ | - -**Level 3 is the target** for most teams. It means agents can handle routine work: bug fixes, tests, docs, dependency updates. - -### How It Works - -1. **Parallel Assessment** — 9 haiku scouts run in parallel (~15-20 seconds): - - Agent Readiness scouts: - - `tooling-scout` — linters, formatters, pre-commit, type checking - - `claude-md-scout` — CLAUDE.md/AGENTS.md analysis - - `env-scout` — environment setup - - `testing-scout` — test infrastructure - - `build-scout` — build system - - `docs-gap-scout` — README, ADRs, architecture docs - - Production Readiness scouts: - - `observability-scout` — logging, tracing, metrics, health endpoints - - `security-scout` — GitHub API checks, CODEOWNERS, Dependabot - - `workflow-scout` — CI/CD, templates, automation - -2. **Verification** — Verifies test commands actually work (e.g., `pytest --collect-only`) - -3. **Synthesize Report** — Calculates Agent Readiness score, Production Readiness score, and maturity level - -4. **Interactive Remediation** — Uses `AskUserQuestion` for agent readiness fixes only: - ``` - Which tooling improvements should I add? - ☐ Add pre-commit hooks (Recommended) - ☐ Add linter config - ☐ Add runtime version file - ``` - -5. **Apply Fixes** — Creates/modifies files based on your selections - -6. 
**Re-assess** — Optionally re-run to show improvement - -### Example Report - -```markdown -# Agent Readiness Report - -**Repository**: my-project -**Assessed**: 2026-01-23 - -## Scores Summary - -| Category | Score | Level | -|----------|-------|-------| -| **Agent Readiness** (Pillars 1-5) | 73% | Level 4 - Optimized | -| Production Readiness (Pillars 6-8) | 17% | — | -| **Overall** | 52% | — | - -## Agent Readiness (Pillars 1-5) - -| Pillar | Score | Status | -|--------|-------|--------| -| Style & Validation | 67% (4/6) | ⚠️ | -| Build System | 100% (6/6) | ✅ | -| Testing | 67% (4/6) | ⚠️ | -| Documentation | 83% (5/6) | ✅ | -| Dev Environment | 83% (5/6) | ✅ | - -## Production Readiness (Pillars 6-8) — Report Only - -| Pillar | Score | Status | -|--------|-------|--------| -| Observability | 33% (2/6) | ❌ | -| Security | 17% (1/6) | ❌ | -| Workflow & Process | 0% (0/6) | ❌ | - -## Top Recommendations (Agent Readiness) - -1. **Tooling**: Add pre-commit hooks — 5 sec feedback vs 10 min CI wait -2. **Tooling**: Add Python type checking — catch errors locally -3. **Docs**: Update README — replace generic template -``` - -### Remediation Templates - -Prime offers fixes for agent readiness gaps (**not** team governance): - -| Fix | What Gets Created | -|-----|-------------------| -| CLAUDE.md | Project overview, commands, structure, conventions | -| .env.example | Template with detected env vars | -| Pre-commit (JS) | Husky + lint-staged config | -| Pre-commit (Python) | `.pre-commit-config.yaml` | -| Linter config | ESLint, Biome, or Ruff config (if none exists) | -| Formatter config | Prettier or Biome config (if none exists) | -| .nvmrc/.python-version | Runtime version pinning | -| .gitignore entries | .env, build outputs, node_modules | - -Templates adapt to your project's detected conventions and existing tools. Won't suggest ESLint if you have Biome, etc. 
- -### User Consent Required - -**By default, prime asks before every change** using interactive checkboxes. You choose what gets created. - -- **Asks first** — uses `AskUserQuestion` tool for interactive selection per category -- **Never overwrites** existing files without explicit consent -- **Never commits** changes (leaves for you to review) -- **Never deletes** files -- **Merges** with existing configs when possible -- **Respects** your existing tools (won't add ESLint if you have Biome) - -Use `--fix-all` to skip questions and apply everything. Use `--report-only` to just see the assessment. - -### Flags - -| Flag | Description | -|------|-------------| -| `--report-only` | Skip remediation, just show report | -| `--fix-all` | Apply all recommendations without asking | -| `` | Assess a different directory | - ---- - -### Full-Auto vs Interactive - -By default, everything is autonomous. Use `--interactive` only when you want to pause between tasks. - -| Mode | Trigger | Behavior | -|------|---------|----------| -| **Full-auto** (default) | `/flow-code:plan "idea"` | Plan → work → review → PR, zero questions | -| **Interactive** | `--interactive` flag | Pauses after each task for human confirmation | -| **Ralph** (multi-session) | `scripts/ralph/ralph.sh` | Fresh context per iteration, overnight runs | - -For large epics (>10 tasks), Ralph provides fresh context per session. See [Ralph Mode](#ralph-autonomous-mode) for setup. 
- ---- - -## Troubleshooting - -### Reset a stuck task - -```bash -# Check task status -flowctl show fn-1.2 --json - -# Reset to todo (from done/blocked) -flowctl task reset fn-1.2 - -# Reset + dependents in same epic -flowctl task reset fn-1.2 --cascade -``` - -### Clean up `.flow/` safely - -Run manually in terminal (not via AI agent): - -```bash -# Remove all flow state (keeps git history) -rm -rf .flow/ - -# Re-initialize -flowctl init -``` - -### Debug Ralph runs - -```bash -# Check run progress -cat scripts/ralph/runs/*/progress.txt - -# View iteration logs -ls scripts/ralph/runs/*/iter-*.log - -# Check for blocked tasks -ls scripts/ralph/runs/*/block-*.md -``` - -### Receipt validation failing - -```bash -# Check receipt exists -ls scripts/ralph/runs/*/receipts/ +# 3. Resume anytime -- reads .flow state and continues +/flow-code:work fn-1 -# Verify receipt format -cat scripts/ralph/runs/*/receipts/impl-fn-1.1.json -# Must have: {"type":"impl_review","id":"fn-1.1",...} +# 4. Optional setup for review backends + local CLI +/flow-code:setup ``` -### Custom rp-cli instructions conflicting - -> **Caution**: If you have custom instructions for `rp-cli` in your `CLAUDE.md` or `AGENTS.md`, they may conflict with Flow-Code's RepoPrompt integration. +That's it. Flow-Code handles research, task ordering, parallel execution, reviews, and opens a draft PR when done. -Flow-Code's plan-review and impl-review skills include specific instructions for `rp-cli` usage (window selection, builder workflow, chat commands). Custom rp-cli instructions can override these and cause unexpected behavior. - -**Symptoms:** -- Reviews not using the correct RepoPrompt window -- Builder not selecting expected files -- Chat commands failing or behaving differently - -**Fix:** Remove or comment out custom rp-cli instructions from your `CLAUDE.md`/`AGENTS.md` when using Flow-Code reviews. The plugin provides complete rp-cli guidance. 
- ---- +## Architecture -## Codebase Map +```mermaid +graph TD + A["/flow-code:plan"] --> B["Research Scouts<br/>(parallel subagents)"] + B --> C[".flow/ epic + tasks + deps"] + C --> D["/flow-code:work"] + D --> E["Teams Mode<br/>(parallel workers + file locking)"] + E --> F["Layer 1: guard<br/>(lint/type/test per commit)"] + F --> G["Layer 3: Codex adversarial<br/>(GPT tries to break it)"] + G --> H["Auto push + draft PR"] -Generate comprehensive architecture documentation using parallel Sonnet subagents. + I["/flow-code:plan-review"] -.-> C + J["/flow-code:impl-review"] -.-> F + K["/flow-code:epic-review"] -.-> G -```bash -/flow-code:map + style A fill:#4a9eff,color:#fff + style D fill:#4a9eff,color:#fff + style H fill:#2ecc71,color:#fff ``` -Creates `docs/CODEBASE_MAP.md` with: -- Architecture diagram (Mermaid) -- Module guide (purpose, exports, dependencies per file) -- Data flow diagrams -- Conventions and gotchas -- Navigation guide ("To add an API endpoint: touch these files") +**Core engine:** `flowctl` is a Rust binary with libSQL storage. Skills and agents are Markdown files loaded by Claude Code's Skill tool. No Node.js, no npm, no external services. -**How it works:** -1. Scans file tree with token counts (respects .gitignore) -2. Splits work into ~150k token chunks -3. Spawns Sonnet subagents in parallel to analyze each chunk -4. Synthesizes reports into a single map document - -**Update mode** — re-run to update only changed modules: -```bash -/flow-code:map --update ``` - -**Integrated with flow-code workflow:** -- `repo-scout` reads the map first during planning (faster, more accurate) -- `auto-improve` reads the map before each experiment (better context) -- `context-scout` benefits from architecture overview - -Based on [Cartographer](https://github.com/kingbootoshi/cartographer) (MIT). - ---- - -## Auto-Improve (Autonomous Optimization) - -> Inspired by [Karpathy's autoresearch](https://github.com/karpathy/autoresearch) — 700 experiments in 2 days, 19% performance gain at Shopify. - -One command to start autonomous code improvement. Auto-detects project type, guard commands, and runs immediately.
- -```bash -/flow-code:auto-improve "fix N+1 queries and add missing tests" --scope src/ +commands/flow-code/*.md -> Slash command definitions (user entry points) +skills/*/SKILL.md -> Skill implementations (24 skills) +agents/*.md -> Subagent definitions (24 agents) +bin/flowctl -> Rust binary (built from flowctl/ workspace) +hooks/hooks.json -> Ralph workflow guards (active when FLOW_RALPH=1) ``` -That's it. Flow-Code detects your project (Django/React/Next.js), finds lint+test commands, creates an experiment branch, and starts improving. Each experiment: discover → implement → test → keep or discard. - -**More examples:** -```bash -# Next.js bundle optimization -/flow-code:auto-improve "reduce bundle size" --scope src/components/ --max 20 - -# Security hardening -/flow-code:auto-improve "fix security vulnerabilities" --scope src/api/ src/auth/ - -# Test coverage -/flow-code:auto-improve "improve test coverage to 80%" - -# Watch mode (see what agent is doing) -/flow-code:auto-improve "optimize API performance" --scope src/ --watch -``` +## Commands -**How it works:** -``` -for each experiment (up to --max, default 50): - 1. Agent reads code + previous experiments (learns from history) - 2. Discovers ONE improvement opportunity - 3. Writes test first (TDD style) - 4. Implements minimal change (scope-restricted) - 5. Runs guard (auto-detected lint + tests must pass) - 6. Judges: keep (git commit) or discard (git reset) - 7. 
Logs to experiments.jsonl → summary.md at end -``` +| Command | What It Does | +|---------|--------------| +| `/flow-code:plan ` | Research codebase, create epic with dependency-ordered tasks | +| `/flow-code:work ` | Execute epic/task/spec file with re-anchoring before each task | +| `/flow-code:interview ` | Deep Q&A (40+ questions) to refine a spec before planning | +| `/flow-code:plan-review ` | Carmack-level plan review via RepoPrompt or Codex | +| `/flow-code:impl-review` | Carmack-level implementation review of current branch | +| `/flow-code:epic-review ` | Verify implementation matches spec before closing | +| `/flow-code:debug` | Systematic debugging: root cause investigation | +| `/flow-code:prime` | Assess codebase agent-readiness, propose fixes | +| `/flow-code:sync ` | Update downstream task specs after implementation drift | +| `/flow-code:retro` | Post-epic retrospective: lessons learned | +| `/flow-code:ralph-init` | Scaffold autonomous Ralph harness | +| `/flow-code:django` | Django-specific patterns, security, testing | +| `/flow-code:skill-create` | Create new flow-code skills | +| `/flow-code:setup` | Install flowctl locally + configure review backend | +| `/flow-code:uninstall` | Remove flow-code from project | + +**Flags:** All commands accept flags (`--research=rp|grep`, `--review=rp|codex|none`, `--branch=current|new|worktree`, `--interactive`, `--tdd`, `--plan-only`, `--no-pr`). Natural language also works: `/flow-code:plan Add webhooks, use context-scout, skip review`. 
+ +## Skill Inventory + +### Core Skills (8) + +| Skill | Command | Purpose | +|-------|---------|---------| +| `flow-code` | `/flow-code` | Task/epic management (list, create, status) | +| `flow-code-plan` | `/flow-code:plan` | Create structured build plans from descriptions | +| `flow-code-work` | `/flow-code:work` | Execute plans with Teams mode (parallel workers + file locking) | +| `flow-code-plan-review` | `/flow-code:plan-review` | Carmack-level plan review via RepoPrompt or Codex | +| `flow-code-impl-review` | `/flow-code:impl-review` | Post-implementation code review | +| `flow-code-epic-review` | `/flow-code:epic-review` | Final review before closing an epic | +| `flow-code-setup` | `/flow-code:setup` | Install flowctl CLI and configure project | +| `flow-code-map` | `/flow-code:map` | Generate codebase architecture maps via parallel subagents | + +### Extension Skills -- Development (4) + +| Skill | Command | Purpose | +|-------|---------|---------| +| `flow-code-debug` | `/flow-code:debug` | Systematic debugging with root cause investigation | +| `flow-code-auto-improve` | `/flow-code:auto-improve` | Autonomous code quality improvement loops | +| `flow-code-django` | `/flow-code:django` | Django-specific patterns, security, and testing | +| `flow-code-deps` | `/flow-code:deps` | Dependency graph visualization and execution order | + +### Extension Skills -- Workflow (4) + +| Skill | Command | Purpose | +|-------|---------|---------| +| `flow-code-interview` | `/flow-code:interview` | Refine specs through structured Q&A (40+ questions) | +| `flow-code-sync` | `/flow-code:sync` | Sync downstream task specs after implementation drift | +| `flow-code-retro` | `/flow-code:retro` | Post-epic retrospective and lessons learned | +| `flow-code-prime` | `/flow-code:prime` | Assess codebase readiness for agent work | + +### Extension Skills -- Tooling (8) + +| Skill | Command | Purpose | +|-------|---------|---------| +| `flow-code-ralph-init` | 
`/flow-code:ralph-init` | Scaffold autonomous Ralph harness | +| `flow-code-loop-status` | `/flow-code:loop-status` | Monitor running Ralph/auto-improve loops | +| `flow-code-worktree-kit` | `/flow-code:worktree-kit` | Git worktree management for parallel work | +| `flow-code-export-context` | `/flow-code:export-context` | Export context for external model review | +| `flow-code-rp-explorer` | `/flow-code:rp-explorer` | RepoPrompt-powered codebase exploration | +| `flow-code-skill-create` | `/flow-code:skill-create` | Create new flow-code skills | +| `flow-code-prompt-eng` | Internal | Prompt engineering guidance for review agents | +| `browser` | `/browser` | Browser automation via agent-browser CLI | + +## How It Works -**What's auto-detected:** +### Full-Auto by Default -| Project | Guard command | -|---------|--------------| -| Django + ruff | `ruff check . && python -m pytest -x -q` | -| Django + pytest | `python -m pytest -x -q` | -| Next.js/React | `npm run lint && npm test` | -| No tests found | Warning — set `GUARD_CMD` in config.env | +Say one sentence. Flow-Code plans, implements, tests, commits, and opens a draft PR -- zero questions asked. AI reads git state and `.flow/` config to make all decisions autonomously. -**Customization:** -- `scripts/auto-improve/program.md` — edit to change improvement focus and judgment criteria -- `scripts/auto-improve/config.env` — override goal, scope, guard, max experiments +**Default mode: Teams + Phase-Gate.** Ready tasks are spawned as parallel Agent Team workers with file locking and SendMessage coordination. After each wave, a structured checkpoint verifies integration before the next batch. 
-**Output:** -- `experiments.jsonl` — every experiment logged (hypothesis, result, commit) -- `summary.md` — generated at end with kept/discarded/crashed counts -- Kept improvements committed on `auto-improve/` branch +### Three-Layer Quality System -**Using with Codex CLI:** -```bash -# Set CLAUDE_BIN to use Codex instead of Claude -CLAUDE_BIN=codex scripts/auto-improve/auto-improve.sh +Each layer catches different types of problems: -# Or set in config.env for persistent use -# CLAUDE_BIN=codex -# AUTO_IMPROVE_CODEX_MODEL=gpt-5.4 -``` +| Layer | Tool | When | What It Catches | +|-------|------|------|----------------| +| **1. Guard** | `flowctl guard` | Every commit | Syntax, types, test failures | +| **2. RP Plan-Review** | RepoPrompt context_builder | Plan phase | Spec-code inconsistency | +| **3. Codex Adversarial** | `flowctl codex adversarial` | Epic completion | Security, concurrency, edge cases | -Auto-improve auto-detects the CLI type and uses the correct flags (Claude: `-p --output-format stream-json`, Codex: `-q --full-auto`). +Guard is deterministic. RP validates against existing code. Codex (GPT) tries to **break** what Claude built -- different model families have different blind spots. -**Ralph vs Auto-Improve:** -| | Ralph | Auto-Improve | -|---|---|---| -| Purpose | Execute planned tasks | Explore & optimize | -| Input | Epic with spec + tasks | Goal + scope | -| Approach | Follow plan exactly | Discover improvements | -| Output | Completed features | Incremental code improvements | -| When | You know WHAT to build | You want code to get BETTER | +### Re-Anchoring ---- +Before every task, Flow-Code re-reads epic spec, task spec, and git state from `.flow/`. No hallucinated scope creep, no forgotten requirements. Survives context compaction. -## Uninstall +### Ralph (Autonomous Mode) -Run manually in terminal (DCG blocks these from AI agents): +Ralph is the repo-local autonomous loop for overnight runs. 
Fresh context per iteration, multi-model review gates, receipt-based gating, and scope freeze for safety. ```bash -rm -rf .flow/ # Core flow state -rm -rf scripts/ralph/ # Ralph (if enabled) +/flow-code:ralph-init # Scaffold (one-time) +scripts/ralph/ralph_once.sh # One iteration (observe) +scripts/ralph/ralph.sh # Full loop (AFK) +scripts/ralph/ralph.sh --watch # Stream tool calls in real-time ``` -Or use `/flow-code:uninstall` which cleans up docs and prints commands to run. +### Cross-Model Reviews ---- +Two models catch what one misses. Reviews use a second model (RepoPrompt or Codex CLI) to verify plans and implementations. Carmack-level criteria: Completeness, Feasibility, Architecture, Security, Testability. -## Ralph (Autonomous Mode) +| Backend | Platform | Best For | +|---------|----------|----------| +| [RepoPrompt](https://repoprompt.com) | macOS | Best context, visual builder, deeper codebase discovery | +| [Codex CLI](https://github.com/openai/codex) | All | Cross-platform, terminal-based, session continuity | -> **⚠️ Safety first**: Ralph defaults to `YOLO=1` (skips permission prompts). 
-> - Start with `ralph_once.sh` to observe one iteration -> - Consider [Docker sandbox](https://docs.docker.com/ai/sandboxes/claude-code/) for isolation -> - Consider [DCG (Destructive Command Guard)](https://github.com/Dicklesworthstone/destructive_command_guard) to block destructive commands — see [DCG setup](docs/ralph.md#additional-safety-dcg-optional) -> -> **Community sandbox setups** (alternative approaches): -> - [devcontainer-for-claude-yolo-and-flow-code](https://github.com/Ranudar/devcontainer-for-claude-yolo-and-flow-code) — VS Code devcontainer with Playwright, firewall whitelisting, and RepoPrompt MCP bridge -> - [agent-sandbox](https://github.com/novotnyllc/agent-sandbox) — Docker Sandbox (Desktop 4.50+) with seccomp/user namespace isolation, .NET + Node.js +### Other Platforms -Ralph is the repo-local autonomous loop that plans and works through tasks end-to-end. +| Platform | Install | Notes | +|----------|---------|-------| +| **Claude Code** | `/plugin install flow-code` | Primary platform | +| **Factory Droid** | `/plugin install flow-code` | Native support, uses `${DROID_PLUGIN_ROOT}` fallback | +| **OpenAI Codex** | `./scripts/install-codex.sh` | Commands use `/prompts:` prefix | -**Setup (one-time, inside Claude):** -```bash -/flow-code:ralph-init -``` +## `.flow/` Directory -Or from terminal without entering Claude: -```bash -claude -p "/flow-code:ralph-init" ``` - -**Run (outside Claude):** -```bash -scripts/ralph/ralph.sh +.flow/ + meta.json # Schema version + config.json # Project settings + epics/ + fn-1-add-oauth.json # Epic metadata (id, title, status, deps) + specs/ + fn-1-add-oauth.md # Epic spec (plan, scope, acceptance) + tasks/ + fn-1-add-oauth.1.json # Task metadata (id, status, priority, deps) + fn-1-add-oauth.1.md # Task spec (description, acceptance, done summary) + memory/ # Persistent learnings (opt-in) ``` -Ralph writes run artifacts under `scripts/ralph/runs/`, including review receipts used for gating. 
- -📖 **[Ralph deep dive](docs/ralph.md)** +Uninstall: delete `.flow/` (and `scripts/ralph/` if enabled). Or run `/flow-code:uninstall`. -🖥️ **[Ralph TUI](../../flow-code-tui/)** — Terminal UI for monitoring runs in real-time (`bun add -g flow-code-tui`) - -### How Ralph Differs from Other Autonomous Agents - -Autonomous coding agents are taking the industry by storm—loop until done, commit, repeat. Most solutions gate progress by tests and linting alone. Ralph goes further. - -**Multi-model review gates**: Ralph uses [RepoPrompt](https://repoprompt.com) (macOS) or OpenAI Codex CLI (cross-platform) to send plan and implementation reviews to a *different* model. A second set of eyes catches blind spots that self-review misses. RepoPrompt's builder provides full file context; Codex uses context hints from changed files. - -**Review loops until Ship** (max 2 iterations): Reviews block progress until resolved. Fix → re-review cycles run until `SHIP` verdict or iteration limit (prevents infinite loops from diminishing-returns fixes). - -**Receipt-based gating**: Reviews must produce a receipt JSON file proving they ran. No receipt = no progress. This prevents drift where Claude skips the review step and marks things done anyway. - -**Guard hooks**: Plugin hooks enforce workflow rules deterministically—blocking `--json` flags, preventing new chats on re-reviews, requiring receipts before stop. Only active when `FLOW_RALPH=1`; zero impact for non-Ralph users. See [Guard Hooks](docs/ralph.md#guard-hooks). - -**Atomic window selection**: The `setup-review` command handles RepoPrompt window matching atomically. Claude can't skip steps or invent window IDs—the entire sequence runs as one unit or fails. - -The result: code that's been reviewed by two models, tested, linted, and iteratively refined. Not perfect, but meaningfully more robust than single-model autonomous loops. - -### Controlling Ralph - -External agents (Clawdbot, GitHub Actions, etc.) 
can pause/resume/stop Ralph runs without killing processes. - -**CLI commands:** -```bash -# Check status -flowctl status # Epic/task counts + active runs -flowctl status --json # JSON for automation - -# Control active run -flowctl ralph pause # Pause run (auto-detects if single) -flowctl ralph resume # Resume paused run -flowctl ralph stop # Request graceful stop -flowctl ralph status # Show run state - -# Specify run when multiple active -flowctl ralph pause --run -``` - -**Sentinel files (manual control):** -```bash -# Pause: touch PAUSE file in run directory -touch scripts/ralph/runs//PAUSE -# Resume: remove PAUSE file -rm scripts/ralph/runs//PAUSE -# Stop: touch STOP file (kept for audit) -touch scripts/ralph/runs//STOP -``` +## Contributing -Ralph checks sentinels at iteration boundaries (after Claude returns, before next iteration). +1. Fork the repository +2. Create a feature branch +3. Run tests: `cd flowctl && cargo build --release && cargo test --all` +4. Run smoke tests: `bash scripts/smoke_test.sh` +5. Submit a PR -### Review Mode (Three-Layer Quality) +See [docs/skills.md](docs/skills.md) for the skill classification and [CLAUDE.md](CLAUDE.md) for development conventions. 
-Ralph uses the same three-layer quality system as interactive mode: +## License -``` -plan → RP plan-review (Layer 2) -task 1 → guard ✓ (Layer 1) -task 2 → guard ✓ -task N → guard ✓ -all done → Codex adversarial (Layer 3) -→ push + draft PR -``` - -**Configure in `scripts/ralph/config.env`:** - -```bash -# Review backend (rp = RepoPrompt, codex = Codex CLI, none = skip) -WORK_REVIEW=rp -``` - -**Common configurations:** - -```bash -# Fast iteration with quality gate (recommended) -REVIEW_MODE=per-epic -WORK_REVIEW=rp - -# Maximum speed, no reviews -REVIEW_MODE=per-epic -WORK_REVIEW=none -COMPLETION_REVIEW=none - -# Strict mode, review everything -REVIEW_MODE=per-task -WORK_REVIEW=rp -COMPLETION_REVIEW=rp -``` - -**Monitoring:** - -```bash -# Watch Ralph run in real-time -scripts/ralph/ralph.sh --watch - -# View run logs -tail -f scripts/ralph/runs/latest/ralph.log - -# Check progress -scripts/ralph/flowctl list -``` - -### Scope Isolation (Freeze Scope) - -When running Ralph overnight, external changes to the backlog can cause unexpected behavior — new tasks picked up without review, removed tasks causing confusion, modified specs invalidating assumptions. 
- -**Configure in `scripts/ralph/config.env`:** - -```bash -# Capture task IDs + spec hashes at start, check each iteration -FREEZE_SCOPE=1 - -# What to do on scope change: stop | warn | ignore -SCOPE_CHANGE_ACTION=stop -``` - -**What it detects:** - -| Change Type | Detection | Outcome | -|-------------|-----------|---------| -| Task added externally | Task ID not in frozen list | SCOPE_CHANGED | -| Task removed externally | Frozen task ID missing | SCOPE_CHANGED | -| Spec content modified | MD5 hash mismatch | SCOPE_CHANGED | -| Status change (todo→done) | Not tracked | Allowed (normal) | - -**Actions:** - -| Action | Behavior | -|--------|----------| -| `stop` | Halt Ralph with exit code 1 and clear message | -| `warn` | Log changes, display warning, continue execution | -| `ignore` | Log changes silently, continue execution | - -**Files created in `$RUN_DIR/scope/`:** - -| File | Content | -|------|---------| -| `scope.json` | Full snapshot (task IDs, statuses, spec hashes) | -| `task_ids.txt` | Sorted task IDs for easy diff | -| `hashes.txt` | `id:md5hash` pairs for specs and tasks | -| `changes-iter-NNN.txt` | Detected changes per iteration (if any) | - -**Recommended for overnight runs:** -```bash -FREEZE_SCOPE=1 -SCOPE_CHANGE_ACTION=stop # Safe: halt on external changes -``` - -**For monitored runs:** -```bash -FREEZE_SCOPE=1 -SCOPE_CHANGE_ACTION=warn # Continue but flag changes -``` - -### Structured Logging - -Ralph writes structured JSON event logs to `$RUN_DIR/events.jsonl` for easy parsing and analysis. 
Each line is a JSON object: - -```json -{"ts":"2026-03-26T12:00:00.123Z","level":"info","event":"run_start","run_id":"20260326-120000-a1b2","max_iterations":25,"review_mode":"per-epic"} -{"ts":"2026-03-26T12:01:15.456Z","level":"info","event":"iteration","iter":1,"status":"work","task":"fn-1.1"} -{"ts":"2026-03-26T12:05:30.789Z","level":"info","event":"worker_done","iter":1,"exit_code":0,"timeout":false} -{"ts":"2026-03-26T12:30:00.000Z","level":"info","event":"run_end","reason":"NO_WORK","tasks_done":5,"elapsed":"29:00"} -``` - -**Query examples:** -```bash -# Count iterations per status -jq -r 'select(.event=="iteration") | .status' events.jsonl | sort | uniq -c - -# Find failed workers -jq 'select(.event=="worker_done" and .exit_code!=0)' events.jsonl - -# Total run time -jq -r 'select(.event=="run_end") | .elapsed' events.jsonl -``` - -The plain-text `progress.txt` log still exists for backwards compatibility. Use `events.jsonl` for automation and analysis. - -**Task retry/rollback:** -```bash -# Reset completed/blocked task to todo -flowctl task reset fn-1-add-oauth.3 - -# Reset + cascade to dependent tasks (same epic) -flowctl task reset fn-1-add-oauth.2 --cascade -``` - ---- - -## Human-in-the-Loop Workflow (Detailed) - -Default flow when you drive manually: - -```mermaid -flowchart TD - A[Idea or short spec
prompt or doc] --> B{Need deeper spec?} - B -- yes --> C[Optional: /flow-code:interview fn-N or spec.md
40+ deep questions to refine spec] - C --> D[Refined spec] - B -- no --> D - D --> E[/flow-code:plan idea or fn-N/] - E --> F[Parallel subagents: repo patterns + online docs + best practices] - F --> G[flow-gap-analyst: edge cases + missing reqs] - G --> H[Writes .flow/ epic + tasks + deps] - H --> I{Plan review?} - I -- yes --> J[/flow-code:plan-review fn-N/] - J --> K{Plan passes review?} - K -- no --> L[Re-anchor + fix plan] - L --> J - K -- yes --> M[/flow-code:work fn-N/] - I -- no --> M - M --> N[Re-anchor before EVERY task] - N --> O[Implement] - O --> P[Test + verify acceptance] - P --> Q[flowctl done: write done summary + evidence] - Q --> R{Impl review?} - R -- yes --> S[/flow-code:impl-review/] - S --> T{Next ready task?} - R -- no --> T - T -- yes --> N - T -- no --> V{Epic review?} - V -- yes --> W[/flow-code:epic-review fn-N/] - W --> X{Epic passes review?} - X -- no --> Y[Fix gaps inline] - Y --> W - X -- yes --> U[Close epic] - V -- no --> U - classDef optional stroke-dasharray: 6 4,stroke:#999; - class C,J,S,W optional; -``` - -Notes: -- `/flow-code:interview` accepts Flow IDs or spec file paths and writes refinements back -- `/flow-code:plan` accepts new ideas or an existing Flow ID to update the plan - -Tip: with RP 1.5.68+, use `flowctl rp setup-review --create` to auto-open RepoPrompt windows. Alternatively, open RP on your repo beforehand for faster context loading. -Plan review in rp mode requires `flowctl rp chat-send`; if rp-cli/windows unavailable, the review gate retries. - ---- - -## Features - -Built for reliability. These are the guardrails. - -**Re-anchoring prevents drift** - -Before EVERY task, Flow-Code re-reads the epic spec, task spec, and git state from `.flow/`. This forces Claude back to the source of truth - no hallucinated scope creep, no forgotten requirements. In Ralph mode, this happens automatically each iteration. 
- -Unlike agents that carry accumulated context (where early mistakes compound), re-anchoring gives each task a fresh, accurate starting point. - -### Re-anchoring - -Before EVERY task, Flow-Code re-reads: -- Epic spec and task spec from `.flow/` -- Current git status and recent commits -- Validation state - -Per Anthropic's long-running agent guidance: agents must re-anchor from sources of truth to prevent drift. The reads are cheap; drift is expensive. - -### Multi-user Safe - -Teams can work in parallel branches without coordination servers: - -- **Merge-safe IDs**: Scans existing files to allocate the next ID. No shared counters. -- **Soft claims**: Tasks track an `assignee` field. Prevents accidental duplicate work. -- **Actor resolution**: Auto-detects from git email, `FLOW_ACTOR` env, or `$USER`. -- **Local validation**: `flowctl validate --all` catches issues before commit. - -```bash -# Actor A starts task -flowctl start fn-1.1 # Sets assignee automatically - -# Actor B tries same task -flowctl start fn-1.1 # Fails: "claimed by actor-a@example.com" -flowctl start fn-1.1 --force # Override if needed -``` - -### Parallel Worktrees - -Multiple agents can work simultaneously in different git worktrees, sharing task state: - -```bash -# Main repo -git worktree add ../feature-a fn-1-branch -git worktree add ../feature-b fn-2-branch - -# Both worktrees share task state via .git/flow-state/ -cd ../feature-a && flowctl start fn-1.1 # Agent A claims task -cd ../feature-b && flowctl start fn-2.1 # Agent B claims different task -``` - -**How it works:** -- Runtime state (status, assignee, evidence) lives in `.git/flow-state/` — shared across worktrees -- Definition files (title, description, deps) stay in `.flow/` — tracked in git -- Per-task `fcntl` locking prevents race conditions - -**State directory resolution:** -1. `FLOW_STATE_DIR` env (explicit override) -2. `git --git-common-dir` + `/flow-state` (worktree-aware) -3. 
`.flow/state` fallback (non-git or old git) - -**Commands:** -```bash -flowctl state-path # Show resolved state directory -``` - -### Zero Dependencies - -Everything is bundled: -- `flowctl.py` and the `flowctl/` package ship with the plugin -- No external tracker CLI to install -- No external services -- Just Python 3 - -### Bundled Skills - -Utility skills available during planning and implementation: - -| Skill | Use Case | -|-------|----------| -| `browser` | Web automation via agent-browser CLI (verify UI, scrape docs, test flows) | -| `flow-code-rp-explorer` | Token-efficient codebase exploration via RepoPrompt | -| `flow-code-worktree-kit` | Git worktree management for parallel work | -| `flow-code-export-context` | Export context for external LLM review | - -### Non-invasive - -- No daemons -- No CLAUDE.md edits -- Delete `.flow/` to uninstall; if you enabled Ralph, also delete `scripts/ralph/` -- Ralph uses plugin hooks for workflow enforcement (only active when `FLOW_RALPH=1`) - -### CI-ready - -```bash -flowctl validate --all -``` - -Exits 1 on errors. Drop into pre-commit hooks or GitHub Actions. See `docs/ci-workflow-example.yml`. - -### One File Per Task - -Each epic and task gets its own JSON + markdown file pair. Merge conflicts are rare and easy to resolve. - -### Cross-Model Reviews - -Two models catch what one misses. Reviews use a second model (via RepoPrompt or Codex) to verify plans and implementations before they ship. - -**Three review types:** -- **Plan reviews** — Verify architecture before coding starts -- **Impl reviews** — Verify each task implementation -- **Completion reviews** — Verify epic delivers all spec requirements before closing - -**Review criteria (Carmack-level, identical for both backends):** - -| Review Type | Criteria | -|-------------|----------| -| **Plan** | Completeness, Feasibility, Clarity, Architecture, Risks (incl. 
security), Scope, Testability | -| **Impl** | Correctness, Simplicity, DRY, Architecture, Edge Cases, Tests, Security | -| **Completion** | Spec compliance: all requirements delivered, docs updated, no gaps | - -Reviews block progress until `SHIP`. Fix → re-review cycles continue until approved. - -#### RepoPrompt (Recommended) - -[RepoPrompt](https://repoprompt.com) provides the best review experience on macOS. - -**Why recommended:** -- Best-in-class context builder for reviews (full file context, smart selection) -- Enables **context-scout** for deeper codebase discovery (alternative: repo-scout works without RP) -- Visual diff review UI + persistent chat threads - -**Setup:** - -1. Install RepoPrompt: - ```bash - brew install --cask repoprompt - ``` - -2. **Enable MCP Server** (required for rp-cli): - - Settings → MCP Server → Enable - - Click "Install CLI to PATH" (creates `/usr/local/bin/rp-cli`) - - Verify: `rp-cli --version` - -3. **Configure models** — RepoPrompt uses two models that must be set in the UI (not controllable via CLI): - - | Setting | Recommended | Purpose | - |---------|-------------|---------| - | **Context Builder model** | GPT-5.3 Codex Medium (via Codex CLI or OpenAI API) | Builds file selection for reviews. Needs large context window. | - | **Chat model** | GPT-5.2 High (via Codex CLI or OpenAI API) | Runs the actual review. Needs strong reasoning. | - - Set these in Settings → Models. Any OpenAI API-compatible model works (Codex CLI, OpenAI API key, or other providers). These models are what make cross-model review valuable — a different model catches blind spots that self-review misses. - - > **Note:** When `--create` auto-opens a new workspace, it inherits your default model settings. Configure models before first use. - -**Usage:** -```bash -/flow-code:plan-review fn-1 --review=rp -/flow-code:impl-review --review=rp -``` - -#### Codex (Cross-Platform Alternative) - -OpenAI Codex CLI works on any platform (macOS, Linux, Windows). 
- -**Why use Codex:** -- Cross-platform (no macOS requirement) -- Terminal-based (no GUI needed) -- Session continuity via thread IDs -- Same Carmack-level review criteria as RepoPrompt -- Uses GPT 5.2 High by default when used as a review backend from Claude Code (no config needed) - -**Trade-off:** Uses heuristic context hints from changed files rather than RepoPrompt's intelligent file selection. - -> **Note:** When running Flow-Code inside Codex itself, commands use `/prompts:` prefix (e.g., `/prompts:impl-review`). The `/flow-code:` prefix below applies to Claude Code. - -**Setup:** -```bash -# Install and authenticate Codex CLI -npm install -g @openai/codex -codex auth -``` - -**Usage:** -```bash -/flow-code:plan-review fn-1 --review=codex -/flow-code:impl-review --review=codex - -# Or via flowctl directly -flowctl codex plan-review fn-1 --base main -flowctl codex impl-review fn-1.3 --base main -``` - -**Verify installation:** -```bash -flowctl codex check -``` - -#### Configuration - -Set default review backend: -```bash -# Per-project (saved in .flow/config.json) -flowctl config set review.backend rp # or codex, or none - -# Per-session (environment variable) -export FLOW_REVIEW_BACKEND=codex -``` - -Priority: `--review=...` argument > `FLOW_REVIEW_BACKEND` env > `.flow/config.json` > error. - -**No auto-detect.** Run `/flow-code:setup` to configure your preferred review backend, or pass `--review=X` explicitly. - -#### Which to Choose? - -| Scenario | Recommendation | -|----------|----------------| -| macOS with GUI available | RepoPrompt (better context) | -| Linux/Windows | Codex (only option) | -| CI/headless environments | Codex (no GUI needed) | -| Ralph overnight runs | Either works; RP auto-opens with --create (1.5.68+) | - -Without a backend configured, reviews fail with a clear error. Run `/flow-code:setup` or pass `--review=X`. - -### Dependency Graphs - -Tasks declare their blockers. `flowctl ready` shows what can start. 
Nothing executes until dependencies resolve. - -**Epic-level dependencies**: During planning, `epic-scout` runs in parallel with other research scouts to find relationships with existing open epics. If the new plan depends on APIs/patterns from another epic, dependencies are auto-set via `flowctl epic add-dep`. Findings reported at end of planning—no prompts needed. - -### Auto-Block Stuck Tasks - -After MAX_ATTEMPTS_PER_TASK failures (default 5), Ralph: -1. Writes `block-.md` with failure context -2. Marks task blocked via `flowctl block` -3. Moves to next task - -Prevents infinite retry loops. Review `block-*.md` files in the morning to understand what went wrong. - -### Plan-Sync (Opt-in) - -Synchronizes downstream task specs when implementation drifts from the original plan. - -**Automatic (opt-in):** -```bash -flowctl config set planSync.enabled true -``` - -When enabled, after each task completes, a plan-sync agent: -1. Compares what was planned vs what was actually built -2. Identifies downstream tasks that reference stale assumptions (names, APIs, data structures) -3. Updates affected task specs with accurate info - -Skip conditions: disabled (default), task failed, no downstream tasks. - -**Cross-epic sync (opt-in, default false):** -```bash -flowctl config set planSync.crossEpic true -``` - -When enabled, plan-sync also checks other open epics for stale references. Useful when multiple epics share APIs/patterns, but increases sync time. Disabled by default to avoid long Ralph loops. - -**Manual trigger:** -```bash -/flow-code:sync fn-1.2 # Sync from specific task -/flow-code:sync fn-1 # Scan whole epic for drift -/flow-code:sync fn-1.2 --dry-run # Preview changes without writing -``` - -Manual sync ignores `planSync.enabled` config—if you run it, you want it. Works with any source task status (not just done). - -### Memory System (Opt-in) - -Persistent learnings that survive context compaction. 
- -```bash -# Enable -flowctl config set memory.enabled true -flowctl memory init - -# Manual entries -flowctl memory add --type pitfall "Always use flowctl rp wrappers" -flowctl memory add --type convention "Tests in __tests__ dirs" -flowctl memory add --type decision "SQLite over Postgres for simplicity" - -# Query -flowctl memory list -flowctl memory search "flowctl" -flowctl memory read --type pitfalls -``` - -When enabled: -- **Planning**: `memory-scout` runs in parallel with other scouts -- **Work**: worker reads memory files directly during re-anchor -- **Ralph**: NEEDS_WORK reviews auto-capture to `pitfalls.md` -- **Auto-capture**: session end hook extracts decisions, discoveries, and pitfalls from transcript - -**Auto-memory** (on by default, zero config): - -Every session end, the plugin automatically extracts key learnings from the transcript: - -- **Default: Gemini AI summarization** — `gemini -p` analyzes the transcript and extracts decisions, discoveries, and pitfalls. Understands semantics, not just keywords. -- **Fallback: pattern matching** — if `gemini` CLI is not available, falls back to regex extraction. - -No setup needed — `.flow/memory/` is auto-created on first capture. Max 5 entries per session: -- `pitfalls.md` — bugs found, things to avoid -- `conventions.md` — project patterns, coding conventions -- `decisions.md` — architectural choices and rationale - -To disable: `flowctl config set memory.auto false` - -Memory retrieval works in all modes (manual, Ralph, auto-improve). Use `flowctl memory add` for manual entries. - -Config lives in `.flow/config.json`, separate from Ralph's `scripts/ralph/config.env`. 
- ---- - -## Commands - -Ten commands, complete workflow: - -| Command | What It Does | -|---------|--------------| -| `/flow-code:plan ` | Research the codebase, create epic with dependency-ordered tasks | -| `/flow-code:work ` | Execute epic, task, or spec file, re-anchoring before each | -| `/flow-code:interview ` | Deep interview to flesh out a spec before planning | -| `/flow-code:plan-review ` | Carmack-level plan review via RepoPrompt | -| `/flow-code:impl-review` | Carmack-level impl review of current branch | -| `/flow-code:epic-review ` | Epic-completion review: verify implementation matches spec | -| `/flow-code:debug` | Systematic debugging: root cause investigation → pattern analysis → hypothesis → fix | -| `/flow-code:prime` | Assess codebase agent-readiness, propose fixes ([details](#agent-readiness-assessment)) | -| `/flow-code:sync ` | Manual plan-sync: update downstream tasks after implementation drift | -| `/flow-code:ralph-init` | Scaffold repo-local Ralph harness (`scripts/ralph/`) | -| `/flow-code:retro` | Post-epic retrospective: what worked, what didn't, lessons → memory | -| `/flow-code:django` | Django patterns: architecture, DRF, security, testing, verification | -| `/flow-code:skill-create` | TDD-based skill creation: baseline test → write → bulletproof | -| `/flow-code:setup` | Optional: install flowctl locally + add docs (for power users) | -| `/flow-code:uninstall` | Remove flow-code from project (keeps tasks if desired) | - -Work accepts an epic (`fn-N`), task (`fn-N.M`), or markdown spec file (`.md`). Spec files auto-create an epic with one task. 
- -### Autonomous Mode (Flags) - -All commands accept flags to skip questions: - -```bash -# Plan with flags -/flow-code:plan Add caching --research=grep --no-review -/flow-code:plan Add auth --research=rp --review=rp - -# Work with flags -/flow-code:work fn-1 --branch=current --no-review -/flow-code:work fn-1 --branch=new --review=export - -# Reviews with flags -/flow-code:plan-review fn-1 --review=rp -/flow-code:impl-review --review=export -``` - -Natural language also works: - -```bash -/flow-code:plan Add webhooks, use context-scout, skip review -/flow-code:work fn-1 current branch, no review -``` - -| Command | Available Flags | -|---------|-----------------| -| `/flow-code:plan` | `--research=rp\|grep`, `--depth=short\|standard\|deep`, `--review=rp\|codex\|export\|none`, `--plan-only` | -| `/flow-code:work` | `--branch=current\|worktree\|new`, `--review=rp\|codex\|none`, `--no-review`, `--interactive`, `--tdd`, `--no-pr` | -| `/flow-code:plan-review` | `--review=rp\|codex\|export` | -| `/flow-code:impl-review` | `--review=rp\|codex\|export` | -| `/flow-code:prime` | `--report-only`, `--fix-all` | -| `/flow-code:sync` | `--dry-run` | - -### Command Reference - -Detailed input documentation for each command. 
- -#### `/flow-code:plan` - -``` -/flow-code:plan [--research=rp|grep] [--review=rp|codex|export|none] -``` - -| Input | Description | -|-------|-------------| -| `` | Free-form feature description ("Add user authentication with OAuth") | -| `fn-N` | Existing epic ID to update the plan | -| `--research=rp` | Use RepoPrompt context-scout for deeper codebase discovery | -| `--research=grep` | Use grep-based repo-scout (default, faster) | -| `--review=rp\|codex\|export\|none` | Review backend after planning | -| `--no-review` | Shorthand for `--review=none` | - -#### `/flow-code:work` - -``` -/flow-code:work [--branch=current|new|worktree] [--review=rp|codex|export|none] -``` - -| Input | Description | -|-------|-------------| -| `fn-N` | Execute entire epic (all tasks in dependency order) | -| `fn-N.M` | Execute single task | -| `path/to/spec.md` | Create epic from spec file, execute immediately | -| `--branch=current` | Work on current branch | -| `--branch=new` | Create new branch `fn-N-slug` (default) | -| `--branch=worktree` | Create git worktree for isolated work | -| `--review=rp\|codex\|export\|none` | Review backend after work | -| `--no-review` | Shorthand for `--review=none` | - -#### `/flow-code:interview` - -``` -/flow-code:interview -``` - -| Input | Description | -|-------|-------------| -| `fn-N` | Interview about epic to refine requirements | -| `fn-N.M` | Interview about specific task | -| `path/to/spec.md` | Interview about spec file | -| `"rough idea"` | Interview about a new idea (creates epic) | - -Deep questioning (40+ questions) to surface requirements, edge cases, and decisions. 
- -#### `/flow-code:plan-review` - -``` -/flow-code:plan-review [--review=rp|codex|export] [focus areas] -``` - -| Input | Description | -|-------|-------------| -| `fn-N` | Epic ID to review | -| `--review=rp` | Use RepoPrompt (macOS, visual builder) | -| `--review=codex` | Use OpenAI Codex CLI (cross-platform) | -| `--review=export` | Export context for manual review | -| `[focus areas]` | Optional: "focus on security" or "check API design" | - -Carmack-level criteria: Completeness, Feasibility, Clarity, Architecture, Risks, Scope, Testability. - -#### `/flow-code:impl-review` - -``` -/flow-code:impl-review [--review=rp|codex|export] [focus areas] -``` - -| Input | Description | -|-------|-------------| -| `--review=rp` | Use RepoPrompt (macOS, visual builder) | -| `--review=codex` | Use OpenAI Codex CLI (cross-platform) | -| `--review=export` | Export context for manual review | -| `[focus areas]` | Optional: "focus on performance" or "check error handling" | - -Reviews current branch changes. Carmack-level criteria: Correctness, Simplicity, DRY, Architecture, Edge Cases, Tests, Security. - -#### `/flow-code:epic-review` - -``` -/flow-code:epic-review [--review=rp|codex|none] -``` - -| Input | Description | -|-------|-------------| -| `fn-N` | Epic ID to review | -| `--review=rp` | Use RepoPrompt (macOS, visual builder) | -| `--review=codex` | Use OpenAI Codex CLI (cross-platform) | -| `--review=none` | Skip review | - -Reviews epic implementation against spec. Runs after all tasks complete. Catches requirement gaps, missing functionality, incomplete doc updates. 
- -#### `/flow-code:prime` - -``` -/flow-code:prime [--report-only] [--fix-all] [path] -``` - -| Input | Description | -|-------|-------------| -| (no args) | Assess current directory, interactive fixes | -| `--report-only` | Show assessment report, skip remediation | -| `--fix-all` | Apply all recommendations without asking | -| `[path]` | Assess a different directory | - -See [Agent Readiness Assessment](#agent-readiness-assessment) for details. - -#### `/flow-code:sync` - -``` -/flow-code:sync [--dry-run] -``` - -| Input | Description | -|-------|-------------| -| `fn-N` | Sync entire epic's downstream tasks | -| `fn-N.M` | Sync from specific task | -| `--dry-run` | Preview changes without writing | - -Updates downstream task specs when implementation drifts from plan. - -#### `/flow-code:ralph-init` - -``` -/flow-code:ralph-init -``` - -No arguments. Scaffolds `scripts/ralph/` for autonomous operation. - -#### `/flow-code:setup` - -``` -/flow-code:setup -``` - -No arguments. Optional setup that: -- Configures review backend (rp, codex, or none) -- Copies flowctl to `.flow/bin/` -- Adds flow-code instructions to CLAUDE.md/AGENTS.md - -#### `/flow-code:uninstall` - -``` -/flow-code:uninstall -``` - -No arguments. Interactive removal with option to keep tasks. - ---- - -## The Workflow - -### Defaults (manual and Ralph) - -Flow-Code uses the same defaults in manual and Ralph runs. Ralph bypasses prompts only. - -- plan: `--research=grep` -- work: `--branch=new` -- review: from `.flow/config.json` (set via `/flow-code:setup`), or `none` if not configured - -Override via flags or `scripts/ralph/config.env`. - -### Planning Phase - -1. **Research (parallel subagents)**: `repo-scout` (or `context-scout` if rp-cli) + `practice-scout` + `docs-scout` + `github-scout` + `epic-scout` + `docs-gap-scout` -2. **Gap analysis**: `flow-gap-analyst` finds edge cases + missing requirements -3. 
**Epic creation**: Writes spec to `.flow/specs/fn-N.md`, sets epic dependencies from `epic-scout` findings -4. **Task breakdown**: Creates tasks + explicit dependencies in `.flow/tasks/`, adds doc update acceptance criteria from `docs-gap-scout` -5. **Validate**: `flowctl validate --epic fn-N` -6. **Review** (optional): `/flow-code:plan-review fn-N` with re-anchor + fix loop until "Ship" - -### Work Phase - -1. **Re-anchor**: Re-read epic + task specs + git state (EVERY task) -2. **Execute**: Implement using existing patterns -3. **Test**: Verify acceptance criteria -4. **Record**: `flowctl done` adds summary + evidence to the task spec -5. **Review** (optional): `/flow-code:impl-review` via RepoPrompt -6. **Loop**: Next ready task → repeat until no ready tasks. Close epic manually (`flowctl epic close fn-N`) or let Ralph close at loop end. - ---- - -## Ralph Mode (Autonomous, Opt-In) - -Ralph is repo-local and opt-in. Files are created only by `/flow-code:ralph-init`. Remove manually with `rm -rf scripts/ralph/`. -`/flow-code:ralph-init` also writes `scripts/ralph/.gitignore` so run logs stay out of git. - -What it automates (one unit per iteration, fresh context each time): -- Selector chooses plan vs work unit (`flowctl next`) -- Plan gate = plan review loop until Ship (if enabled) -- Work gate = one task until pass (tests + validate + optional impl review) - - Single run branch: all epics work on one `ralph-` branch (cherry-pick/revert friendly) - -Enable: -```bash -/flow-code:ralph-init -./scripts/ralph/ralph_once.sh # one iteration (observe) -./scripts/ralph/ralph.sh # full loop (AFK) -``` - -**Watch mode** - see what Claude is doing: -```bash -./scripts/ralph/ralph.sh --watch # Stream tool calls in real-time -./scripts/ralph/ralph.sh --watch verbose # Also stream model responses -``` - -Run scripts from terminal (not inside Claude Code). `ralph_once.sh` runs one iteration so you can observe before going fully autonomous. 
- -### Ralph defaults vs recommended (plan review gate) - -`REQUIRE_PLAN_REVIEW` controls whether Ralph must pass the **plan review gate** before doing any implementation work. - -**Default (safe, won't stall):** - -* `REQUIRE_PLAN_REVIEW=0` - Ralph can proceed to work tasks even if `rp-cli` is missing or unavailable overnight. - -**Recommended (best results, requires rp-cli):** - -* `REQUIRE_PLAN_REVIEW=1` -* `PLAN_REVIEW=rp` - -This forces Ralph to run `/flow-code:plan-review` until the epic plan is approved before starting tasks. - -**Tip:** If you don't have `rp-cli` installed, keep `REQUIRE_PLAN_REVIEW=0` or Ralph may repeatedly select the plan gate and make no progress. - -Ralph verifies RepoPrompt reviews via receipt JSON files in `scripts/ralph/runs//receipts/` (plan + impl). - -### Ralph loop (one iteration) - -```mermaid -flowchart TD - A[ralph.sh iteration] --> B[flowctl next] - B -->|status=plan| C[/flow-code:plan-review fn-N/] - C -->|verdict=SHIP| D[flowctl epic set-plan-review-status=ship] - C -->|verdict!=SHIP| A - - B -->|status=work| E[/flow-code:work fn-N.M/] - E --> F[tests + validate] - F -->|fail| A - - F -->|WORK_REVIEW!=none| R[/flow-code:impl-review/] - R -->|verdict=SHIP| G[flowctl done + git commit] - R -->|verdict!=SHIP| A - - F -->|WORK_REVIEW=none| G - - G --> A - - B -->|status=completion_review| CR[/flow-code:epic-review fn-N/] - CR -->|verdict=SHIP| CRD[flowctl epic set-completion-review-status=ship] - CR -->|verdict!=SHIP| A - CRD --> A - - B -->|status=none| H[close done epics] - H --> I[COMPLETE] -``` - -**YOLO safety**: YOLO mode uses `--dangerously-skip-permissions`. Use a sandbox/container and no secrets in env for unattended runs. - ---- - -## .flow/ Directory - -``` -.flow/ -├── meta.json # Schema version -├── config.json # Project settings (memory enabled, etc.) 
-├── epics/ -│ └── fn-1-add-oauth.json # Epic metadata (id, title, status, deps) -├── specs/ -│ └── fn-1-add-oauth.md # Epic spec (plan, scope, acceptance) -├── tasks/ -│ ├── fn-1-add-oauth.1.json # Task metadata (id, status, priority, deps, assignee) -│ ├── fn-1-add-oauth.1.md # Task spec (description, acceptance, done summary) -│ └── ... -└── memory/ # Persistent learnings (opt-in) - ├── pitfalls.md # Lessons from NEEDS_WORK reviews - ├── conventions.md # Project patterns - └── decisions.md # Architectural choices -``` - -Flowctl accepts schema v1 and v2; new fields are optional and defaulted. - -New fields: -- Epic JSON: `plan_review_status`, `plan_reviewed_at`, `completion_review_status`, `completion_reviewed_at`, `depends_on_epics`, `branch_name`, `gaps` -- Task JSON: `priority` - -### ID Format - -- **Epic**: `fn-N-slug` where `slug` is derived from the epic title (e.g., `fn-1-add-oauth`, `fn-2-fix-login-bug`) -- **Task**: `fn-N-slug.M` (e.g., `fn-1-add-oauth.1`, `fn-2-fix-login-bug.2`) - -The slug is automatically generated from the epic title (lowercase, hyphens for spaces, max 40 chars). This makes IDs human-readable and self-documenting. - -**Backwards compatibility**: Legacy formats `fn-N` (no suffix) and `fn-N-xxx` (random 3-char suffix) are still fully supported. Existing epics don't need migration. - -There are no task IDs outside an epic. If you want a single task, create an epic with one task. - -### Separation of Concerns - -- **JSON files**: Metadata only (IDs, status, dependencies, assignee) -- **Markdown files**: Narrative content (specs, descriptions, summaries) - ---- - -## flowctl CLI - -Bundled Python script for managing `.flow/`. Flow-Code's commands handle epic/task creation automatically—use `flowctl` for direct inspection, fixes, or advanced workflows: - -```bash -# Setup -flowctl init # Create .flow/ structure -flowctl detect # Check if .flow/ exists - -# Epics -flowctl epic create --title "..." 
# Create epic -flowctl epic create --title "..." --branch "fn-1-epic" -flowctl epic set-plan fn-1 --file spec.md # Set epic spec from file -flowctl epic set-plan-review-status fn-1 --status ship -flowctl epic close fn-1 # Close epic (requires all tasks done) - -# Tasks -flowctl task create --epic fn-1 --title "..." --deps fn-1.2,fn-1.3 --priority 10 -flowctl task set-description fn-1.1 --file desc.md -flowctl task set-acceptance fn-1.1 --file accept.md - -# Dependencies -flowctl dep add fn-1.3 fn-1.2 # fn-1.3 depends on fn-1.2 - -# Workflow -flowctl ready --epic fn-1 # Show ready/in_progress/blocked -flowctl next # Select next plan/work unit -flowctl start fn-1.1 # Claim and start task -flowctl done fn-1.1 --summary-file s.md --evidence-json e.json -flowctl block fn-1.2 --reason-file r.md - -# Queries -flowctl show fn-1 --json # Epic with all tasks -flowctl cat fn-1 # Print epic spec - -# Validation -flowctl validate --epic fn-1 # Validate single epic -flowctl validate --all # Validate everything (for CI) - -# Review helpers -flowctl rp chat-send --window W --tab T --message-file m.md -flowctl prep-chat --message-file m.md --selected-paths a.ts b.ts -o payload.json -``` - -📖 **[Full CLI reference](docs/flowctl.md)** -🤖 **[Ralph deep dive](docs/ralph.md)** - ---- - -## Task Completion - -When a task completes, `flowctl done` appends structured data to the task spec: - -### Done Summary - -```markdown -## Done summary - -- Added ContactForm component with Zod validation -- Integrated with server action for submission -- All tests passing - -Follow-ups: -- Consider rate limiting (out of scope) -``` - -### Evidence - -```markdown -## Evidence - -- Commits: a3f21b9 -- Tests: bun test -- PRs: -``` - -This creates a complete audit trail: what was planned, what was done, how it was verified. 
- ---- - -## Flow vs Flow-Code - -| | Flow | Flow-Code | -|:--|:--|:--| -| **Task tracking** | External tracker or standalone plan files | `.flow/` directory (bundled flowctl) | -| **Install** | Plugin + optional external tracker | Plugin only | -| **Artifacts** | Standalone plan files | `.flow/specs/` and `.flow/tasks/` | -| **Config edits** | External config edits (if using tracker) | None | -| **Multi-user** | Via external tracker | Built-in (scan-based IDs, soft claims) | -| **Uninstall** | Remove plugin + external tracker config | Delete `.flow/` (and `scripts/ralph/` if enabled) | - -**Choose Flow-Code if you want:** -- Zero external dependencies -- No config file edits -- Clean uninstall (delete `.flow/`, and `scripts/ralph/` if enabled) -- Built-in multi-user safety - -**Choose Flow if you:** -- Already use an external tracker for issue tracking -- Want plan files as standalone artifacts -- Need full issue management features - ---- - -## Requirements - -- Python 3.8+ -- git -- Optional: [RepoPrompt](https://repoprompt.com) for macOS GUI reviews + enables **context-scout** (deeper codebase discovery than repo-scout). Reviews work without it via Codex backend. -- Optional: OpenAI Codex CLI (`npm install -g @openai/codex`) for cross-platform terminal-based reviews - -Without a review backend, reviews are skipped. - ---- - -## Development - -```bash -claude --plugin-dir ./plugins/flow-code -``` - ---- - -## Other Platforms - -### Factory Droid (Native Support) - -Flow-Code works natively in [Factory Droid](https://factory.ai) — no modifications needed. 
- -**Install:** -```bash -# In Droid CLI -/plugin marketplace add https://github.com/z23cc/flow-code -/plugin install flow-code -``` - -**Cross-platform patterns used:** -- Skills use `${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}` bash fallback -- Hooks use `Bash|Execute` regex matcher (Claude Code = Bash, Droid = Execute) -- Agents use `disallowedTools` blacklist (not `tools` whitelist — tool names differ between platforms) - -**Caveats:** -- Subagents may behave differently (Droid's Task tool implementation) -- Hook timing may vary slightly - -> **Rollback:** If you experience issues, downgrade to v0.20.9 (last pre-Droid version): `claude plugins install flow-code@0.20.9` - -### OpenAI Codex - -Flow-Code works in OpenAI Codex with near-parity to Claude Code. The install script converts Claude Code's plugin system to Codex's multi-agent roles, prompts, and config. - -**Key difference:** Commands use the `/prompts:` prefix in Codex instead of `/flow-code:`: - -| Claude Code | Codex | -|-------------|-------| -| `/flow-code:plan` | `/prompts:plan` | -| `/flow-code:work` | `/prompts:work` | -| `/flow-code:impl-review` | `/prompts:impl-review` | -| `/flow-code:plan-review` | `/prompts:plan-review` | -| `/flow-code:epic-review` | `/prompts:epic-review` | -| `/flow-code:interview` | `/prompts:interview` | -| `/flow-code:prime` | `/prompts:prime` | -| `/flow-code:ralph-init` | `/prompts:ralph-init` | - -**What works:** -- Planning, work execution, interviews, reviews — full workflow -- Multi-agent roles: 20 agents run as parallel Codex threads (up to 12 concurrent) -- Cross-model reviews (Codex as review backend) -- flowctl CLI - -**Model mapping (3-tier):** - -| Tier | Codex Model | Agents | Reasoning | -|------|-------------|--------|-----------| -| Intelligent | `gpt-5.4` | quality-auditor, flow-gap-analyst, context-scout | high | -| Smart scouts | `gpt-5.4` | epic-scout, agents-md-scout, docs-gap-scout | high | -| Fast scouts | `gpt-5.3-codex-spark` | build, env, 
testing, tooling, observability, security, workflow, memory scouts | skipped | -| Inherited | parent model | worker, plan-sync | parent | - -Smart scouts (epic-scout, agents-md-scout, docs-gap-scout) need deeper reasoning for context building and analysis. The remaining 8 scanning scouts run on Spark for speed — they check for file presence and patterns without needing multi-step reasoning. - -Override model defaults: -```bash -CODEX_MODEL_INTELLIGENT=gpt-5.4 \ -CODEX_MODEL_FAST=gpt-5.3-codex-spark \ -CODEX_REASONING_EFFORT=high \ -CODEX_MAX_THREADS=12 \ -./scripts/install-codex.sh flow-code -``` - -**Caveats:** -- `/prompts:setup` not supported — use manual project setup below -- Ralph autonomous mode not supported — requires plugin hooks (guard hooks, receipt gating) which Codex doesn't support -- `/prompts:ralph-init` scaffolds files but the loop won't enforce workflow rules without hooks -- `claude-md-scout` is auto-renamed to `agents-md-scout` (CLAUDE.md → AGENTS.md patching) - -**Install:** -```bash -# Clone and install (one-time) -git clone https://github.com/z23cc/flow-code.git -cd flow-code -./scripts/install-codex.sh -``` - -> The script copies skills/agents/prompts to `~/.codex/` and flowctl to `~/.flow/bin/`. Add `export PATH="$HOME/.flow/bin:$PATH"` to your shell profile. - -**Per-project setup** (run in each project): -```bash -# Initialize .flow/ directory -flowctl init - -# Optional: configure review backend (codex recommended for Codex CLI) -flowctl config set review.backend codex -``` - -**Optional AGENTS.md snippet** (helps Codex understand flow-code): -```markdown - -## Flow-Code - -This project uses Flow-Code for task tracking. `flowctl` must be in PATH (`~/.flow/bin/`). 
- -Quick commands: -- `flowctl list` — list epics + tasks -- `flowctl ready --epic fn-N` — what's ready -- `flowctl start fn-N.M` — claim task -- `flowctl done fn-N.M --summary-file s.md --evidence-json e.json` - -Prompts (use `/prompts:`): -- `/prompts:plan` — create a build plan -- `/prompts:work` — execute tasks -- `/prompts:impl-review` — implementation review -- `/prompts:interview` — refine specs interactively - -``` +MIT License. See [LICENSE](LICENSE) for details. --- diff --git a/scripts/gen-platform.sh b/scripts/gen-platform.sh new file mode 100755 index 00000000..bacf2ba1 --- /dev/null +++ b/scripts/gen-platform.sh @@ -0,0 +1,455 @@ +#!/usr/bin/env bash +# gen-platform.sh — Generate cross-platform adapter files for flow-code. +# Usage: bash scripts/gen-platform.sh +# Idempotent: safe to re-run, overwrites cleanly. +# Dependencies: bash, jq + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +CLAUDE_MD="$PLUGIN_ROOT/CLAUDE.md" +PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" + +# --- helpers --- + +die() { printf 'Error: %s\n' "$1" >&2; exit 1; } + +require_file() { + [ -f "$1" ] || die "Required file not found: $1" +} + +ensure_dir() { + mkdir -p "$1" +} + +plugin_name() { + jq -r '.name // "flow-code"' "$PLUGIN_JSON" +} + +plugin_version() { + jq -r '.version // "unknown"' "$PLUGIN_JSON" +} + +plugin_description() { + jq -r '.description // ""' "$PLUGIN_JSON" +} + +# Collect skill names and descriptions from SKILL.md frontmatter. +# Outputs: namedescription per line. 
+collect_skills() {
+  # Print one tab-separated "name, description" record for every
+  # skills/*/SKILL.md under $PLUGIN_ROOT. Directories without a SKILL.md are
+  # skipped; a skill whose frontmatter has no name falls back to its
+  # directory name. Prints nothing (and succeeds) when skills/ is missing.
+  local skills_dir="$PLUGIN_ROOT/skills"
+  [ -d "$skills_dir" ] || return 0
+  local skill_dir skill_md
+  for skill_dir in "$skills_dir"/*/; do
+    # The glob leaves a trailing slash on every match; drop it so the path
+    # below does not contain a doubled separator.
+    skill_md="${skill_dir%/}/SKILL.md"
+    [ -f "$skill_md" ] || continue
+    _frontmatter_name_desc "$skill_md" "$(basename "$skill_dir")"
+  done
+}
+
+# Print one tab-separated "name, description" record for the frontmatter of
+# the file given as $1. $2 is the fallback name used when the frontmatter
+# has no name (or an empty one).
+#
+# Only the first block delimited by two `---` lines is read, and only
+# single-line `name:` / `description:` scalars are understood. A trailing
+# carriage return (CRLF files) and trailing blanks are dropped. Quotes are
+# removed only when the same quote character wraps the whole value, so text
+# that merely ends in a quote character is left intact.
+_frontmatter_name_desc() {
+  local file="$1" fallback="$2"
+  local name="" desc="" line key value in_frontmatter=0
+  local field_re='^(name|description): *(.*)$'
+  # `|| [ -n "$line" ]` keeps a final line that has no terminating newline.
+  while IFS= read -r line || [ -n "$line" ]; do
+    line="${line%$'\r'}"
+    if [[ "$line" == "---" ]]; then
+      # The first delimiter opens the frontmatter, the second one closes it.
+      if [[ "$in_frontmatter" -eq 1 ]]; then
+        break
+      fi
+      in_frontmatter=1
+      continue
+    fi
+    [[ "$in_frontmatter" -eq 1 ]] || continue
+    [[ "$line" =~ $field_re ]] || continue
+    key="${BASH_REMATCH[1]}"
+    value="${BASH_REMATCH[2]}"
+    # Trim trailing whitespace before looking at the quotes.
+    value="${value%"${value##*[![:space:]]}"}"
+    case "$value" in
+      \"*\" | \'*\') value="${value:1:${#value}-2}" ;;
+    esac
+    if [[ "$key" == "name" ]]; then
+      name="$value"
+    else
+      desc="$value"
+    fi
+  done < "$file"
+  [ -n "$name" ] || name="$fallback"
+  printf '%s\t%s\n' "$name" "$desc"
+}
+
+# Collect agent names and descriptions from agent .md frontmatter.
+# Prints one tab-separated "name, description" record per agents/*.md file
+# under $PLUGIN_ROOT; an agent without a frontmatter name falls back to its
+# file name minus the .md extension. Prints nothing when agents/ is missing.
+collect_agents() {
+  local agents_dir="$PLUGIN_ROOT/agents"
+  [ -d "$agents_dir" ] || return 0
+  local agent_md
+  for agent_md in "$agents_dir"/*.md; do
+    # An unmatched glob stays literal; the -f test filters it out.
+    [ -f "$agent_md" ] || continue
+    _frontmatter_name_desc "$agent_md" "$(basename "$agent_md" .md)"
+  done
+}
+
+# Collect command names and descriptions from command .md files.
+collect_commands() { + local cmds_dir="$PLUGIN_ROOT/commands/flow-code" + [ -d "$cmds_dir" ] || return 0 + for cmd_md in "$cmds_dir"/*.md; do + [ -f "$cmd_md" ] || continue + local name="" desc="" + local in_frontmatter=0 + while IFS= read -r line; do + if [[ "$in_frontmatter" -eq 0 && "$line" == "---" ]]; then + in_frontmatter=1 + continue + fi + if [[ "$in_frontmatter" -eq 1 && "$line" == "---" ]]; then + break + fi + if [[ "$in_frontmatter" -eq 1 ]]; then + if [[ "$line" =~ ^name:\ *(.*) ]]; then + name="${BASH_REMATCH[1]}" + name="${name#\"}" + name="${name%\"}" + name="${name#\'}" + name="${name%\'}" + fi + if [[ "$line" =~ ^description:\ *(.*) ]]; then + desc="${BASH_REMATCH[1]}" + desc="${desc#\"}" + desc="${desc%\"}" + desc="${desc#\'}" + desc="${desc%\'}" + fi + fi + done < "$cmd_md" + [ -n "$name" ] || name="$(basename "$cmd_md" .md)" + printf '%s\t%s\n' "$name" "$desc" + done +} + +# --- codex target --- + +generate_codex() { + local out_dir="$PLUGIN_ROOT/.codex" + ensure_dir "$out_dir" + + local name version desc + name="$(plugin_name)" + version="$(plugin_version)" + desc="$(plugin_description)" + + # Build AGENTS.md + { + cat <
Auto-generated by \`scripts/gen-platform.sh codex\`. Safe to re-run. +> Source plugin: $name v$version + +## Overview + +$desc + +## Setup + +This directory was generated from a Claude Code plugin. The canonical source of truth +is \`CLAUDE.md\` at the repository root. Key differences for Codex: + +- **flowctl binary**: The task engine lives at \`bin/flowctl\`. Invoke it directly: + \`\`\`bash + ./bin/flowctl + \`\`\` +- **No slash commands**: Codex does not support \`/plugin:command\` syntax. Use the + skill descriptions below to understand available capabilities. +- **No hooks**: Codex does not support lifecycle hooks. Guard checks must be run manually. + +## Primary Workflow + +1. Plan: Create an epic with tasks — \`./bin/flowctl epic create "description"\` +2. Work: Pick next task — \`./bin/flowctl next\` +3. Complete: Mark done with evidence — \`./bin/flowctl done --summary-file --evidence-json \` + +## Architecture + +\`\`\` +CLAUDE.md → Project instructions (read this first) +bin/flowctl → Rust binary — single source of truth for .flow/ state +commands/flow-code/ → Slash command definitions (Claude Code specific) +skills/*/SKILL.md → Skill implementations (reusable across platforms) +agents/*.md → Subagent definitions +\`\`\` + +HEADER + + # Commands section + printf '## Commands\n\n' + printf 'These map to Claude Code slash commands (`/flow-code:`).\n' + printf 'In Codex, invoke the underlying skill or flowctl command directly.\n\n' + printf '| Command | Description |\n' + printf '|---------|-------------|\n' + while IFS=$'\t' read -r cname cdesc; do + printf '| %s | %s |\n' "$cname" "$cdesc" + done < <(collect_commands) + printf '\n' + + # Skills section + printf '## Skills\n\n' + printf 'Skills are loaded by the Skill tool in Claude Code. 
In Codex, read the\n' + printf 'corresponding `skills//SKILL.md` for detailed instructions.\n\n' + printf '| Skill | Description |\n' + printf '|-------|-------------|\n' + while IFS=$'\t' read -r sname sdesc; do + printf '| %s | %s |\n' "$sname" "$sdesc" + done < <(collect_skills) + printf '\n' + + # Agents section + printf '## Agents\n\n' + printf 'Agents are spawned as subagents in Claude Code. In Codex, read the\n' + printf 'corresponding `agents/.md` for instructions.\n\n' + printf '| Agent | Description |\n' + printf '|-------|-------------|\n' + while IFS=$'\t' read -r aname adesc; do + printf '| %s | %s |\n' "$aname" "$adesc" + done < <(collect_agents) + printf '\n' + + # Key design decisions — excerpt from CLAUDE.md + printf '## Key Design Decisions\n\n' + printf 'See `CLAUDE.md` for the full list. Highlights:\n\n' + printf -- '- flowctl outputs JSON (`--json` flag) for machine consumption\n' + printf -- '- Tasks follow `todo -> in_progress -> done` state machine\n' + printf -- '- Evidence-based completion: `flowctl done` requires `--summary-file` and `--evidence-json`\n' + printf -- '- File locking prevents concurrent edits in Teams mode\n' + printf -- '- Three-layer quality: guard (lint/test) + plan-review + adversarial review\n' + + } > "$out_dir/AGENTS.md" + + printf 'Generated %s\n' "$out_dir/AGENTS.md" +} + +# --- cursor target --- + +generate_cursor() { + local out_dir="$PLUGIN_ROOT/.cursor/rules" + ensure_dir "$out_dir" + + local name version + name="$(plugin_name)" + version="$(plugin_version)" + + # Rule 1: Project overview from CLAUDE.md + { + cat <<'RULE_HEADER' +--- +description: Flow-code project overview and architecture +globs: +alwaysApply: true +--- + +RULE_HEADER + # Include CLAUDE.md content directly + cat "$CLAUDE_MD" + } > "$out_dir/flow-code-overview.mdc" + printf 'Generated %s\n' "$out_dir/flow-code-overview.mdc" + + # Rule 2: flowctl usage + { + cat <<'RULE_HEADER' +--- +description: How to use the flowctl CLI for task management 
+globs: + - "flowctl/**" + - ".flow/**" + - "scripts/*.sh" +alwaysApply: false +--- + +RULE_HEADER + cat <<'BODY' +# flowctl CLI Reference + +flowctl is the Rust binary that manages `.flow/` state. Always invoke from the repo root: + +```bash +./bin/flowctl +``` + +## Common Commands + +- `epic create "description"` — Create a new epic +- `epic list` — List all epics +- `tasks --epic ` — List tasks for an epic +- `next [--epic ]` — Get the next ready task +- `start ` — Mark task as in-progress +- `done --summary-file --evidence-json ` — Complete a task +- `restart ` — Reset a task and cascade to dependents +- `task skip --reason "why"` — Skip a task +- `task split --titles "A|B|C" --chain` — Split into sub-tasks +- `lock --task --files ` — Acquire file locks +- `unlock --task ` — Release file locks +- `guard` — Run lint/type/test checks +- `codex adversarial --base main` — Adversarial review via Codex +- `status --interrupted` — Check for unfinished work + +## State Machine + +Tasks: `todo` -> `in_progress` -> `done` (with `blocked` and `skipped` side-states) + +## Output + +Use `--json` for machine-readable JSON output. +BODY + } > "$out_dir/flow-code-flowctl.mdc" + printf 'Generated %s\n' "$out_dir/flow-code-flowctl.mdc" + + # Rule 3: Skills index + { + cat <<'RULE_HEADER' +--- +description: Flow-code available skills and their purposes +globs: + - "skills/**" +alwaysApply: false +--- + +RULE_HEADER + printf '# Flow-code Skills\n\n' + printf 'Each skill lives in `skills//SKILL.md`. 
Read the SKILL.md for full instructions.\n\n' + printf '| Skill | Description |\n' + printf '|-------|-------------|\n' + while IFS=$'\t' read -r sname sdesc; do + printf '| %s | %s |\n' "$sname" "$sdesc" + done < <(collect_skills) + } > "$out_dir/flow-code-skills.mdc" + printf 'Generated %s\n' "$out_dir/flow-code-skills.mdc" + + # Rule 4: Agents index + { + cat <<'RULE_HEADER' +--- +description: Flow-code available agents and their purposes +globs: + - "agents/**" +alwaysApply: false +--- + +RULE_HEADER + printf '# Flow-code Agents\n\n' + printf 'Each agent lives in `agents/.md`. Read the file for full instructions.\n\n' + printf '| Agent | Description |\n' + printf '|-------|-------------|\n' + while IFS=$'\t' read -r aname adesc; do + printf '| %s | %s |\n' "$aname" "$adesc" + done < <(collect_agents) + } > "$out_dir/flow-code-agents.mdc" + printf 'Generated %s\n' "$out_dir/flow-code-agents.mdc" + + # Rule 5: Code quality + { + cat <<'RULE_HEADER' +--- +description: Code quality and testing rules for flow-code +globs: + - "flowctl/**" + - "scripts/**" + - "hooks/**" +alwaysApply: false +--- + +RULE_HEADER + cat <<'BODY' +# Code Quality Rules + +## Build and Test + +```bash +cd flowctl && cargo build --release && cargo test --all +``` + +## Validation + +```bash +python3 -c "import json; json.load(open('hooks/hooks.json'))" +``` + +## Testing + +```bash +bash scripts/smoke_test.sh # flowctl core +bash scripts/ci_test.sh # full CI +bash scripts/teams_e2e_test.sh # Teams file locking +``` + +All tests create temp directories and clean up. Must NOT run from the plugin repo root. 
+ +## Files to Never Commit + +- `ref/` — reference/backup repos +- `*.upstream` — upstream backup files +- `.tasks/` — runtime state +- `__pycache__/` — Python cache +- `.flow/` — per-project task state +BODY + } > "$out_dir/flow-code-quality.mdc" + printf 'Generated %s\n' "$out_dir/flow-code-quality.mdc" +} + +# --- main --- + +main() { + if [ $# -lt 1 ]; then + cat >&2 < + +Targets: + codex Generate .codex/ directory with AGENTS.md + cursor Generate .cursor/rules/ with .mdc rules files +USAGE + exit 1 + fi + + require_file "$CLAUDE_MD" + require_file "$PLUGIN_JSON" + + case "$1" in + codex) + generate_codex + ;; + cursor) + generate_cursor + ;; + *) + die "Unknown target: $1 (expected 'codex' or 'cursor')" + ;; + esac + + printf 'Done.\n' +} + +main "$@" diff --git a/scripts/validate-skills.sh b/scripts/validate-skills.sh new file mode 100755 index 00000000..20a47d29 --- /dev/null +++ b/scripts/validate-skills.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash +# validate-skills.sh — validate SKILL.md files against skill-anatomy.md schema +# Exit 0 = all pass, 1 = failures found +set -euo pipefail + +# Resolve plugin root (same pattern as other scripts) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)"
+
+SKILLS_DIR="$PLUGIN_ROOT/skills"
+
+pass_count=0
+fail_count=0
+warn_count=0
+total=0
+
+# Required frontmatter fields
+REQUIRED_FM_FIELDS="name description"
+
+# Required ## sections per docs/skill-anatomy.md
+REQUIRED_SECTIONS="Overview|When to Use|Core Process|Common Rationalizations|Red Flags|Verification"
+
+MAX_LINES=500
+
+# _fm_value FRONTMATTER FIELD
+# Print the value of the first top-level "FIELD:" line in FRONTMATTER with all
+# single and double quote characters removed. Prints nothing when the field is
+# absent and still exits 0, so callers can assign the result under
+# `set -euo pipefail` without aborting the script.
+_fm_value() {
+  # sed reads a here-string and quits after the first match: no upstream
+  # process can be killed by SIGPIPE and a missing field is not an error.
+  sed -n "/^${2}:/{s/^${2}:[[:space:]]*//p;q;}" <<< "$1" | tr -d "\"'"
+}
+
+# validate_skill SKILL_FILE
+# Validate one SKILL.md and print "PASS <dir>" or "FAIL <dir>" followed by any
+# ✗ (error) and ⚠ (warning) detail lines.
+#   Errors:   first line is not '---', empty frontmatter, missing required field.
+#   Warnings: name differs from directory, description does not start with
+#             "Use when", missing recommended section, file longer than MAX_LINES.
+# Updates the global counters total, pass_count, fail_count and warn_count.
+validate_skill() {
+  local skill_file="$1"
+  local skill_dir
+  skill_dir="$(basename "$(dirname "$skill_file")")"
+  local has_error=0
+  local error_msgs=""
+  local warn_msgs=""
+
+  # --- Check YAML frontmatter exists ---
+  local first_line
+  first_line="$(head -1 "$skill_file")"
+  if [[ "$first_line" != "---" ]]; then
+    has_error=1
+    error_msgs=" ✗ missing YAML frontmatter"
+    total=$((total + 1))
+    fail_count=$((fail_count + 1))
+    echo "FAIL ${skill_dir}"
+    echo "$error_msgs"
+    return
+  fi
+
+  # Extract frontmatter (between first and second ---)
+  local fm
+  fm="$(awk 'BEGIN{n=0} /^---$/{n++; if(n==2) exit; next} n==1{print}' "$skill_file")"
+
+  if [[ -z "$fm" ]]; then
+    has_error=1
+    total=$((total + 1))
+    fail_count=$((fail_count + 1))
+    echo "FAIL ${skill_dir}"
+    echo " ✗ empty or malformed YAML frontmatter"
+    return
+  fi
+
+  # --- Check required frontmatter fields ---
+  local field
+  for field in $REQUIRED_FM_FIELDS; do
+    if ! grep -qE "^${field}:" <<< "$fm"; then
+      has_error=1
+      error_msgs="${error_msgs} ✗ missing frontmatter field: ${field}"$'\n'
+    fi
+  done
+
+  # --- Check name field value ---
+  # _fm_value yields an empty string (not a failure) when the field is missing;
+  # the missing-field error above has already been recorded in that case.
+  local name_val
+  name_val="$(_fm_value "$fm" name)"
+  if [[ -n "$name_val" && "$name_val" != "$skill_dir" ]]; then
+    warn_msgs="${warn_msgs} ⚠ name '${name_val}' does not match directory '${skill_dir}'"$'\n'
+  fi
+
+  # --- Check description starts with "Use when" ---
+  local desc_val
+  desc_val="$(_fm_value "$fm" description)"
+  if [[ -n "$desc_val" && "$desc_val" != "Use when"* ]]; then
+    warn_msgs="${warn_msgs} ⚠ description should start with 'Use when...'"$'\n'
+  fi
+
+  # --- Check required ## sections (warn, not fail) ---
+  local body
+  body="$(awk 'BEGIN{n=0} /^---$/{n++; next} n>=2{print}' "$skill_file")"
+
+  # Split the '|'-separated list into an array; IFS is changed for this one
+  # `read` only, so no global IFS state has to be restored afterwards.
+  local -a sections
+  IFS='|' read -r -a sections <<< "$REQUIRED_SECTIONS"
+  local section
+  for section in "${sections[@]}"; do
+    if ! grep -qiE "^## .*${section}" <<< "$body"; then
+      warn_msgs="${warn_msgs} ⚠ missing recommended section: ## ${section}"$'\n'
+    fi
+  done
+
+  # --- File size warning ---
+  local line_count
+  line_count="$(wc -l < "$skill_file" | tr -d ' ')"
+  if [[ "$line_count" -gt "$MAX_LINES" ]]; then
+    warn_msgs="${warn_msgs} ⚠ ${line_count} lines (>${MAX_LINES} recommended max)"$'\n'
+  fi
+
+  # --- Print result ---
+  total=$((total + 1))
+
+  if [[ "$has_error" -eq 1 ]]; then
+    fail_count=$((fail_count + 1))
+    echo "FAIL ${skill_dir}"
+    if [[ -n "$error_msgs" ]]; then
+      printf '%s' "$error_msgs"
+    fi
+  else
+    pass_count=$((pass_count + 1))
+    echo "PASS ${skill_dir}"
+  fi
+
+  if [[ -n "$warn_msgs" ]]; then
+    # One warning per line, so counting lines that contain the marker counts warnings.
+    local wc_lines
+    wc_lines="$(grep -c '⚠' <<< "$warn_msgs" || true)"
+    warn_count=$((warn_count + wc_lines))
+    printf '%s' "$warn_msgs"
+  fi
+}
+
+# --- Main ---
+echo "Validating skills in ${SKILLS_DIR}/"
+echo "---"
+
+for skill_file in "$SKILLS_DIR"/*/SKILL.md; do
+  # An unmatched glob stays literal; skip it instead of validating a bogus path.
+  [[ -f "$skill_file" ]] || continue
+  validate_skill "$skill_file"
+done
+
+echo "---"
+echo "Total: ${total} Pass: ${pass_count} Fail: ${fail_count} Warnings: ${warn_count}"
+
+if [[ "$fail_count" -gt 0 ]]; then
+  exit 1
+fi
+exit 0