diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9ad037c --- /dev/null +++ b/.env.example @@ -0,0 +1,74 @@ +# ============================================================================= +# ๐ŸŽฉ Hat Stack โ€” Environment Configuration Template +# ============================================================================= +# +# INSTRUCTIONS: +# 1. Copy this file: cp .env.example .env +# 2. Fill in YOUR values below +# 3. NEVER commit .env โ€” it's already in .gitignore +# +# For GitHub Actions (recommended): +# Don't use this file at all. Instead, add these as Repository Secrets +# in your fork: Settings โ†’ Secrets and variables โ†’ Actions โ†’ New repository secret +# +# ============================================================================= + +# --------------------------------------------------------------------------- +# REQUIRED โ€” Ollama Cloud API +# --------------------------------------------------------------------------- +# Your Ollama Cloud API key. Get one at https://ollama.ai/cloud +# This is YOUR key โ€” it stays in YOUR secrets, never in code. +OLLAMA_API_KEY= + +# Ollama Cloud API base URL (default works for most users) +OLLAMA_BASE_URL=https://api.ollama.ai/v1 + +# --------------------------------------------------------------------------- +# OPTIONAL โ€” GitHub Callback (for dispatch mode) +# --------------------------------------------------------------------------- +# A GitHub Personal Access Token (PAT) with repo scope, used to post +# Hats review results back to PRs in OTHER repos via the dispatch handler. +# Only needed if you want hat_stack to comment on external repos' PRs. 
+# +# Create at: https://github.com/settings/tokens +# Required scopes: repo (for private repos) or public_repo (for public repos) +HAT_STACK_CALLBACK_TOKEN= + +# --------------------------------------------------------------------------- +# OPTIONAL โ€” CLI Configuration (for the `hat` command) +# --------------------------------------------------------------------------- +# Your hat_stack fork โ€” used by the `hat` CLI to dispatch tasks. +# Default: Grumpified-OGGVCT/hat_stack (the original repo) +# Set this to YOUR fork so tasks run with YOUR API keys. +HAT_STACK_REPO= + +# --------------------------------------------------------------------------- +# OPTIONAL โ€” Model Overrides +# --------------------------------------------------------------------------- +# Override the default primary model for all Tier 1 hats (security, safety, adjudication) +# Default: glm-5.1 (per Implementation Guide ยงE2.2) +# Alternatives: kimi-k2.5, deepseek-v3.1 +# HATS_TIER1_MODEL=glm-5.1 + +# Override the default primary model for all Tier 2 hats (architectural reasoning) +# Default: glm-5.1 +# Alternatives: deepseek-v3.1, minimax-m2.7 +# HATS_TIER2_MODEL=glm-5.1 + +# Override the default model for Tier 3 hats (quality analysis) +# Default: nemotron-3-super +# Alternatives: qwen3-coder +# HATS_TIER3_MODEL=nemotron-3-super + +# Override the default model for Tier 4 hats (fast scanning) +# Default: nemotron-3-nano or ministral-3 +# HATS_TIER4_MODEL=nemotron-3-nano + +# --------------------------------------------------------------------------- +# OPTIONAL โ€” Budget Limits +# --------------------------------------------------------------------------- +# Maximum cost per PR review in USD (default: $0.15 for Ollama Cloud) +# HATS_MAX_USD_PER_PR=0.15 + +# Maximum tokens per PR review (default: 150000) +# HATS_MAX_TOKENS_PER_PR=150000 diff --git a/.github/actions/run-hats/action.yml b/.github/actions/run-hats/action.yml new file mode 100644 index 0000000..48240d7 --- /dev/null +++ 
b/.github/actions/run-hats/action.yml @@ -0,0 +1,144 @@ +# ๐ŸŽฉ Run Hats Review โ€” Composite Action +# +# Use this action in any workflow to run the Hats pipeline on a diff. +# +# Usage: +# - name: Run Hats Review +# uses: Grumpified-OGGVCT/hat_stack/.github/actions/run-hats@main +# with: +# diff_file: /tmp/pr.diff +# env: +# OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} + +name: "๐ŸŽฉ Run Hats Review" +description: "Run the Hat Stack AI review pipeline on a code diff" + +inputs: + diff_file: + description: "Path to the diff file to review" + required: false + default: "" + diff: + description: "Inline diff text (for small diffs; use diff_file for large ones)" + required: false + default: "" + hats: + description: "Comma-separated hat IDs to run (default: auto-select based on triggers)" + required: false + default: "" + context: + description: "Additional context for the review (e.g., PR description)" + required: false + default: "" + config: + description: "Path to custom hat_configs.yml (default: uses built-in config)" + required: false + default: "" + output_format: + description: "Output format: json, markdown, or both" + required: false + default: "both" + +outputs: + verdict: + description: "The final Hats verdict: ALLOW, ESCALATE, or QUARANTINE" + value: ${{ steps.run.outputs.verdict }} + risk_score: + description: "Composite risk score (0-100)" + value: ${{ steps.run.outputs.risk_score }} + hats_executed: + description: "Number of hats that ran" + value: ${{ steps.run.outputs.hats_executed }} + report_markdown: + description: "Path to the Markdown report file" + value: ${{ steps.run.outputs.report_markdown }} + report_json: + description: "Path to the JSON report file" + value: ${{ steps.run.outputs.report_json }} + +runs: + using: composite + steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + shell: bash + run: | + HAT_STACK_ROOT="$GITHUB_ACTION_PATH/../../.." 
+ pip install -r "$HAT_STACK_ROOT/scripts/requirements.txt" + + - name: Prepare diff + id: prep + shell: bash + run: | + DIFF_FILE="/tmp/hats-action-diff.patch" + + if [ -n "${{ inputs.diff_file }}" ] && [ -f "${{ inputs.diff_file }}" ]; then + cp "${{ inputs.diff_file }}" "$DIFF_FILE" + elif [ -n "${{ inputs.diff }}" ]; then + cat <<'HATS_DIFF_EOF' > "$DIFF_FILE" + ${{ inputs.diff }} + HATS_DIFF_EOF + else + echo "โš ๏ธ No diff provided. Set either 'diff_file' or 'diff' input." >&2 + echo "verdict=ALLOW" >> "$GITHUB_OUTPUT" + echo "risk_score=0" >> "$GITHUB_OUTPUT" + echo "hats_executed=0" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "diff_file=$DIFF_FILE" >> "$GITHUB_OUTPUT" + + - name: Run Hats Pipeline + id: run + shell: bash + env: + OLLAMA_API_KEY: ${{ env.OLLAMA_API_KEY }} + OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL || 'https://api.ollama.ai/v1' }} + HAT_STACK_ACTION_PATH: ${{ github.action_path }} + run: | + HAT_STACK_ROOT="$HAT_STACK_ACTION_PATH/../../.." + CONFIG="$HAT_STACK_ROOT/scripts/hat_configs.yml" + if [ -n "${{ inputs.config }}" ] && [ -f "${{ inputs.config }}" ]; then + CONFIG="${{ inputs.config }}" + fi + + ARGS=( + --diff "${{ steps.prep.outputs.diff_file }}" + --config "$CONFIG" + --output "${{ inputs.output_format }}" + --markdown-file /tmp/hats-report.md + --json-file /tmp/hats-report.json + ) + + if [ -n "${{ inputs.hats }}" ]; then + ARGS+=(--hats "${{ inputs.hats }}") + fi + + if [ -n "${{ inputs.context }}" ]; then + ARGS+=(--context "${{ inputs.context }}") + fi + + set +e + python "$HAT_STACK_ROOT/scripts/hats_runner.py" "${ARGS[@]}" + EXIT_CODE=$? 
+ set -e + + if [ -f /tmp/hats-report.json ]; then + VERDICT=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['verdict'])") + RISK_SCORE=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['risk_score'])") + HATS_EXECUTED=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['hats_executed'])") + else + VERDICT="ALLOW" + RISK_SCORE="0" + HATS_EXECUTED="0" + fi + + echo "verdict=$VERDICT" >> "$GITHUB_OUTPUT" + echo "risk_score=$RISK_SCORE" >> "$GITHUB_OUTPUT" + echo "hats_executed=$HATS_EXECUTED" >> "$GITHUB_OUTPUT" + echo "report_markdown=/tmp/hats-report.md" >> "$GITHUB_OUTPUT" + echo "report_json=/tmp/hats-report.json" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/hats-dispatch.yml b/.github/workflows/hats-dispatch.yml new file mode 100644 index 0000000..10f4f22 --- /dev/null +++ b/.github/workflows/hats-dispatch.yml @@ -0,0 +1,215 @@ +# ๐ŸŽฉ Hats Team Dispatch Handler +# +# Handles repository_dispatch events from external repos/agents. +# Any external system can trigger a Hats review by sending a POST +# to the GitHub API: +# +# curl -X POST \ +# -H "Authorization: Bearer $GITHUB_TOKEN" \ +# -H "Accept: application/vnd.github+json" \ +# https://api.github.com/repos/Grumpified-OGGVCT/hat_stack/dispatches \ +# -d '{ +# "event_type": "run-hats", +# "client_payload": { +# "diff": "--- a/file.py\n+++ b/file.py\n...", +# "callback_repo": "owner/repo", +# "callback_pr": 42, +# "hats": "black,blue,purple", +# "context": "Adding new auth module" +# } +# }' +# +# The workflow runs the Hats pipeline and posts results back to the +# calling repo's PR as a comment (if callback info is provided). 
+ +name: "๐ŸŽฉ Hats Dispatch Handler" + +on: + repository_dispatch: + types: + - run-hats + - hats-review + - hat-review + +permissions: + contents: read + +jobs: + dispatch-review: + name: "๐ŸŽฉ Dispatched Hats Review" + runs-on: ubuntu-latest + + steps: + - name: Checkout hat_stack + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: pip install -r scripts/requirements.txt + + - name: Prepare diff + id: diff + shell: bash + run: | + DIFF_FILE="/tmp/dispatch-diff.patch" + PAYLOAD='${{ toJson(github.event.client_payload) }}' + + # Extract diff from payload + echo "$PAYLOAD" | python3 -c " + import json, sys + try: + payload = json.load(sys.stdin) + diff = payload.get('diff', '') + except (json.JSONDecodeError, KeyError) as e: + print(f'Warning: Could not parse payload: {e}', file=sys.stderr) + diff = '' + with open('$DIFF_FILE', 'w') as f: + f.write(diff) + " || echo "" > "$DIFF_FILE" + + echo "diff_file=$DIFF_FILE" >> "$GITHUB_OUTPUT" + + - name: Extract payload fields + id: payload + shell: bash + run: | + PAYLOAD='${{ toJson(github.event.client_payload) }}' + echo "$PAYLOAD" | python3 -c " + import json, sys, os, re + + def sanitize(value): + '''Remove newlines and control chars to prevent output injection.''' + if not isinstance(value, str): + value = str(value) if value is not None else '' + return re.sub(r'[\r\n]', ' ', value).strip() + + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError: + payload = {} + gh_output = os.environ['GITHUB_OUTPUT'] + with open(gh_output, 'a') as f: + f.write(f\"hats={sanitize(payload.get('hats', ''))}\n\") + f.write(f\"context={sanitize(payload.get('context', ''))}\n\") + f.write(f\"callback_repo={sanitize(payload.get('callback_repo', ''))}\n\") + f.write(f\"callback_pr={sanitize(payload.get('callback_pr', ''))}\n\") + f.write(f\"callback_issue={sanitize(payload.get('callback_issue', ''))}\n\") + " + + - 
name: Run Hats Pipeline + id: run + env: + OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} + OLLAMA_BASE_URL: ${{ secrets.OLLAMA_BASE_URL || 'https://api.ollama.ai/v1' }} + shell: bash + run: | + ARGS=( + --diff "${{ steps.diff.outputs.diff_file }}" + --config scripts/hat_configs.yml + --output both + --markdown-file /tmp/hats-report.md + --json-file /tmp/hats-report.json + ) + + if [ -n "${{ steps.payload.outputs.hats }}" ]; then + ARGS+=(--hats "${{ steps.payload.outputs.hats }}") + fi + + if [ -n "${{ steps.payload.outputs.context }}" ]; then + ARGS+=(--context "${{ steps.payload.outputs.context }}") + fi + + set +e + python scripts/hats_runner.py "${ARGS[@]}" + EXIT_CODE=$? + set -e + + if [ -f /tmp/hats-report.json ]; then + VERDICT=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['verdict'])") + RISK_SCORE=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['risk_score'])") + else + VERDICT="ALLOW" + RISK_SCORE="0" + fi + + echo "verdict=$VERDICT" >> "$GITHUB_OUTPUT" + echo "risk_score=$RISK_SCORE" >> "$GITHUB_OUTPUT" + + - name: Post results to calling repo PR + if: steps.payload.outputs.callback_repo != '' && steps.payload.outputs.callback_pr != '' + env: + GH_TOKEN: ${{ secrets.HAT_STACK_CALLBACK_TOKEN }} + shell: bash + run: | + CALLBACK_REPO="${{ steps.payload.outputs.callback_repo }}" + CALLBACK_PR="${{ steps.payload.outputs.callback_pr }}" + + if [ -z "$GH_TOKEN" ]; then + echo "โš ๏ธ HAT_STACK_CALLBACK_TOKEN not set โ€” cannot post results to $CALLBACK_REPO#$CALLBACK_PR" + exit 0 + fi + + # Build comment body from report or fallback + if [ -f /tmp/hats-report.md ]; then + REPORT_BODY=$(cat /tmp/hats-report.md) + else + REPORT_BODY="๐ŸŽฉ Hats Review completed. 
Verdict: ${{ steps.run.outputs.verdict }} (Score: ${{ steps.run.outputs.risk_score }})" + fi + + python3 -c " + import json, sys + body = sys.stdin.read() + print(json.dumps({'body': body})) + " <<< "$REPORT_BODY" | curl -s -X POST \ + -H "Authorization: Bearer $GH_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/$CALLBACK_REPO/issues/$CALLBACK_PR/comments" \ + -d @- + + echo "โœ… Results posted to $CALLBACK_REPO#$CALLBACK_PR" + + - name: Post results to calling repo issue + if: steps.payload.outputs.callback_repo != '' && steps.payload.outputs.callback_issue != '' + env: + GH_TOKEN: ${{ secrets.HAT_STACK_CALLBACK_TOKEN }} + shell: bash + run: | + CALLBACK_REPO="${{ steps.payload.outputs.callback_repo }}" + CALLBACK_ISSUE="${{ steps.payload.outputs.callback_issue }}" + + if [ -z "$GH_TOKEN" ]; then + echo "โš ๏ธ HAT_STACK_CALLBACK_TOKEN not set โ€” cannot post results to $CALLBACK_REPO#$CALLBACK_ISSUE" + exit 0 + fi + + if [ -f /tmp/hats-report.md ]; then + REPORT_BODY=$(cat /tmp/hats-report.md) + else + REPORT_BODY="๐ŸŽฉ Hats Review completed." + fi + + python3 -c " + import json, sys + body = sys.stdin.read() + print(json.dumps({'body': body})) + " <<< "$REPORT_BODY" | curl -s -X POST \ + -H "Authorization: Bearer $GH_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/$CALLBACK_REPO/issues/$CALLBACK_ISSUE/comments" \ + -d @- + + echo "โœ… Results posted to $CALLBACK_REPO#$CALLBACK_ISSUE" + + - name: Upload reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: hats-dispatch-report + path: | + /tmp/hats-report.md + /tmp/hats-report.json + retention-days: 30 diff --git a/.github/workflows/hats-review.yml b/.github/workflows/hats-review.yml new file mode 100644 index 0000000..c1018de --- /dev/null +++ b/.github/workflows/hats-review.yml @@ -0,0 +1,223 @@ +# ๐ŸŽฉ Hats Team Reusable Workflow +# +# This is the core reusable workflow that runs the Hats pipeline. 
+# Other repositories call this via workflow_call to get hat reviews +# on their PRs, diffs, or arbitrary code changes. +# +# Usage from another repo: +# jobs: +# hats-review: +# uses: YOUR_USERNAME/hat_stack/.github/workflows/hats-review.yml@main +# with: +# diff_artifact: pr-diff # or: diff: ${{ steps.get-diff.outputs.diff }} +# secrets: +# ollama_api_key: ${{ secrets.OLLAMA_API_KEY }} +# +# Note: The workflow auto-detects the repository via ${{ github.repository }}, +# so forks work without modifying this file. + +name: "🎩 Hats Team Review" + +on: + workflow_call: + inputs: + diff: + description: "The diff text to review (inline). This is the primary way to pass diffs." + required: false + type: string + default: "" + diff_artifact: + description: "Name of an uploaded artifact containing a diff file (for large diffs that exceed input limits). The artifact must contain a single .diff or .patch file." + required: false + type: string + default: "" + hats: + description: "Comma-separated hat IDs to run (e.g., 'black,blue,purple'). Default: auto-select." + required: false + type: string + default: "" + context: + description: "Additional context for the review (e.g., PR description, task brief)." + required: false + type: string + default: "" + output_format: + description: "Output format: json, markdown, or both." + required: false + type: string + default: "both" + fail_on_escalate: + description: "Whether to fail the workflow on ESCALATE verdict (default: false, only fails on QUARANTINE)." + required: false + type: boolean + default: false + outputs: + verdict: + description: "The final Hats verdict: ALLOW, ESCALATE, or QUARANTINE." + value: ${{ jobs.hats-review.outputs.verdict }} + risk_score: + description: "Composite risk score (0–100)." + value: ${{ jobs.hats-review.outputs.risk_score }} + hats_executed: + description: "Number of hats that ran." + value: ${{ jobs.hats-review.outputs.hats_executed }} + report_markdown: + description: "The Markdown review report." 
+ value: ${{ jobs.hats-review.outputs.report_markdown }} + secrets: + ollama_api_key: + description: "Ollama Cloud API key for LLM inference." + required: true + ollama_base_url: + description: "Ollama Cloud base URL (default: https://api.ollama.ai/v1)." + required: false + +jobs: + hats-review: + name: "๐ŸŽฉ Run Hats Pipeline" + runs-on: ubuntu-latest + outputs: + verdict: ${{ steps.run.outputs.verdict }} + risk_score: ${{ steps.run.outputs.risk_score }} + hats_executed: ${{ steps.run.outputs.hats_executed }} + report_markdown: ${{ steps.report.outputs.markdown }} + + steps: + - name: Checkout hat_stack + uses: actions/checkout@v4 + with: + repository: ${{ github.repository }} + path: hat_stack + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: pip install -r hat_stack/scripts/requirements.txt + + - name: Download diff artifact + if: inputs.diff_artifact != '' + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.diff_artifact }} + path: /tmp/hats-artifact-diff + + - name: Prepare diff input + id: diff + shell: bash + run: | + DIFF_FILE="/tmp/hats-diff.patch" + + # Priority: 1) artifact, 2) inline diff + if [ -d "/tmp/hats-artifact-diff" ]; then + # Find the first .diff or .patch file in the downloaded artifact (null-safe) + FOUND="" + while IFS= read -r -d '' fpath; do + FOUND="$fpath" + break + done < <(find /tmp/hats-artifact-diff -type f \( -name "*.diff" -o -name "*.patch" \) -print0) + if [ -z "$FOUND" ]; then + # Fall back to first file + while IFS= read -r -d '' fpath; do + FOUND="$fpath" + break + done < <(find /tmp/hats-artifact-diff -type f -print0) + fi + if [ -n "$FOUND" ]; then + cp "$FOUND" "$DIFF_FILE" + fi + elif [ -n "${{ inputs.diff }}" ]; then + cat <<'HATS_DIFF_EOF' > "$DIFF_FILE" + ${{ inputs.diff }} + HATS_DIFF_EOF + else + echo "โš ๏ธ No diff provided. Provide either 'diff' (inline) or 'diff_artifact' (artifact name)." 
>&2 + echo "verdict=ALLOW" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "diff_file=$DIFF_FILE" >> "$GITHUB_OUTPUT" + + - name: Run Hats Pipeline + id: run + env: + OLLAMA_API_KEY: ${{ secrets.ollama_api_key }} + OLLAMA_BASE_URL: ${{ secrets.ollama_base_url || 'https://api.ollama.ai/v1' }} + shell: bash + run: | + ARGS=( + --diff "${{ steps.diff.outputs.diff_file }}" + --config "hat_stack/scripts/hat_configs.yml" + --output "${{ inputs.output_format }}" + --markdown-file /tmp/hats-report.md + --json-file /tmp/hats-report.json + ) + + if [ -n "${{ inputs.hats }}" ]; then + ARGS+=(--hats "${{ inputs.hats }}") + fi + + if [ -n "${{ inputs.context }}" ]; then + ARGS+=(--context "${{ inputs.context }}") + fi + + # Run the pipeline — capture exit code but don't fail yet + set +e + python hat_stack/scripts/hats_runner.py "${ARGS[@]}" + EXIT_CODE=$? + set -e + + # Read outputs from the runner + if [ -f /tmp/hats-report.json ]; then + VERDICT=$(python -c "import json; print(json.load(open('/tmp/hats-report.json'))['verdict'])") + RISK_SCORE=$(python -c "import json; print(json.load(open('/tmp/hats-report.json'))['risk_score'])") + HATS_EXECUTED=$(python -c "import json; print(json.load(open('/tmp/hats-report.json'))['hats_executed'])") + else + VERDICT="ALLOW" + RISK_SCORE="0" + HATS_EXECUTED="0" + fi + + echo "verdict=$VERDICT" >> "$GITHUB_OUTPUT" + echo "risk_score=$RISK_SCORE" >> "$GITHUB_OUTPUT" + echo "hats_executed=$HATS_EXECUTED" >> "$GITHUB_OUTPUT" + + - name: Read Markdown report + id: report + shell: bash + run: | + if [ -f /tmp/hats-report.md ]; then + { + echo "markdown<<HATS_REPORT_EOF" + cat /tmp/hats-report.md + echo "HATS_REPORT_EOF" + } >> "$GITHUB_OUTPUT" + else + echo "markdown=No report generated." 
>> "$GITHUB_OUTPUT" + fi + + - name: Upload reports as artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: hats-review-report + path: | + /tmp/hats-report.md + /tmp/hats-report.json + retention-days: 30 + + - name: Check verdict + shell: bash + run: | + VERDICT="${{ steps.run.outputs.verdict }}" + if [ "$VERDICT" = "QUARANTINE" ]; then + echo "๐Ÿšซ QUARANTINE โ€” Hats review found critical issues." + exit 1 + elif [ "$VERDICT" = "ESCALATE" ] && [ "${{ inputs.fail_on_escalate }}" = "true" ]; then + echo "โš ๏ธ ESCALATE โ€” Hats review found issues requiring human review." + exit 1 + else + echo "โœ… Verdict: $VERDICT" + fi diff --git a/.github/workflows/hats-self-review.yml b/.github/workflows/hats-self-review.yml new file mode 100644 index 0000000..bb1639d --- /dev/null +++ b/.github/workflows/hats-self-review.yml @@ -0,0 +1,127 @@ +# ๐ŸŽฉ Hats Self-Review +# +# Runs the Hats pipeline on PRs to this repository itself. +# Demonstrates the system reviewing its own changes. 
+ +name: "๐ŸŽฉ Self-Review" + +on: + pull_request: + types: [opened, synchronize, reopened] + +permissions: + contents: read + pull-requests: write + +jobs: + self-review: + name: "๐ŸŽฉ Hat Stack Self-Review" + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Generate diff + id: diff + shell: bash + run: | + git diff origin/${{ github.base_ref }}...HEAD > /tmp/pr-diff.patch + echo "diff_file=/tmp/pr-diff.patch" >> "$GITHUB_OUTPUT" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: pip install -r scripts/requirements.txt + + - name: Run Hats Pipeline + id: hats + env: + OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} + OLLAMA_BASE_URL: ${{ secrets.OLLAMA_BASE_URL || 'https://api.ollama.ai/v1' }} + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_TITLE: ${{ github.event.pull_request.title }} + shell: bash + run: | + set +e + SANITIZED_PR_TITLE=$(printf '%s' "$PR_TITLE" | tr '\r\n' ' ' | tr '"' "'") + CONTEXT="Self-review of hat_stack PR #$PR_NUMBER: $SANITIZED_PR_TITLE" + + cmd=( + python scripts/hats_runner.py + --diff /tmp/pr-diff.patch + --config scripts/hat_configs.yml + --output both + --markdown-file /tmp/hats-report.md + --json-file /tmp/hats-report.json + --context "$CONTEXT" + ) + "${cmd[@]}" + EXIT_CODE=$? 
+ set -e + + if [ -f /tmp/hats-report.json ]; then + VERDICT=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['verdict'])") + RISK_SCORE=$(python3 -c "import json; print(json.load(open('/tmp/hats-report.json'))['risk_score'])") + else + VERDICT="ALLOW" + RISK_SCORE="0" + fi + + echo "verdict=$VERDICT" >> "$GITHUB_OUTPUT" + echo "risk_score=$RISK_SCORE" >> "$GITHUB_OUTPUT" + + - name: Post review to PR + if: always() + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + let body = '๐ŸŽฉ Hats Review completed.\n\nVerdict: ${{ steps.hats.outputs.verdict }} (Score: ${{ steps.hats.outputs.risk_score }})'; + try { + body = fs.readFileSync('/tmp/hats-report.md', 'utf8'); + } catch (e) { + // Use default body + } + + // Find and update existing comment, or create new one + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const botComment = comments.find(c => + c.body.includes('๐ŸŽฉ Hats Team Review Report') + ); + + if (botComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: body, + }); + } + + - name: Upload reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: hats-self-review-report + path: | + /tmp/hats-report.md + /tmp/hats-report.json + retention-days: 30 diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml new file mode 100644 index 0000000..4e781b6 --- /dev/null +++ b/.github/workflows/hats-task.yml @@ -0,0 +1,253 @@ +# ๐ŸŽฉ Hats Task Execution Workflow +# +# Handles task dispatches from external repos, agents, or the `hat` CLI. 
+# Unlike hats-dispatch.yml (which reviews diffs), this workflow CREATES +# deliverables: code, docs, plans, tests, etc. +# +# Trigger via GitHub API: +# gh api repos/YOUR_USERNAME/hat_stack/dispatches \ +# -f event_type=run-task \ +# -f client_payload[task]=generate_code \ +# -f client_payload[prompt]="Build a FastAPI auth module with JWT" \ +# -f client_payload[callback_repo]=owner/repo \ +# -f client_payload[callback_pr]=42 +# +# Or via the `hat` CLI wrapper: +# hat task generate_code "Build a FastAPI auth module with JWT" --repo owner/repo --pr 42 + +name: "๐ŸŽฉ Hats Task Execution" + +on: + repository_dispatch: + types: + - run-task + - hat-task + - generate + - build + workflow_dispatch: + inputs: + task: + description: "Task type: generate_code, generate_docs, refactor, analyze, plan, test" + required: true + type: choice + options: + - generate_code + - generate_docs + - refactor + - analyze + - plan + - test + prompt: + description: "What you want done (natural language)" + required: true + type: string + hats: + description: "Comma-separated hat IDs (default: auto-select)" + required: false + type: string + default: "" + callback_repo: + description: "Repo to post results to (owner/name)" + required: false + type: string + default: "" + callback_pr: + description: "PR number to comment results on" + required: false + type: string + default: "" + callback_issue: + description: "Issue number to comment results on" + required: false + type: string + default: "" + +permissions: + contents: read + +jobs: + execute-task: + name: "๐ŸŽฉ Execute Hat Task" + runs-on: ubuntu-latest + + steps: + - name: Checkout hat_stack + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: pip install -r scripts/requirements.txt + + - name: Extract parameters + id: params + shell: bash + run: | + # Determine source: workflow_dispatch or repository_dispatch + if [ "${{ 
github.event_name }}" = "workflow_dispatch" ]; then + # workflow_dispatch inputs are controlled by the UI schema โ€” safe + echo "task=${{ inputs.task }}" >> "$GITHUB_OUTPUT" + echo "hats=${{ inputs.hats }}" >> "$GITHUB_OUTPUT" + echo "callback_repo=${{ inputs.callback_repo }}" >> "$GITHUB_OUTPUT" + echo "callback_pr=${{ inputs.callback_pr }}" >> "$GITHUB_OUTPUT" + echo "callback_issue=${{ inputs.callback_issue }}" >> "$GITHUB_OUTPUT" + # Prompt may contain special chars โ€” pass via env to avoid code injection + INPUT_PROMPT="${{ inputs.prompt }}" python3 -c " + import os, re + val = os.environ.get('INPUT_PROMPT', '') + sanitized = re.sub(r'[\r\n]', ' ', val).strip() + with open(os.environ['GITHUB_OUTPUT'], 'a') as f: + f.write(f'prompt={sanitized}\n') + " + else + # repository_dispatch โ€” extract from client_payload with sanitization + PAYLOAD='${{ toJson(github.event.client_payload) }}' + echo "$PAYLOAD" | python3 -c " + import json, sys, os, re + + def sanitize(value): + '''Remove newlines and control chars to prevent output injection.''' + if not isinstance(value, str): + value = str(value) if value is not None else '' + return re.sub(r'[\r\n]', ' ', value).strip() + + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError: + payload = {} + gh_output = os.environ['GITHUB_OUTPUT'] + with open(gh_output, 'a') as f: + f.write(f\"task={sanitize(payload.get('task', 'analyze'))}\n\") + f.write(f\"prompt={sanitize(payload.get('prompt', ''))}\n\") + f.write(f\"hats={sanitize(payload.get('hats', ''))}\n\") + f.write(f\"callback_repo={sanitize(payload.get('callback_repo', ''))}\n\") + f.write(f\"callback_pr={sanitize(payload.get('callback_pr', ''))}\n\") + f.write(f\"callback_issue={sanitize(payload.get('callback_issue', ''))}\n\") + f.write(f\"context={sanitize(payload.get('context', ''))}\n\") + " + fi + + - name: Fetch context from callback repo + id: context + if: steps.params.outputs.callback_repo != '' + env: + GH_TOKEN: ${{ 
secrets.HAT_STACK_CALLBACK_TOKEN }} + shell: bash + run: | + CALLBACK_REPO="${{ steps.params.outputs.callback_repo }}" + CONTEXT_DIR="/tmp/hats-context" + mkdir -p "$CONTEXT_DIR" + + if [ -n "$GH_TOKEN" ]; then + # Try to fetch repo structure for context + echo "๐Ÿ“ Fetching context from $CALLBACK_REPO..." + # Get the repo's tree (top-level files) + curl -s -H "Authorization: Bearer $GH_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/$CALLBACK_REPO/git/trees/HEAD?recursive=1" \ + | python3 -c " + import json, sys + try: + data = json.load(sys.stdin) + tree = data.get('tree', []) + paths = [item['path'] for item in tree if item['type'] == 'blob'] + with open('/tmp/hats-context/REPO_TREE.txt', 'w') as f: + f.write('\n'.join(paths)) + print(f'Found {len(paths)} files in repo tree') + except Exception as e: + print(f'Warning: Could not fetch repo tree: {e}', file=sys.stderr) + " + fi + + echo "context_dir=$CONTEXT_DIR" >> "$GITHUB_OUTPUT" + + - name: Run Hats Task + id: run + env: + OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} + OLLAMA_BASE_URL: ${{ secrets.OLLAMA_BASE_URL || 'https://api.ollama.ai/v1' }} + shell: bash + run: | + ARGS=( + --task "${{ steps.params.outputs.task }}" + --prompt "${{ steps.params.outputs.prompt }}" + --config scripts/hat_configs.yml + --output /tmp/hats-task-output + --json-file /tmp/hats-task-result.json + ) + + if [ -n "${{ steps.params.outputs.hats }}" ]; then + ARGS+=(--hats "${{ steps.params.outputs.hats }}") + fi + + if [ -d "/tmp/hats-context" ] && [ "$(ls -A /tmp/hats-context 2>/dev/null)" ]; then + ARGS+=(--context-dir /tmp/hats-context) + fi + + set +e + python scripts/hats_task_runner.py "${ARGS[@]}" + EXIT_CODE=$? 
+ set -e + + echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT" + + if [ -f /tmp/hats-task-result.json ]; then + FILES_GENERATED=$(python3 -c "import json; print(len(json.load(open('/tmp/hats-task-result.json')).get('files', [])))") + echo "files_generated=$FILES_GENERATED" >> "$GITHUB_OUTPUT" + else + echo "files_generated=0" >> "$GITHUB_OUTPUT" + fi + + - name: Post results to callback + if: steps.params.outputs.callback_repo != '' && (steps.params.outputs.callback_pr != '' || steps.params.outputs.callback_issue != '') + env: + GH_TOKEN: ${{ secrets.HAT_STACK_CALLBACK_TOKEN }} + shell: bash + run: | + CALLBACK_REPO="${{ steps.params.outputs.callback_repo }}" + CALLBACK_PR="${{ steps.params.outputs.callback_pr }}" + CALLBACK_ISSUE="${{ steps.params.outputs.callback_issue }}" + TARGET="${CALLBACK_PR:-$CALLBACK_ISSUE}" + + if [ -z "$GH_TOKEN" ] || [ -z "$TARGET" ]; then + echo "โš ๏ธ Cannot post results โ€” missing token or target" + exit 0 + fi + + # Build comment from summary + if [ -f /tmp/hats-task-output/HATS_TASK_SUMMARY.md ]; then + REPORT_BODY=$(cat /tmp/hats-task-output/HATS_TASK_SUMMARY.md) + else + REPORT_BODY="๐ŸŽฉ Hats Task completed. 
Files generated: ${{ steps.run.outputs.files_generated }}" + fi + + python3 -c " + import json, sys + body = sys.stdin.read() + print(json.dumps({'body': body})) + " <<< "$REPORT_BODY" | curl -s -X POST \ + -H "Authorization: Bearer $GH_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/$CALLBACK_REPO/issues/$TARGET/comments" \ + -d @- + + echo "โœ… Results posted to $CALLBACK_REPO#$TARGET" + + - name: Upload task output + if: always() + uses: actions/upload-artifact@v4 + with: + name: hats-task-output + path: /tmp/hats-task-output/ + retention-days: 30 + + - name: Upload JSON result + if: always() + uses: actions/upload-artifact@v4 + with: + name: hats-task-result + path: /tmp/hats-task-result.json + retention-days: 30 diff --git a/.gitignore b/.gitignore index b7faf40..7e5a5bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,37 @@ +# ============================================================================= +# ๐ŸŽฉ Hat Stack โ€” Secret & Sensitive File Protection +# ============================================================================= +# NEVER commit these files. Your API keys, tokens, and credentials +# must stay in GitHub Secrets or local .env files only. 
+ +# Environment files (may contain API keys) +.env +.env.* +!.env.example +.envrc + +# Secret/credential files +*.key +*.pem +*.p12 +*.pfx +*.crt +secrets/ +.secrets/ +**/secrets.yml +**/secrets.yaml +**/credentials.yml +**/credentials.yaml + +# Hats runtime output (may contain sensitive review data) +hats-report.json +hats-report.md +/tmp/hats-* + +# ============================================================================= +# Python +# ============================================================================= + # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] @@ -135,8 +169,7 @@ celerybeat.pid *.sage.py # Environments -.env -.envrc +# (.env and .envrc are covered in the Hat Stack section above) .venv env/ venv/ diff --git a/FORK_SETUP.md b/FORK_SETUP.md new file mode 100644 index 0000000..262a4d8 --- /dev/null +++ b/FORK_SETUP.md @@ -0,0 +1,287 @@ +# ๐ŸŽฉ Fork Setup Guide โ€” Get Your Own Hat Stack Running in 5 Minutes + +Welcome! This guide gets you from **fork** to **working Hats reviews** as fast as possible. + +> **Your keys are YOUR keys.** The original repo owner's secrets are stored in their GitHub repository/organization secrets โ€” they are never in the code. When you fork, you get the code and workflows, but **zero secrets**. You add your own. + +--- + +## Quick Start (3 steps) + +### Step 1: Fork the repo + +Click **Fork** on [github.com/Grumpified-OGGVCT/hat_stack](https://github.com/Grumpified-OGGVCT/hat_stack). + +### Step 2: Add your Ollama Cloud API key + +In **your fork**, go to: + +``` +Settings โ†’ Secrets and variables โ†’ Actions โ†’ New repository secret +``` + +Add these secrets: + +| Secret Name | Required? 
| Description | +|------------|-----------|-------------| +| `OLLAMA_API_KEY` | โœ… **Yes** | Your Ollama Cloud API key ([get one here](https://ollama.ai/cloud)) | +| `OLLAMA_BASE_URL` | โŒ No | API base URL (default: `https://api.ollama.ai/v1`) | +| `HAT_STACK_CALLBACK_TOKEN` | โŒ No | GitHub PAT for posting results to other repos' PRs (only needed for dispatch mode) | + +That's it. **Two minutes, one secret.** + +### Step 3: You're done + +- **Self-review** runs automatically on every PR to your fork +- **Reusable workflow** is ready for your other repos to call +- **Dispatch handler** is ready for API-triggered reviews + +--- + +## How Secrets Work (Why This Is Safe) + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ ORIGINAL REPO (Grumpified-OGGVCT/hat_stack) โ”‚ +โ”‚ โ”‚ +โ”‚ Code: โœ… Public (MIT license) โ”‚ +โ”‚ Secrets: ๐Ÿ”’ Owner's OLLAMA_API_KEY, etc. โ”‚ +โ”‚ โ†’ Stored in GitHub Secrets โ”‚ +โ”‚ โ†’ NEVER in code โ”‚ +โ”‚ โ†’ NOT included in forks โ”‚ +โ”‚ โ†’ Cannot be read by anyone else โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ YOUR FORK (you/hat_stack) โ”‚ +โ”‚ โ”‚ +โ”‚ Code: โœ… Same code, workflows, configs โ”‚ +โ”‚ Secrets: ๐Ÿ”’ YOUR OLLAMA_API_KEY, etc. 
โ”‚ +โ”‚ โ†’ You add them yourself (Step 2 above) โ”‚ +โ”‚ โ†’ Stored in YOUR GitHub Secrets โ”‚ +โ”‚ โ†’ Cannot be read by the original owner โ”‚ +โ”‚ โ†’ Cannot be read by other forkers โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +**Key facts:** +- GitHub Secrets are **encrypted at rest** and **never exposed in logs** +- Secrets are **not transferred** when you fork a repository +- Workflow files reference `${{ secrets.OLLAMA_API_KEY }}` โ€” this resolves to **your fork's secret**, not anyone else's +- The Python runner reads `OLLAMA_API_KEY` from the environment โ€” it never stores, logs, or transmits it + +--- + +## Using Hat Stack from Your Other Projects + +### Option A: Reusable Workflow (recommended) + +In any of your other repos, create `.github/workflows/hats.yml`: + +```yaml +name: "๐ŸŽฉ Hats Review" +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + get-diff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generate diff + id: diff + run: | + git diff origin/${{ github.base_ref }}...HEAD > /tmp/pr.diff + - uses: actions/upload-artifact@v4 + with: + name: pr-diff + path: /tmp/pr.diff + + hats-review: + needs: get-diff + # Point this to YOUR fork: + uses: YOUR_USERNAME/hat_stack/.github/workflows/hats-review.yml@main + with: + diff_artifact: pr-diff + secrets: + ollama_api_key: ${{ secrets.OLLAMA_API_KEY }} +``` + +> **Replace `YOUR_USERNAME`** with your GitHub username. +> The `get-diff` job generates the diff and uploads it as an artifact. +> The reusable workflow downloads and reviews it โ€” no cross-workspace file paths needed. 
+ +### Option B: Composite Action + +Reference the action directly in any workflow step: + +```yaml +- name: Run Hats Review + uses: YOUR_USERNAME/hat_stack/.github/actions/run-hats@main + with: + diff_file: /tmp/pr.diff + env: + OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} +``` + +### Option C: Dispatch (API trigger) + +Send a review request from anywhere โ€” a CI agent, a script, a chatbot: + +```bash +curl -X POST \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + https://api.github.com/repos/YOUR_USERNAME/hat_stack/dispatches \ + -d '{ + "event_type": "run-hats", + "client_payload": { + "diff": "--- a/file.py\n+++ b/file.py\n@@ -1,3 +1,4 @@\n...", + "callback_repo": "YOUR_USERNAME/other-project", + "callback_pr": 42, + "context": "Adding new authentication module" + } + }' +``` + +The Hats review runs in **your fork** of hat_stack, using **your API key**, and posts results back to the specified PR. + +### Option D: `hat` CLI + Task Mode (for local agents like Copilot) + +This is the big one โ€” your local VS Code Copilot agent (or any script) can tell hat_stack to **do real work**, not just review. 
+ +**Install the `hat` CLI:** +```bash +# Option 1: Symlink (recommended) +ln -s /path/to/your/hat_stack/scripts/hat /usr/local/bin/hat + +# Option 2: Copy +cp scripts/hat /usr/local/bin/hat + +# Point to your fork +export HAT_STACK_REPO="YOUR_USERNAME/hat_stack" +``` + +**Available tasks:** + +| Task | What It Does | Primary Hat | +|------|-------------|-------------| +| `generate_code` | Build modules, functions, classes, APIs | ๐ŸŸข Green Hat | +| `generate_docs` | Write documentation, READMEs, ADRs, specs | ๐Ÿ”ต Blue Hat | +| `refactor` | Restructure, optimize, or modernize code | โšช White Hat | +| `analyze` | Deep analysis with written report | โšซ Black Hat | +| `plan` | Implementation plans, roadmaps, breakdowns | ๐Ÿฉต Cyan Hat | +| `test` | Generate test suites, cases, fixtures | ๐Ÿงช Chartreuse Hat | + +**Examples (what you or your Copilot agent would run):** + +```bash +# Generate a new code module โ€” results posted as a PR comment +hat task generate_code "Build a FastAPI auth module with JWT and refresh tokens" \ + --repo myorg/myapp --pr 42 + +# Write documentation for an endpoint +hat task generate_docs "Write API documentation for the /users endpoints" \ + --repo myorg/myapp --issue 10 + +# Plan a migration +hat task plan "Plan a migration from REST to GraphQL for the orders service" \ + --repo myorg/myapp + +# Generate tests for a module +hat task test "Write unit tests for auth.py covering edge cases and error paths" \ + --repo myorg/myapp --pr 88 + +# Security analysis +hat task analyze "Security audit of the payment processing module" \ + --repo myorg/payments --issue 5 + +# Review a diff (same as dispatch) +git diff main | hat review - --repo myorg/myapp --pr 123 + +# Check status of runs +hat status +``` + +**How it works under the hood:** +1. Your local `hat` CLI calls `gh api repos/YOUR_USERNAME/hat_stack/dispatches` +2. GitHub Actions picks it up and runs the `hats-task.yml` workflow +3. 
The task runner selects the right hats and models for the job +4. Primary hat generates the deliverable, supporting hats review/enhance it +5. Gold Hat does final QA +6. Results are posted back to your project's PR/issue as a comment + +**For Copilot in VS Code:** Your Copilot agent can shell out to `hat task ...` commands. The `gh` CLI handles auth, and hat_stack handles execution. Your Copilot agent gives the instruction, hat_stack's model pool does the heavy lifting, results come back to the PR. + +--- + +## Customizing Models + +The default model assignments follow the [Implementation Guide](hats/HATS_TEAM_IMPLEMENTATION_GUIDE.md) ยงE2.2. + +**Supported customization method:** Copy `scripts/hat_configs.yml` and modify model assignments per hat. Then set `config_override` in the reusable workflow call or pass a custom config path to the `hat` CLI. + +### Available Models (per the Implementation Guide) + +| Model | Tier | Best For | Cost | +|-------|------|----------|------| +| `glm-5.1` | 1 | Security, safety, final adjudication | $0.40/$1.10 per M tokens | +| `kimi-k2.5` | 1 | Strong reasoning alternative | $0.42/$1.50 per M tokens | +| `deepseek-v3.1` | 2 | Long-context analysis (128K) | $0.10/$0.28 per M tokens | +| `minimax-m2.7` | 2 | Innovation/feasibility analysis | $0.30/$1.20 per M tokens | +| `nemotron-3-super` | 3 | Quality analysis, pattern matching | $0.25/$0.80 per M tokens | +| `qwen3-coder` | 3 | Code-specific reasoning | $0.20/$0.80 per M tokens | +| `nemotron-3-nano` | 4 | Ultra-fast deterministic checks | $0.08/$0.20 per M tokens | +| `ministral-3` | 4 | Ultra-cheap fast scanning | $0.05/$0.15 per M tokens | + +--- + +## Local Development + +For running Hats locally (not in GitHub Actions): + +```bash +# 1. Clone your fork +git clone https://github.com/YOUR_USERNAME/hat_stack.git +cd hat_stack + +# 2. Set up environment +cp .env.example .env +# Edit .env โ€” add your OLLAMA_API_KEY + +# 3. 
Install dependencies +pip install -r scripts/requirements.txt + +# 4. Run on a diff file +python scripts/hats_runner.py --diff path/to/your.diff + +# 5. Or pipe from git +git diff HEAD~1 | python scripts/hats_runner.py --diff - +``` + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| "OLLAMA_API_KEY not set" | Add the secret in Settings โ†’ Secrets โ†’ Actions | +| Workflow doesn't trigger | Make sure you're on `main` branch (or update the `@main` ref) | +| Dispatch doesn't work | The `GITHUB_TOKEN` you use to call the dispatch API needs `repo` scope | +| Callback comments don't appear | Add `HAT_STACK_CALLBACK_TOKEN` secret with `repo` scope | +| Models return errors | Verify your Ollama Cloud subscription includes the models you're using | + +--- + +## Security Checklist + +- [ ] `.env` is in `.gitignore` โœ… (already configured) +- [ ] No API keys in any committed file โœ… (all use `${{ secrets.* }}` or `os.environ`) +- [ ] Secrets are in GitHub Actions Secrets, not in code โœ… +- [ ] Fork doesn't inherit original owner's secrets โœ… (GitHub design) +- [ ] Runner never logs or transmits API keys โœ… + +--- + +Back to [README](README.md) ยท Full spec: [SPEC.md](SPEC.md) ยท Implementation guide: [hats/HATS_TEAM_IMPLEMENTATION_GUIDE.md](hats/HATS_TEAM_IMPLEMENTATION_GUIDE.md) diff --git a/README.md b/README.md index be7465d..e712b50 100644 --- a/README.md +++ b/README.md @@ -121,11 +121,111 @@ You don't need to adopt all 18 hats at once. The recommended path: --- +## ๐Ÿš€ Use It โ€” GitHub Actions Integration + +Hat Stack runs **in GitHub** as a tool your other projects can call. It does two things: + +1. **Review** โ€” Analyze PRs and diffs through 18 expert lenses +2. **Task** โ€” Actually *do work*: generate code, write docs, create plans, build tests + +### Quick Start: Fork & Go + +1. **Fork** this repo +2. Add `OLLAMA_API_KEY` as a **Repository Secret** in your fork +3. 
Done โ€” your fork's workflows are live + +> **Your keys stay yours.** GitHub Secrets are encrypted, never in code, and never transferred to forks. See [`FORK_SETUP.md`](FORK_SETUP.md) for the full guide. + +### Hook Up Your Other Projects (Review Mode) + +**Option A โ€” Reusable Workflow** (recommended): +```yaml +# In your other repo: .github/workflows/hats.yml +name: "๐ŸŽฉ Hats Review" +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + get-diff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generate diff + id: diff + run: | + git diff origin/${{ github.base_ref }}...HEAD > /tmp/pr.diff + - uses: actions/upload-artifact@v4 + with: + name: pr-diff + path: /tmp/pr.diff + + hats-review: + needs: get-diff + uses: YOUR_USERNAME/hat_stack/.github/workflows/hats-review.yml@main + with: + diff_artifact: pr-diff + secrets: + ollama_api_key: ${{ secrets.OLLAMA_API_KEY }} +``` + +**Option B โ€” Composite Action**: +```yaml +- uses: YOUR_USERNAME/hat_stack/.github/actions/run-hats@main + with: + diff_file: /tmp/pr.diff + env: + OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} +``` + +### ๐Ÿค– Task Mode โ€” Tell It to DO Things (via `hat` CLI or GitHub CLI) + +Install the `hat` CLI, then your local agent (Copilot, etc.) 
can dispatch real work:
+
+```bash
+# Install (one time)
+cp scripts/hat /usr/local/bin/hat  # or add scripts/ to PATH
+export HAT_STACK_REPO="YOUR_USERNAME/hat_stack"
+
+# Generate code
+hat task generate_code "Build a FastAPI auth module with JWT" --repo myorg/app --pr 42
+
+# Write documentation
+hat task generate_docs "Write API docs for /users endpoints" --repo myorg/app --issue 10
+
+# Create a plan
+hat task plan "Plan migration from REST to GraphQL" --repo myorg/app
+
+# Generate tests
+hat task test "Write unit tests for auth.py" --repo myorg/app --pr 88
+
+# Deep analysis
+hat task analyze "Security audit of payment processing" --repo myorg/payments
+
+# Review a diff
+git diff main | hat review - --repo myorg/app --pr 123
+```
+
+Or dispatch directly via `gh` CLI (what your Copilot agent would call — note that `client_payload` must be a JSON *object*, so the full request body is piped to `--input`):
+
+```bash
+printf '%s' '{"event_type":"run-task","client_payload":{"task":"generate_code","prompt":"Build auth module","callback_repo":"myorg/app","callback_pr":"42"}}' \
+  | gh api repos/YOUR_USERNAME/hat_stack/dispatches --input -
+```
+
+→ Full integration guide: [`FORK_SETUP.md`](FORK_SETUP.md)
+
+---
+
 ## Documentation
 
 | Document | Description |
 |----------|-------------|
 | [`README.md`](README.md) | This file — project overview, architecture, and quick reference |
+| [`FORK_SETUP.md`](FORK_SETUP.md) | **Fork & Setup Guide** — get your own working Hat Stack in 5 minutes, secret management, integration patterns |
 | [`SPEC.md`](SPEC.md) | **Primary specification** — orchestration, gates, retry policies, HITL framework, CI/CD integration, security, deployment guide, and all appendices |
 | [`CATALOG.md`](CATALOG.md) | **Master Hat Registry** — design philosophy, full hat table with triggers, severity grading, and composite risk score |
 | [`hats/01_red_hat.md`](hats/01_red_hat.md) – [`hats/18_gold_hat.md`](hats/18_gold_hat.md) | Individual hat specifications with detailed assignments, severity grading, tools, and token budgets |
@@ -140,9 +240,26 @@ You don't need to
adopt all 18 hats at once. The recommended path: ``` hat_stack/ โ”œโ”€โ”€ README.md โ† This file โ€” project overview & navigation +โ”œโ”€โ”€ FORK_SETUP.md โ† Fork & setup guide (start here for your own instance) +โ”œโ”€โ”€ .env.example โ† Environment template (copy to .env for local use) โ”œโ”€โ”€ CATALOG.md โ† Master Hat Registry (full table + design philosophy) โ”œโ”€โ”€ SPEC.md โ† Primary specification (16 sections + appendices) โ”œโ”€โ”€ LICENSE โ† MIT License +โ”œโ”€โ”€ .github/ +โ”‚ โ”œโ”€โ”€ workflows/ +โ”‚ โ”‚ โ”œโ”€โ”€ hats-review.yml โ† Reusable workflow (other repos call this for reviews) +โ”‚ โ”‚ โ”œโ”€โ”€ hats-dispatch.yml โ† Dispatch handler (API-triggered reviews) +โ”‚ โ”‚ โ”œโ”€โ”€ hats-task.yml โ† Task execution (generate code, docs, plans, etc.) +โ”‚ โ”‚ โ””โ”€โ”€ hats-self-review.yml โ† Self-review (reviews PRs to this repo) +โ”‚ โ””โ”€โ”€ actions/ +โ”‚ โ””โ”€โ”€ run-hats/ +โ”‚ โ””โ”€โ”€ action.yml โ† Composite action (direct step in any workflow) +โ”œโ”€โ”€ scripts/ +โ”‚ โ”œโ”€โ”€ hat โ† CLI wrapper โ€” dispatch tasks from terminal or agents +โ”‚ โ”œโ”€โ”€ hats_runner.py โ† Review orchestrator (Conductor + all hat logic) +โ”‚ โ”œโ”€โ”€ hats_task_runner.py โ† Task orchestrator (generate, refactor, plan, etc.) 
+โ”‚ โ”œโ”€โ”€ hat_configs.yml โ† Hat-to-model mapping & configuration +โ”‚ โ””โ”€โ”€ requirements.txt โ† Python dependencies โ””โ”€โ”€ hats/ โ”œโ”€โ”€ 01_red_hat.md โ† Individual hat specifications โ”œโ”€โ”€ 02_black_hat.md diff --git a/scripts/hat b/scripts/hat new file mode 100755 index 0000000..5b978ad --- /dev/null +++ b/scripts/hat @@ -0,0 +1,299 @@ +#!/usr/bin/env bash +# ============================================================================= +# ๐ŸŽฉ hat โ€” CLI for dispatching Hat Stack tasks from your terminal +# ============================================================================= +# +# Prerequisites: +# - GitHub CLI (`gh`) installed and authenticated: https://cli.github.com +# - Your hat_stack fork (or the original if you have dispatch permissions) +# +# Usage: +# hat review [--repo owner/repo] [--pr 42] +# hat task "" [--repo owner/repo] [--pr 42] [--hats black,green] +# hat status [run_id] +# hat list-tasks +# +# Examples: +# # Review a PR diff +# git diff origin/main...HEAD | hat review - --repo myorg/myapp --pr 123 +# +# # Generate a new module +# hat task generate_code "Build a FastAPI auth module with JWT and refresh tokens" \ +# --repo myorg/myapp --pr 123 +# +# # Write docs for an existing module +# hat task generate_docs "Write API documentation for the /users endpoints" \ +# --repo myorg/myapp --issue 45 +# +# # Create an implementation plan +# hat task plan "Plan a migration from REST to GraphQL for the orders service" +# +# # Generate tests +# hat task test "Write unit tests for auth.py covering edge cases" \ +# --repo myorg/myapp --pr 88 +# +# # Deep security analysis +# hat task analyze "Security audit of the payment processing module" \ +# --repo myorg/payments +# +# Configuration: +# Set HAT_STACK_REPO to point to your fork (default: auto-detect from git remote) +# export HAT_STACK_REPO="yourusername/hat_stack" +# +# ============================================================================= + +set -euo pipefail + +# 
--------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + +# Auto-detect hat_stack repo from git remote if not explicitly set +detect_hat_stack_repo() { + local remote_url repo + + remote_url="$(git remote get-url origin 2>/dev/null || true)" + [ -n "$remote_url" ] || return 1 + + case "$remote_url" in + git@github.com:*.git) + repo="${remote_url#git@github.com:}" + repo="${repo%.git}" + ;; + git@github.com:*) + repo="${remote_url#git@github.com:}" + ;; + https://github.com/*/*.git) + repo="${remote_url#https://github.com/}" + repo="${repo%.git}" + ;; + https://github.com/*/*) + repo="${remote_url#https://github.com/}" + ;; + *) + return 1 + ;; + esac + + printf '%s\n' "$repo" +} + +# Where is your hat_stack fork? +HAT_STACK_REPO="${HAT_STACK_REPO:-$(detect_hat_stack_repo || printf '%s\n' 'Grumpified-OGGVCT/hat_stack')}" + +# Colors (if terminal supports them) +if [ -t 1 ]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[1;33m' + CYAN='\033[0;36m' + BOLD='\033[1m' + NC='\033[0m' +else + RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC='' +fi + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +die() { echo -e "${RED}โŒ $*${NC}" >&2; exit 1; } +info() { echo -e "${CYAN}๐ŸŽฉ $*${NC}" >&2; } +success() { echo -e "${GREEN}โœ… $*${NC}" >&2; } +warn() { echo -e "${YELLOW}โš ๏ธ $*${NC}" >&2; } + +check_gh() { + command -v gh >/dev/null 2>&1 || die "GitHub CLI (gh) not found. Install: https://cli.github.com" + gh auth status >/dev/null 2>&1 || die "GitHub CLI not authenticated. 
Run: gh auth login"
+}
+
+# ---------------------------------------------------------------------------
+# Commands
+# ---------------------------------------------------------------------------
+
+cmd_review() {
+  local diff_source="$1"; shift
+  local callback_repo="" callback_pr="" callback_issue="" hats="" context=""
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --repo) callback_repo="$2"; shift 2 ;;
+      --pr) callback_pr="$2"; shift 2 ;;
+      --issue) callback_issue="$2"; shift 2 ;;
+      --hats) hats="$2"; shift 2 ;;
+      --context) context="$2"; shift 2 ;;
+      *) die "Unknown option: $1" ;;
+    esac
+  done
+
+  # Read diff
+  local diff_text
+  if [ "$diff_source" = "-" ]; then
+    diff_text=$(cat)
+  elif [ -f "$diff_source" ]; then
+    diff_text=$(cat "$diff_source")
+  else
+    die "Diff source not found: $diff_source"
+  fi
+
+  [ -z "$diff_text" ] && die "Empty diff — nothing to review"
+
+  info "Dispatching review to ${HAT_STACK_REPO}..."
+
+  # Build payload — pass vars via env to avoid shell injection
+  local payload
+  payload=$(HAT_CB_REPO="$callback_repo" HAT_CB_PR="$callback_pr" \
+    HAT_CB_ISSUE="$callback_issue" HAT_HATS="$hats" HAT_CTX="$context" \
+    python3 -c "
+import json, os, sys
+payload = {
+    'diff': sys.stdin.read(),
+    'callback_repo': os.environ.get('HAT_CB_REPO', ''),
+    'callback_pr': os.environ.get('HAT_CB_PR', ''),
+    'callback_issue': os.environ.get('HAT_CB_ISSUE', ''),
+    'hats': os.environ.get('HAT_HATS', ''),
+    'context': os.environ.get('HAT_CTX', ''),
+}
+payload = {k: v for k, v in payload.items() if v}
+print(json.dumps(payload))
+" <<< "$diff_text")
+
+  printf '{"event_type":"run-hats","client_payload":%s}' "$payload" \
+    | gh api "repos/${HAT_STACK_REPO}/dispatches" \
+    --input -
+
+  success "Review dispatched to ${HAT_STACK_REPO}"
+  info "Check progress: gh run list -R ${HAT_STACK_REPO} -w 'Hats Dispatch Handler'"
+
+  if [ -n "$callback_repo" ] && [ -n "$callback_pr" ]; then
+    info "Results will be posted to ${callback_repo}#${callback_pr}"
+  fi
+}
+
+cmd_task() {
+ 
local task_type="$1"; shift
+  local prompt="$1"; shift
+  local callback_repo="" callback_pr="" callback_issue="" hats="" context=""
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --repo) callback_repo="$2"; shift 2 ;;
+      --pr) callback_pr="$2"; shift 2 ;;
+      --issue) callback_issue="$2"; shift 2 ;;
+      --hats) hats="$2"; shift 2 ;;
+      --context) context="$2"; shift 2 ;;
+      *) die "Unknown option: $1" ;;
+    esac
+  done
+
+  # Validate task type
+  local valid_tasks="generate_code generate_docs refactor analyze plan test"
+  echo "$valid_tasks" | grep -qw "$task_type" || \
+    die "Unknown task: $task_type\n  Available: $valid_tasks"
+
+  info "Dispatching task '${task_type}' to ${HAT_STACK_REPO}..."
+  info "Prompt: ${prompt}"
+
+  # Build payload — pass vars via env to avoid shell injection
+  local payload
+  payload=$(HAT_TASK="$task_type" HAT_PROMPT="$prompt" \
+    HAT_CB_REPO="$callback_repo" HAT_CB_PR="$callback_pr" \
+    HAT_CB_ISSUE="$callback_issue" HAT_HATS="$hats" HAT_CTX="$context" \
+    python3 -c "
+import json, os
+payload = {
+    'task': os.environ.get('HAT_TASK', ''),
+    'prompt': os.environ.get('HAT_PROMPT', ''),
+    'callback_repo': os.environ.get('HAT_CB_REPO', ''),
+    'callback_pr': os.environ.get('HAT_CB_PR', ''),
+    'callback_issue': os.environ.get('HAT_CB_ISSUE', ''),
+    'hats': os.environ.get('HAT_HATS', ''),
+    'context': os.environ.get('HAT_CTX', ''),
+}
+payload = {k: v for k, v in payload.items() if v}
+print(json.dumps(payload))
+")
+
+  printf '{"event_type":"run-task","client_payload":%s}' "$payload" \
+    | gh api "repos/${HAT_STACK_REPO}/dispatches" \
+    --input -
+
+  success "Task dispatched to ${HAT_STACK_REPO}"
+  info "Check progress: gh run list -R ${HAT_STACK_REPO} -w 'Hats Task Execution'"
+
+  if [ -n "$callback_repo" ]; then
+    local target="${callback_pr:-$callback_issue}"
+    if [ -n "$target" ]; then info "Results will be posted to ${callback_repo}#${target}"; fi
+  fi
+}
+
+cmd_status() {
+  local run_id="${1:-}"
+
+  if [ -n "$run_id" ]; then
+    gh run view "$run_id" -R 
"${HAT_STACK_REPO}" + else + info "Recent workflow runs:" + gh run list -R "${HAT_STACK_REPO}" --limit 10 + fi +} + +cmd_list_tasks() { + echo -e "${BOLD}Available task types:${NC}" + echo "" + echo -e " ${CYAN}generate_code${NC} โ€” Build modules, functions, classes, APIs" + echo -e " ${CYAN}generate_docs${NC} โ€” Write documentation, READMEs, ADRs, specs" + echo -e " ${CYAN}refactor${NC} โ€” Restructure, optimize, or modernize existing code" + echo -e " ${CYAN}analyze${NC} โ€” Deep analysis with written report" + echo -e " ${CYAN}plan${NC} โ€” Implementation plans, roadmaps, task breakdowns" + echo -e " ${CYAN}test${NC} โ€” Generate test suites, test cases, fixtures" + echo "" + echo -e "${BOLD}Usage:${NC}" + echo ' hat task generate_code "Build a user auth module" --repo myorg/myapp --pr 42' + echo ' hat task plan "Plan migration to microservices" --repo myorg/monolith' +} + +cmd_help() { + echo -e "${BOLD}๐ŸŽฉ hat${NC} โ€” CLI for the Hat Stack agentic AI review & task system" + echo "" + echo -e "${BOLD}Usage:${NC}" + echo " hat review [options] Review a code diff" + echo " hat task \"\" [options] Execute an agentic task" + echo " hat status [run_id] Check workflow run status" + echo " hat list-tasks Show available task types" + echo " hat help Show this help" + echo "" + echo -e "${BOLD}Options:${NC}" + echo " --repo Target repo for callbacks" + echo " --pr PR to post results to" + echo " --issue Issue to post results to" + echo " --hats Specific hats to use" + echo " --context Additional context" + echo "" + echo -e "${BOLD}Examples:${NC}" + echo ' git diff main | hat review - --repo myorg/app --pr 42' + echo ' hat task generate_code "Build JWT auth module" --repo myorg/app --pr 42' + echo ' hat task generate_docs "Write API docs for /users" --repo myorg/app --issue 10' + echo ' hat task plan "Plan GraphQL migration" --repo myorg/app' + echo ' hat task analyze "Security audit of payments" --repo myorg/payments' + echo "" + echo -e "${BOLD}Config:${NC}" + 
echo " HAT_STACK_REPO Your hat_stack fork (default: ${HAT_STACK_REPO})" + echo "" + echo " Full docs: https://github.com/${HAT_STACK_REPO}/blob/main/FORK_SETUP.md" +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +check_gh + +case "${1:-help}" in + review) shift; cmd_review "$@" ;; + task) shift; cmd_task "$@" ;; + status) shift; cmd_status "$@" ;; + list-tasks) cmd_list_tasks ;; + help|--help|-h) cmd_help ;; + *) die "Unknown command: $1\nRun 'hat help' for usage." ;; +esac diff --git a/scripts/hat_configs.yml b/scripts/hat_configs.yml new file mode 100644 index 0000000..11ac23d --- /dev/null +++ b/scripts/hat_configs.yml @@ -0,0 +1,483 @@ +# ๐ŸŽฉ Hat-to-Model Configuration +# Based on HATS_TEAM_IMPLEMENTATION_GUIDE.md ยงE2.2 and ยงE3.1 +# Models reference Ollama Cloud (OpenAI-compatible API) +# +# Model options per the guide's 29-model inventory: +# Tier 1 (Critical Reasoning): glm-5.1:cloud, kimi-k2.5:cloud +# Tier 2 (Architectural): glm-5.1:cloud, deepseek-v3.1:cloud, minimax-m2.7:cloud +# Tier 3 (Quality Analysis): nemotron-3-super:cloud, qwen3-coder:cloud +# Tier 4 (Fast Scanning): nemotron-3-nano:cloud, ministral-3:cloud + +api: + base_url_env: OLLAMA_BASE_URL + default_base_url: "https://api.ollama.ai/v1" + api_key_env: OLLAMA_API_KEY + +models: + glm-5.1: + tier: 1 + context_window: 128000 + input_cost_per_m: 0.40 + output_cost_per_m: 1.10 + kimi-k2.5: + tier: 1 + context_window: 128000 + input_cost_per_m: 0.42 + output_cost_per_m: 1.50 + deepseek-v3.1: + tier: 2 + context_window: 128000 + input_cost_per_m: 0.10 + output_cost_per_m: 0.28 + minimax-m2.7: + tier: 2 + context_window: 128000 + input_cost_per_m: 0.30 + output_cost_per_m: 1.20 + nemotron-3-super: + tier: 3 + context_window: 32000 + input_cost_per_m: 0.25 + output_cost_per_m: 0.80 + qwen3-coder: + tier: 3 + context_window: 128000 + input_cost_per_m: 0.20 + output_cost_per_m: 
0.80 + nemotron-3-nano: + tier: 4 + context_window: 32000 + input_cost_per_m: 0.08 + output_cost_per_m: 0.20 + ministral-3: + tier: 4 + context_window: 32000 + input_cost_per_m: 0.05 + output_cost_per_m: 0.15 + +# Hat definitions โ€” per HATS_TEAM_IMPLEMENTATION_GUIDE.md ยงE2.2 +hats: + black: + name: "Black Hat โ€” Security & Exploits" + emoji: "โšซ" + number: 2 + always_run: true + primary_model: "glm-5.1" + fallback_model: "deepseek-v3.1" + temperature: 0.2 + max_tokens: 4096 + timeout_seconds: 150 + triggers: [] # Always runs + persona: | + You are BlackHat, an elite adversarial security analyst specializing + in penetration testing, threat modeling, and secure code review. You approach + every code change as a potential attack surface. Your mission is to find + exploitable vulnerabilities before attackers do. + + red: + name: "Red Hat โ€” Failure & Resilience" + emoji: "๐Ÿ”ด" + number: 1 + always_run: false + primary_model: "glm-5.1" + fallback_model: "nemotron-3-super" + temperature: 0.3 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "error" + - "retry" + - "catch" + - "exception" + - "async" + - "concurrent" + - "thread" + - "lock" + - "mutex" + persona: | + You are RedHat, a chaos engineering and resilience specialist. You think + in failure modes: what breaks when load spikes, when dependencies fail, + when networks partition. You find cascade failures and single points of failure. + + white: + name: "White Hat โ€” Efficiency & Resources" + emoji: "โšช" + number: 3 + always_run: false + primary_model: "nemotron-3-super" + fallback_model: "ministral-3" + temperature: 0.1 + max_tokens: 2048 + timeout_seconds: 90 + triggers: + - "loop" + - "query" + - "select" + - "insert" + - "batch" + - "cache" + - "memory" + - "token" + persona: | + You are WhiteHat, a performance and efficiency analyst. You find wasted + compute, unnecessary allocations, N+1 queries, token waste, and opportunities + for caching, batching, and resource optimization. 
+ + yellow: + name: "Yellow Hat โ€” Synergies & Integration" + emoji: "๐ŸŸก" + number: 4 + always_run: false + primary_model: "glm-5.1" + fallback_model: "nemotron-3-super" + temperature: 0.3 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "import" + - "api" + - "service" + - "endpoint" + - "interface" + - "dependency" + persona: | + You are YellowHat, an integration and synergy analyst. You discover + cross-component value, shared abstractions, and dependency optimization + opportunities across services. + + green: + name: "Green Hat โ€” Evolution & Extensibility" + emoji: "๐ŸŸข" + number: 5 + always_run: false + primary_model: "glm-5.1" + fallback_model: "minimax-m2.7" + temperature: 0.3 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "module" + - "plugin" + - "abstract" + - "interface" + - "version" + - "deprecat" + persona: | + You are GreenHat, an architecture evolution specialist. You evaluate + extensibility, versioning strategy, deprecation paths, and future-proofing + of architectural decisions. + + blue: + name: "Blue Hat โ€” Process & Specification" + emoji: "๐Ÿ”ต" + number: 6 + always_run: true + primary_model: "nemotron-3-super" + fallback_model: "ministral-3" + temperature: 0.1 + max_tokens: 2048 + timeout_seconds: 60 + triggers: [] # Always runs + persona: | + You are BlueHat, a process and specification compliance analyst. You verify + spec coverage, test completeness, commit hygiene, documentation quality, + and adherence to project standards. + + indigo: + name: "Indigo Hat โ€” Cross-Feature Architecture" + emoji: "๐ŸŸฃ" + number: 7 + always_run: false + primary_model: "deepseek-v3.1" + fallback_model: "nemotron-3-super" + temperature: 0.3 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "import" + - "shared" + - "common" + - "util" + - "integration" + persona: | + You are IndigoHat, a cross-feature architecture analyst. 
You find + macro-level DRY violations, duplicated pipelines, and opportunities + for shared abstractions across modules. + + cyan: + name: "Cyan Hat โ€” Innovation & Feasibility" + emoji: "๐Ÿฉต" + number: 8 + always_run: false + primary_model: "glm-5.1" + fallback_model: "minimax-m2.7" + temperature: 0.4 + max_tokens: 4096 + timeout_seconds: 150 + triggers: + - "experiment" + - "prototype" + - "novel" + - "llm" + - "agent" + - "ml" + - "ai" + persona: | + You are CyanHat, an innovation and feasibility analyst. You evaluate + technical feasibility, risk/ROI analysis, and prototype validation + for experimental patterns and new technology stacks. + + purple: + name: "Purple Hat โ€” AI Safety & Alignment" + emoji: "๐ŸŸช" + number: 9 + always_run: true + primary_model: "glm-5.1" + fallback_model: "deepseek-v3.1" + temperature: 0.2 + max_tokens: 4096 + timeout_seconds: 150 + triggers: [] # Always runs + persona: | + You are PurpleHat, an AI safety and alignment specialist. You evaluate + OWASP Agentic Top 10 compliance, bias detection, PII leakage risk, + and model alignment concerns in every code change. + + orange: + name: "Orange Hat โ€” DevOps & Automation" + emoji: "๐ŸŸ " + number: 10 + always_run: false + primary_model: "glm-5.1" + fallback_model: "nemotron-3-super" + temperature: 0.2 + max_tokens: 4096 + timeout_seconds: 90 + triggers: + - "docker" + - "dockerfile" + - "ci" + - "workflow" + - "terraform" + - "helm" + - "k8s" + - "deploy" + persona: | + You are OrangeHat, a DevOps and automation specialist. You verify + CI/CD pipeline health, infrastructure-as-code quality, container security, + and deployment safety. 
+ + silver: + name: "Silver Hat โ€” Context & Token Optimization" + emoji: "๐Ÿชจ" + number: 11 + always_run: false + primary_model: "nemotron-3-nano" + fallback_model: "ministral-3" + temperature: 0.1 + max_tokens: 2048 + timeout_seconds: 60 + triggers: + - "prompt" + - "context" + - "token" + - "rag" + - "embed" + - "vector" + persona: | + You are SilverHat, a context and token optimization specialist. You + analyze token counting, context compression, and hybrid retrieval + optimization for LLM-powered systems. + + azure: + name: "Azure Hat โ€” MCP & Protocol Integration" + emoji: "๐Ÿ’Ž" + number: 12 + always_run: false + primary_model: "glm-5.1" + fallback_model: "qwen3-coder" + temperature: 0.2 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "mcp" + - "tool" + - "function_call" + - "schema" + - "a2a" + - "protocol" + persona: | + You are AzureHat, an MCP and protocol integration specialist. You validate + MCP contracts, A2A schema enforcement, type safety, and tool-calling + correctness. + + brown: + name: "Brown Hat โ€” Data Governance & Privacy" + emoji: "๐ŸŸค" + number: 13 + always_run: false + primary_model: "glm-5.1" + fallback_model: "deepseek-v3.1" + temperature: 0.2 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "pii" + - "user" + - "data" + - "log" + - "store" + - "privacy" + - "gdpr" + - "personal" + persona: | + You are BrownHat, a data governance and privacy specialist. You verify + GDPR/CCPA/HIPAA compliance, data minimization, audit logging, and + PII handling practices. + + gray: + name: "Gray Hat โ€” Observability & Reliability" + emoji: "โš™๏ธ" + number: 14 + always_run: false + primary_model: "glm-5.1" + fallback_model: "nemotron-3-super" + temperature: 0.3 + max_tokens: 4096 + timeout_seconds: 90 + triggers: + - "trace" + - "metric" + - "log" + - "monitor" + - "alert" + - "sla" + - "slo" + - "latency" + persona: | + You are GrayHat, an observability and reliability specialist. 
You verify + distributed tracing, SLO/SLA monitoring, alerting, and latency budgeting + for production services. + + teal: + name: "Teal Hat โ€” Accessibility & Inclusion" + emoji: "โ™ฟ" + number: 15 + always_run: false + primary_model: "nemotron-3-super" + fallback_model: "ministral-3" + temperature: 0.1 + max_tokens: 2048 + timeout_seconds: 60 + triggers: + - "ui" + - "html" + - "css" + - "render" + - "aria" + - "a11y" + - "i18n" + - "l10n" + persona: | + You are TealHat, an accessibility and inclusion specialist. You verify + WCAG compliance, screen-reader compatibility, and inclusive design + in UI changes and content generation. + + steel: + name: "Steel Hat โ€” Supply Chain & Dependencies" + emoji: "๐Ÿ”—" + number: 16 + always_run: false + primary_model: "deepseek-v3.1" + fallback_model: "nemotron-3-nano" + temperature: 0.1 + max_tokens: 2048 + timeout_seconds: 60 + triggers: + - "package.json" + - "requirements.txt" + - "Gemfile" + - "go.mod" + - "pom.xml" + - "Cargo.toml" + - "lockfile" + - "dependency" + persona: | + You are SteelHat, a supply chain and dependency security specialist. + You verify SBOM generation, vulnerability scanning, and license compliance + for all dependency changes. + + chartreuse: + name: "Chartreuse Hat โ€” Testing & Evaluation" + emoji: "๐Ÿงช" + number: 17 + always_run: false + primary_model: "glm-5.1" + fallback_model: "qwen3-coder" + temperature: 0.3 + max_tokens: 4096 + timeout_seconds: 120 + triggers: + - "test" + - "spec" + - "assert" + - "expect" + - "mock" + - "fixture" + - "benchmark" + persona: | + You are ChartreuseHat, a testing and evaluation specialist. You verify + test coverage, RAGAS metrics, prompt evaluation quality, and regression + detection. 
+ + gold: + name: "Gold Hat โ€” CoVE Final QA" + emoji: "โœจ" + number: 18 + always_run: true + run_last: true + primary_model: "glm-5.1" + fallback_model: null # CoVE must always use best available model + temperature: 0.2 + max_tokens: 8192 + timeout_seconds: 300 + triggers: [] # Always runs last + persona: | + You are GoldHat (CoVE โ€” Convergent Verification & Expert), the final + adjudicator. You synthesize all hat reports, resolve conflicts, compute + the composite risk score, and issue the final merge verdict: + ALLOW (score โ‰ค 20), ESCALATE (21โ€“60), or QUARANTINE (> 60 or any CRITICAL). + +# Gate configuration per SPEC.md ยง7 +gates: + cost_budget: + max_tokens_per_pr: 150000 + max_usd_per_pr: 0.15 # Ollama Cloud pricing โ€” $0.15 covers ~18 hats + security_fast_path: + enabled: true + trigger_severity: "CRITICAL" + timeout: + default_per_hat_seconds: 120 + +# Execution settings per SPEC.md ยง8 +execution: + strategy: "tiered_parallel" + max_concurrent_hats: 6 + retry: + max_attempts: 3 + initial_backoff_seconds: 1 + backoff_multiplier: 2 + max_backoff_seconds: 10 + +# Risk score formula per CATALOG.md +risk_score: + critical_weight: 20 + critical_cap: 80 + high_weight: 5 + high_cap: 40 + medium_weight: 1 + medium_cap: 10 + low_weight: 0.1 + low_cap: 5 + allow_threshold: 20 + escalate_threshold: 60 diff --git a/scripts/hats_runner.py b/scripts/hats_runner.py new file mode 100644 index 0000000..4031db3 --- /dev/null +++ b/scripts/hats_runner.py @@ -0,0 +1,688 @@ +#!/usr/bin/env python3 +""" +๐ŸŽฉ Hats Team Runner โ€” GitHub Actions Orchestrator + +Implements the Conductor logic from the Hats Team Specification: + - Hat selection based on diff triggers + - Tiered-parallel execution via Ollama Cloud API + - Gate engine (cost budget, security fast-path, timeout) + - Consolidation and Gold Hat (CoVE) final adjudication + - Structured JSON + Markdown report output + +Usage: + python hats_runner.py --diff [--hats black,blue,...] 
[--config hat_configs.yml] + +Environment: + OLLAMA_API_KEY โ€” Ollama Cloud API key (required) + OLLAMA_BASE_URL โ€” API base URL (default: https://api.ollama.ai/v1) +""" + +import argparse +import json +import os +import re +import sys +import time +import traceback +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +import requests +import yaml + +SCRIPT_DIR = Path(__file__).resolve().parent +DEFAULT_CONFIG = SCRIPT_DIR / "hat_configs.yml" + +# --------------------------------------------------------------------------- +# Preflight health check โ€” clear errors for missing configuration +# --------------------------------------------------------------------------- + +def preflight_check() -> list[str]: + """Check that required environment is configured. + + Returns a list of warning/error messages. Empty list = all good. + """ + issues = [] + + api_key = os.environ.get("OLLAMA_API_KEY", "").strip() + if not api_key: + issues.append( + "โŒ OLLAMA_API_KEY is not set.\n" + " โ†’ For GitHub Actions: Add it as a Repository Secret\n" + " (Settings โ†’ Secrets and variables โ†’ Actions โ†’ New repository secret)\n" + " โ†’ For local use: Copy .env.example to .env and fill in your key\n" + " โ†’ Get a key at: https://ollama.ai/cloud" + ) + + base_url = os.environ.get("OLLAMA_BASE_URL", "").strip() + if not base_url: + # Not an error โ€” we have a default + issues.append( + "โ„น๏ธ OLLAMA_BASE_URL not set โ€” using default: https://api.ollama.ai/v1" + ) + + return issues + + +# --------------------------------------------------------------------------- +# Config loader +# --------------------------------------------------------------------------- + +def load_config(config_path: str | Path) -> dict: + """Load hat configuration from YAML file.""" + with open(config_path, "r", encoding="utf-8") as fh: + return yaml.safe_load(fh) + + +# --------------------------------------------------------------------------- +# Hat selector โ€” 
determines which hats to run based on diff content +# --------------------------------------------------------------------------- + +def select_hats(config: dict, diff_text: str, requested_hats: list[str] | None = None) -> list[str]: + """Select hats to run based on diff content triggers (per SPEC.md ยง6). + + Always-run hats (Black, Blue, Purple, Gold) are always included. + Conditional hats activate when their trigger keywords appear in the diff. + If requested_hats is provided, only those hats (plus always-run) are used. + """ + hats_cfg = config["hats"] + selected = [] + diff_lower = diff_text.lower() + + for hat_id, hat_def in hats_cfg.items(): + # Always-run hats are mandatory + if hat_def.get("always_run"): + selected.append(hat_id) + continue + + # If caller requested specific hats, only include those + if requested_hats and hat_id not in requested_hats: + continue + + # Check trigger keywords against diff content + triggers = hat_def.get("triggers", []) + if any(trigger.lower() in diff_lower for trigger in triggers): + selected.append(hat_id) + + # Gold/CoVE must always run last โ€” ensure it's at the end + if "gold" in selected: + selected.remove("gold") + selected.append("gold") + + return selected + + +# --------------------------------------------------------------------------- +# Cost estimation gate (G1) +# --------------------------------------------------------------------------- + +def estimate_cost(config: dict, selected_hats: list[str], diff_tokens: int) -> tuple[float, bool]: + """Estimate pipeline cost and check against budget gate. + + Returns (estimated_cost_usd, within_budget). 
+ """ + models_cfg = config["models"] + hats_cfg = config["hats"] + budget = config["gates"]["cost_budget"]["max_usd_per_pr"] + + total_cost = 0.0 + for hat_id in selected_hats: + hat_def = hats_cfg[hat_id] + model_name = hat_def["primary_model"] + model_cfg = models_cfg.get(model_name, {}) + + # Estimate: input = diff_tokens + ~500 (system prompt), output = max_tokens + input_tokens = min(diff_tokens + 500, model_cfg.get("context_window", 128000)) + output_tokens = hat_def.get("max_tokens", 4096) + + input_cost = (input_tokens / 1_000_000) * model_cfg.get("input_cost_per_m", 0.20) + output_cost = (output_tokens / 1_000_000) * model_cfg.get("output_cost_per_m", 0.80) + total_cost += input_cost + output_cost + + return total_cost, total_cost <= budget + + +# --------------------------------------------------------------------------- +# Ollama Cloud API caller +# --------------------------------------------------------------------------- + +def call_ollama(config: dict, model: str, system_prompt: str, user_prompt: str, + temperature: float = 0.3, max_tokens: int = 4096, + timeout: int = 120) -> dict: + """Call the Ollama Cloud OpenAI-compatible chat completions endpoint.""" + api_cfg = config["api"] + base_url = os.environ.get( + api_cfg.get("base_url_env", "OLLAMA_BASE_URL"), + api_cfg.get("default_base_url", "https://api.ollama.ai/v1"), + ) + api_key = os.environ.get( + api_cfg.get("api_key_env", "OLLAMA_API_KEY"), "" + ) + + if not api_key: + return { + "error": "OLLAMA_API_KEY not set", + "model": model, + "content": None, + "usage": {"input": 0, "output": 0}, + } + + url = f"{base_url.rstrip('/')}/chat/completions" + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "temperature": temperature, + "max_tokens": max_tokens, + "response_format": {"type": "json_object"}, + } + + 
# ---------------------------------------------------------------------------
# Single hat execution
# ---------------------------------------------------------------------------

# JSON reply contract appended to every hat persona. NOTE: the angle-bracket
# placeholders were restored — the previous text had them stripped (e.g.
# `"hat": ""`, `"line": ,`), leaving the model an ambiguous schema.
_FINDING_SCHEMA = """\
Respond with a JSON object with this exact schema:
{
  "hat": "<hat id>",
  "findings": [
    {
      "severity": "CRITICAL|HIGH|MEDIUM|LOW",
      "title": "<short title>",
      "description": "<what is wrong and why it matters>",
      "file": "<affected file path or null>",
      "line": <affected line number or null>,
      "recommendation": "<how to fix it>"
    }
  ],
  "summary": "<one-paragraph overall assessment>",
  "confidence": <0.0-1.0>
}
"""


def run_hat(config: dict, hat_id: str, diff_text: str, context: str = "") -> dict:
    """Execute a single hat analysis against the diff.

    Returns the hat's structured report or an error report.

    The hat's persona plus _FINDING_SCHEMA form the system prompt; the diff
    (and optional extra context) forms the user prompt. If the primary model
    fails, the configured fallback model is tried once. The model's reply is
    validated defensively: a non-JSON or non-object reply is wrapped as a
    single LOW finding instead of crashing the pipeline.
    """
    hat_def = config["hats"][hat_id]

    system_prompt = hat_def["persona"].strip() + "\n\n" + _FINDING_SCHEMA

    user_prompt = f"## Code Diff to Analyze\n\n```diff\n{diff_text}\n```"
    if context:
        user_prompt = f"## Additional Context\n\n{context}\n\n{user_prompt}"

    model = hat_def["primary_model"]
    temperature = hat_def.get("temperature", 0.3)
    max_tokens = hat_def.get("max_tokens", 4096)
    timeout = hat_def.get("timeout_seconds", 120)

    start = time.time()
    result = call_ollama(config, model, system_prompt, user_prompt,
                         temperature=temperature, max_tokens=max_tokens,
                         timeout=timeout)

    # If primary fails, try fallback (elapsed deliberately covers both attempts)
    if result["error"] and hat_def.get("fallback_model"):
        result = call_ollama(config, hat_def["fallback_model"], system_prompt,
                             user_prompt, temperature=temperature,
                             max_tokens=max_tokens, timeout=timeout)
    elapsed = time.time() - start

    report = {
        "hat_id": hat_id,
        "hat_name": hat_def["name"],
        "emoji": hat_def["emoji"],
        "model_used": result["model"],
        "latency_seconds": round(elapsed, 2),
        "token_usage": result["usage"],
        "error": result["error"],
        "findings": [],
        "summary": "",
        "confidence": 0.0,
    }

    if result["content"]:
        try:
            parsed = json.loads(result["content"])
        except json.JSONDecodeError:
            parsed = None
        # json.loads can legally return a list/str/number; only a dict is a
        # valid hat reply, so anything else is treated as unstructured too
        # (previously a non-dict reply raised AttributeError on .get()).
        if isinstance(parsed, dict):
            findings = parsed.get("findings", [])
            report["findings"] = findings if isinstance(findings, list) else []
            report["summary"] = parsed.get("summary", "")
            report["confidence"] = parsed.get("confidence", 0.0)
        else:
            # Model didn't return valid JSON — wrap the raw text as a single finding
            report["findings"] = [{
                "severity": "LOW",
                "title": "Unstructured response",
                "description": result["content"][:2000],
                "file": None,
                "line": None,
                "recommendation": "Review raw model output",
            }]
            report["summary"] = "Model returned unstructured response"

    return report


# ---------------------------------------------------------------------------
# Consolidator — merge all hat reports
# ---------------------------------------------------------------------------

def consolidate_reports(reports: list[dict]) -> dict:
    """Merge all hat reports into a single consolidated report.

    Aggregates findings from every hat, tags each finding with its source
    hat, and tallies findings per severity level.
    """
    merged_findings = []
    tallies = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    summaries = []
    tokens = {"input": 0, "output": 0}

    for rpt in reports:
        summaries.append({
            "hat": rpt["hat_name"],
            "emoji": rpt["emoji"],
            "model": rpt["model_used"],
            "latency_s": rpt["latency_seconds"],
            "findings_count": len(rpt["findings"]),
            "error": rpt.get("error"),
        })

        tokens["input"] += rpt["token_usage"]["input"]
        tokens["output"] += rpt["token_usage"]["output"]

        for item in rpt["findings"]:
            # Unknown severity labels are kept in the findings list but not
            # counted toward the severity tallies.
            level = item.get("severity", "LOW").upper()
            if level in tallies:
                tallies[level] += 1

            tagged = dict(item)
            tagged["source_hat"] = rpt["hat_name"]
            tagged["source_emoji"] = rpt["emoji"]
            merged_findings.append(tagged)

    return {
        "hat_summaries": summaries,
        "all_findings": merged_findings,
        "severity_counts": tallies,
        "total_tokens": tokens,
        "hats_executed": len(reports),
        "hats_failed": sum(1 for r in reports if r.get("error")),
    }


# ---------------------------------------------------------------------------
# Risk score calculator (per CATALOG.md formula)
# ---------------------------------------------------------------------------

def compute_risk_score(config: dict, severity_counts: dict) -> tuple[int, str]:
    """Compute composite risk score and verdict per CATALOG.md formula.

    Returns (score, verdict) where verdict is ALLOW, ESCALATE, or QUARANTINE.
    """
    rs = config.get("risk_score", {})

    def capped(count, weight_key, weight_default, cap_key, cap_default):
        # Each severity contributes count * weight, clamped at its cap.
        return min(count * rs.get(weight_key, weight_default),
                   rs.get(cap_key, cap_default))

    base = (
        capped(severity_counts.get("HIGH", 0), "high_weight", 5, "high_cap", 40)
        + capped(severity_counts.get("MEDIUM", 0), "medium_weight", 1, "medium_cap", 10)
        + int(capped(severity_counts.get("LOW", 0), "low_weight", 0.1, "low_cap", 5))
    )

    # Any CRITICAL → automatic QUARANTINE, with the score floored at 61
    if severity_counts.get("CRITICAL", 0) > 0:
        crit = capped(severity_counts["CRITICAL"], "critical_weight", 20,
                      "critical_cap", 80)
        return max(min(100, crit + base), 61), "QUARANTINE"

    score = min(100, base)
    if score <= rs.get("allow_threshold", 20):
        return score, "ALLOW"
    if score <= rs.get("escalate_threshold", 60):
        return score, "ESCALATE"
    return score, "QUARANTINE"
+ """ + rs = config.get("risk_score", {}) + + # Any CRITICAL โ†’ automatic QUARANTINE + if severity_counts.get("CRITICAL", 0) > 0: + score = min(100, + min(severity_counts["CRITICAL"] * rs.get("critical_weight", 20), rs.get("critical_cap", 80)) + + min(severity_counts.get("HIGH", 0) * rs.get("high_weight", 5), rs.get("high_cap", 40)) + + min(severity_counts.get("MEDIUM", 0) * rs.get("medium_weight", 1), rs.get("medium_cap", 10)) + + int(min(severity_counts.get("LOW", 0) * rs.get("low_weight", 0.1), rs.get("low_cap", 5))) + ) + return max(score, 61), "QUARANTINE" + + score = min(100, + min(severity_counts.get("HIGH", 0) * rs.get("high_weight", 5), rs.get("high_cap", 40)) + + min(severity_counts.get("MEDIUM", 0) * rs.get("medium_weight", 1), rs.get("medium_cap", 10)) + + int(min(severity_counts.get("LOW", 0) * rs.get("low_weight", 0.1), rs.get("low_cap", 5))) + ) + + allow_threshold = rs.get("allow_threshold", 20) + escalate_threshold = rs.get("escalate_threshold", 60) + + if score <= allow_threshold: + return score, "ALLOW" + elif score <= escalate_threshold: + return score, "ESCALATE" + else: + return score, "QUARANTINE" + + +# --------------------------------------------------------------------------- +# Markdown report generator +# --------------------------------------------------------------------------- + +def generate_markdown_report(consolidated: dict, risk_score: int, verdict: str, + selected_hats: list[str], config: dict) -> str: + """Generate a Markdown summary report suitable for PR comments.""" + lines = [] + lines.append("# ๐ŸŽฉ Hats Team Review Report\n") + + # Verdict banner + emoji_map = {"ALLOW": "โœ…", "ESCALATE": "โš ๏ธ", "QUARANTINE": "๐Ÿšซ"} + lines.append(f"## {emoji_map.get(verdict, 'โ“')} Verdict: **{verdict}** (Risk Score: {risk_score}/100)\n") + + # Severity summary + sc = consolidated["severity_counts"] + lines.append("### Severity Summary\n") + lines.append(f"| Severity | Count |") + lines.append(f"|----------|-------|") + 
lines.append(f"| ๐Ÿ”ด CRITICAL | {sc['CRITICAL']} |") + lines.append(f"| ๐ŸŸ  HIGH | {sc['HIGH']} |") + lines.append(f"| ๐ŸŸก MEDIUM | {sc['MEDIUM']} |") + lines.append(f"| ๐ŸŸข LOW | {sc['LOW']} |") + lines.append("") + + # Hat execution summary + lines.append("### Hat Execution Summary\n") + lines.append("| Hat | Model | Latency | Findings | Status |") + lines.append("|-----|-------|---------|----------|--------|") + for hs in consolidated["hat_summaries"]: + status = "โŒ Error" if hs["error"] else "โœ… OK" + lines.append( + f"| {hs['emoji']} {hs['hat']} | `{hs['model']}` | " + f"{hs['latency_s']:.1f}s | {hs['findings_count']} | {status} |" + ) + lines.append("") + + # Findings details + if consolidated["all_findings"]: + lines.append("### Findings\n") + for i, finding in enumerate(consolidated["all_findings"], 1): + sev = finding.get("severity", "LOW") + sev_emoji = {"CRITICAL": "๐Ÿ”ด", "HIGH": "๐ŸŸ ", "MEDIUM": "๐ŸŸก", "LOW": "๐ŸŸข"}.get(sev, "โšช") + lines.append(f"#### {i}. {sev_emoji} [{sev}] {finding.get('title', 'Untitled')}") + lines.append(f"**Source:** {finding.get('source_emoji', '')} {finding.get('source_hat', 'Unknown')}") + if finding.get("file"): + loc = f"`{finding['file']}`" + if finding.get("line"): + loc += f" (line {finding['line']})" + lines.append(f"**Location:** {loc}") + lines.append(f"\n{finding.get('description', '')}\n") + if finding.get("recommendation"): + lines.append(f"**Recommendation:** {finding['recommendation']}\n") + + # Pipeline stats + lines.append("### Pipeline Stats\n") + lines.append(f"- **Hats Selected:** {len(selected_hats)}") + lines.append(f"- **Hats Executed:** {consolidated['hats_executed']}") + lines.append(f"- **Hats Failed:** {consolidated['hats_failed']}") + lines.append(f"- **Total Tokens:** {consolidated['total_tokens']['input'] + consolidated['total_tokens']['output']:,}") + lines.append("") + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Main 
# ---------------------------------------------------------------------------

def run_pipeline(diff_text: str, config: dict, requested_hats: list[str] | None = None,
                 context: str = "", output_format: str = "both") -> dict:
    """Run the full Hats pipeline: select → estimate → execute → consolidate → adjudicate.

    Returns a dict with keys: verdict, risk_score, markdown, json_report, consolidated.
    """
    # Step 1: Select hats
    selected = select_hats(config, diff_text, requested_hats)
    print(f"🎩 Selected {len(selected)} hats: {', '.join(selected)}", file=sys.stderr)

    # Step 2: Cost estimation gate (G1)
    # Rough token estimate: ~4 chars per token
    diff_tokens = len(diff_text) // 4
    est_cost, within_budget = estimate_cost(config, selected, diff_tokens)
    print(f"💰 Estimated cost: ${est_cost:.4f} (budget: ${config['gates']['cost_budget']['max_usd_per_pr']}) "
          f"{'✅ PASS' if within_budget else '⚠️ OVER BUDGET'}", file=sys.stderr)

    if not within_budget:
        # Drop lowest-priority (Tier 4) conditional hats to fit budget.
        # Tier is looked up in the models table — previously this matched only
        # the literal model name "nemotron-3-nano", silently keeping other
        # Tier 4 hats (e.g. ones running on ministral-3).
        models_cfg = config.get("models", {})
        hats_cfg = config["hats"]

        def _tier(hat_id: str):
            # Tier of the hat's primary model; None if the model is unknown.
            return models_cfg.get(hats_cfg[hat_id].get("primary_model"), {}).get("tier")

        selected = [h for h in selected
                    if hats_cfg[h].get("always_run") or _tier(h) != 4]
        print(f"🎩 Trimmed to {len(selected)} hats after budget gate", file=sys.stderr)

    # Step 3: Execute hats (tiered parallel — per SPEC.md §6)
    # Separate Gold/CoVE (must run last) from the rest
    pre_gold = [h for h in selected if h != "gold"]
    max_workers = config["execution"]["max_concurrent_hats"]

    reports = []
    security_fast_path = False

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(run_hat, config, hat_id, diff_text, context): hat_id
                   for hat_id in pre_gold}

        for future in as_completed(futures):
            hat_id = futures[future]
            try:
                report = future.result()
            except Exception as exc:
                # A crashed worker becomes an error report so one bad hat
                # cannot take down the whole pipeline.
                report = {
                    "hat_id": hat_id,
                    "hat_name": config["hats"][hat_id]["name"],
                    "emoji": config["hats"][hat_id]["emoji"],
                    "model_used": "N/A",
                    "latency_seconds": 0,
                    "token_usage": {"input": 0, "output": 0},
                    "error": str(exc),
                    "findings": [],
                    "summary": "",
                    "confidence": 0.0,
                }
            reports.append(report)
            print(f" {report['emoji']} {report['hat_name']}: "
                  f"{len(report['findings'])} findings, {report['latency_seconds']:.1f}s"
                  + (f" ⚠️ {report['error']}" if report['error'] else ""),
                  file=sys.stderr)

            # Security fast-path gate (G2) — per SPEC.md §7. Alert at most
            # once (previously this printed once per CRITICAL finding).
            if config["gates"]["security_fast_path"]["enabled"] and not security_fast_path:
                if any(f.get("severity", "").upper() == "CRITICAL"
                       for f in report["findings"]):
                    security_fast_path = True
                    print("🚨 CRITICAL finding detected — security fast-path triggered",
                          file=sys.stderr)

    # Step 4: Run Gold/CoVE last (always)
    if "gold" in selected:
        # Build CoVE context from all prior reports
        cove_context = json.dumps({
            "prior_hat_reports": [
                {
                    "hat": r["hat_name"],
                    "findings": r["findings"],
                    "summary": r["summary"],
                    "confidence": r["confidence"],
                }
                for r in reports
            ],
            "security_fast_path_triggered": security_fast_path,
        }, indent=2)

        gold_report = run_hat(config, "gold", diff_text, context=cove_context)
        reports.append(gold_report)
        print(f" {gold_report['emoji']} {gold_report['hat_name']}: "
              f"{len(gold_report['findings'])} findings, {gold_report['latency_seconds']:.1f}s",
              file=sys.stderr)

    # Step 5: Consolidate
    consolidated = consolidate_reports(reports)

    # Step 6: Compute risk score and verdict
    risk_score, verdict = compute_risk_score(config, consolidated["severity_counts"])
    print(f"\n{'='*60}", file=sys.stderr)
    print(f"🎩 VERDICT: {verdict} (Risk Score: {risk_score}/100)", file=sys.stderr)
    print(f"{'='*60}", file=sys.stderr)

    # Step 7: Generate outputs
    markdown = generate_markdown_report(consolidated, risk_score, verdict, selected, config)

    json_report = {
        "verdict": verdict,
        "risk_score": risk_score,
        "severity_counts": consolidated["severity_counts"],
        "hats_executed": consolidated["hats_executed"],
        "hats_failed": consolidated["hats_failed"],
        "total_tokens": consolidated["total_tokens"],
        "hat_summaries": consolidated["hat_summaries"],
        "findings": consolidated["all_findings"],
        "security_fast_path_triggered": security_fast_path,
    }

    return {
        "verdict": verdict,
        "risk_score": risk_score,
        "markdown": markdown,
        "json_report": json_report,
        "consolidated": consolidated,
    }
"risk_score": risk_score, + "severity_counts": consolidated["severity_counts"], + "hats_executed": consolidated["hats_executed"], + "hats_failed": consolidated["hats_failed"], + "total_tokens": consolidated["total_tokens"], + "hat_summaries": consolidated["hat_summaries"], + "findings": consolidated["all_findings"], + "security_fast_path_triggered": security_fast_path, + } + + return { + "verdict": verdict, + "risk_score": risk_score, + "markdown": markdown, + "json_report": json_report, + "consolidated": consolidated, + } + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="๐ŸŽฉ Hats Team Runner โ€” run the Hats review pipeline on a diff" + ) + parser.add_argument( + "--diff", required=True, + help="Path to diff file, or '-' to read from stdin" + ) + parser.add_argument( + "--config", default=str(DEFAULT_CONFIG), + help="Path to hat_configs.yml (default: scripts/hat_configs.yml)" + ) + parser.add_argument( + "--hats", default=None, + help="Comma-separated list of hat IDs to run (e.g., 'black,blue,purple'). " + "Default: auto-select based on diff triggers." 
+ ) + parser.add_argument( + "--context", default="", + help="Additional context to include in hat prompts (e.g., PR description)" + ) + parser.add_argument( + "--output", choices=["json", "markdown", "both"], default="both", + help="Output format (default: both)" + ) + parser.add_argument( + "--json-file", default=None, + help="Path to write JSON report" + ) + parser.add_argument( + "--markdown-file", default=None, + help="Path to write Markdown report" + ) + + args = parser.parse_args() + + # Preflight health check โ€” fail fast with clear guidance + issues = preflight_check() + has_errors = any(msg.startswith("โŒ") for msg in issues) + for msg in issues: + print(msg, file=sys.stderr) + if has_errors: + print("\n๐Ÿ›‘ Cannot proceed โ€” fix the errors above and try again.", file=sys.stderr) + print(" See FORK_SETUP.md for setup instructions.", file=sys.stderr) + sys.exit(2) + + # Load config + config = load_config(args.config) + + # Read diff + if args.diff == "-": + diff_text = sys.stdin.read() + else: + with open(args.diff, "r", encoding="utf-8") as fh: + diff_text = fh.read() + + if not diff_text.strip(): + print("โš ๏ธ Empty diff โ€” nothing to review.", file=sys.stderr) + sys.exit(0) + + # Parse requested hats + requested_hats = None + if args.hats: + requested_hats = [h.strip() for h in args.hats.split(",")] + + # Run pipeline + result = run_pipeline(diff_text, config, requested_hats=requested_hats, + context=args.context, output_format=args.output) + + # Output results + if args.output in ("markdown", "both"): + if args.markdown_file: + with open(args.markdown_file, "w", encoding="utf-8") as fh: + fh.write(result["markdown"]) + else: + print(result["markdown"]) + + if args.output in ("json", "both"): + json_str = json.dumps(result["json_report"], indent=2) + if args.json_file: + with open(args.json_file, "w", encoding="utf-8") as fh: + fh.write(json_str) + elif args.output == "json": + print(json_str) + + # Set GitHub Actions outputs if running in CI + 
github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + with open(github_output, "a", encoding="utf-8") as fh: + fh.write(f"verdict={result['verdict']}\n") + fh.write(f"risk_score={result['risk_score']}\n") + fh.write(f"hats_executed={result['consolidated']['hats_executed']}\n") + + # Exit code: 0 for ALLOW, 1 for ESCALATE/QUARANTINE + if result["verdict"] != "ALLOW": + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py new file mode 100644 index 0000000..13bc9e4 --- /dev/null +++ b/scripts/hats_task_runner.py @@ -0,0 +1,588 @@ +#!/usr/bin/env python3 +""" +๐ŸŽฉ Hats Task Runner โ€” Agentic Task Execution via Hat Expertise + +Goes beyond PR review: uses the Hats model pool to *do work* on projects. +Your local agent (e.g., GitHub Copilot in VS Code) dispatches tasks here, +and hat_stack executes them using the right hat expertise and model tier. + +Supported task types: + - generate_code โ€” Build modules, functions, classes, APIs + - generate_docs โ€” Write documentation, READMEs, ADRs, specs + - refactor โ€” Restructure, optimize, or modernize existing code + - analyze โ€” Deep analysis with a written report (architecture, security, etc.) 
+ - plan โ€” Create implementation plans, roadmaps, task breakdowns + - test โ€” Generate test suites, test cases, fixtures + - review โ€” Review code/diff (delegates to hats_runner for structured review) + +Usage: + python hats_task_runner.py \\ + --task "generate_code" \\ + --prompt "Build a FastAPI authentication module with JWT tokens" \\ + --target-repo owner/repo \\ + --target-branch feature/auth \\ + --hats black,blue,green \\ + --output /tmp/hats-task-output + +Environment: + OLLAMA_API_KEY โ€” Ollama Cloud API key (required) + OLLAMA_BASE_URL โ€” API base URL (default: https://api.ollama.ai/v1) +""" + +import argparse +import json +import os +import sys +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +import requests +import yaml + +SCRIPT_DIR = Path(__file__).resolve().parent +DEFAULT_CONFIG = SCRIPT_DIR / "hat_configs.yml" + +# Task type โ†’ which hats are most useful + what system role to use +TASK_PROFILES = { + "generate_code": { + "description": "Generate code: modules, functions, classes, APIs", + "recommended_hats": ["green", "yellow", "blue", "black", "purple", "gold"], + "primary_hat": "green", # Green Hat = evolution & extensibility + "model_tier": 1, # Use best models for code generation + "output_type": "code", + }, + "generate_docs": { + "description": "Generate documentation, READMEs, ADRs, specs", + "recommended_hats": ["blue", "green", "purple", "gold"], + "primary_hat": "blue", # Blue Hat = process & specification + "model_tier": 2, + "output_type": "markdown", + }, + "refactor": { + "description": "Refactor, restructure, or optimize existing code", + "recommended_hats": ["white", "red", "indigo", "green", "black", "gold"], + "primary_hat": "white", # White Hat = efficiency + "model_tier": 1, + "output_type": "code", + }, + "analyze": { + "description": "Deep analysis with written report", + "recommended_hats": ["black", "purple", "red", "gray", "brown", "gold"], + "primary_hat": 
"black", # Black Hat is default for security-focused analysis + "model_tier": 1, + "output_type": "markdown", + }, + "plan": { + "description": "Implementation plans, roadmaps, task breakdowns", + "recommended_hats": ["green", "yellow", "cyan", "blue", "gold"], + "primary_hat": "cyan", # Cyan Hat = innovation & feasibility + "model_tier": 2, + "output_type": "markdown", + }, + "test": { + "description": "Generate test suites, test cases, fixtures", + "recommended_hats": ["chartreuse", "red", "black", "blue", "gold"], + "primary_hat": "chartreuse", # Chartreuse Hat = testing & evaluation + "model_tier": 2, + "output_type": "code", + }, +} + +# --------------------------------------------------------------------------- +# Task-mode system prompts โ€” transform hats from reviewers to builders +# --------------------------------------------------------------------------- + +_TASK_SYSTEM_PREFIX = """\ +You are operating in TASK MODE โ€” you are not reviewing code, you are CREATING deliverables. +Your output will be used directly in a project. Be thorough, production-quality, and complete. 
+""" + +_OUTPUT_SCHEMAS = { + "code": """\ +Respond with a JSON object: +{ + "files": [ + { + "path": "relative/path/to/file.ext", + "content": "full file content here", + "description": "what this file does" + } + ], + "summary": "what was created and why", + "notes": ["any important notes for the developer"] +} +""", + "markdown": """\ +Respond with a JSON object: +{ + "files": [ + { + "path": "relative/path/to/file.md", + "content": "full markdown content here", + "description": "what this document covers" + } + ], + "summary": "what was created and why", + "notes": ["any important notes for the developer"] +} +""", +} + + +def load_config(config_path: str | Path) -> dict: + """Load hat configuration from YAML file.""" + with open(config_path, "r", encoding="utf-8") as fh: + return yaml.safe_load(fh) + + +def call_ollama(config: dict, model: str, system_prompt: str, user_prompt: str, + temperature: float = 0.3, max_tokens: int = 8192, + timeout: int = 300) -> dict: + """Call the Ollama Cloud API.""" + api_cfg = config["api"] + base_url = os.environ.get( + api_cfg.get("base_url_env", "OLLAMA_BASE_URL"), + api_cfg.get("default_base_url", "https://api.ollama.ai/v1"), + ) + api_key = os.environ.get( + api_cfg.get("api_key_env", "OLLAMA_API_KEY"), "" + ) + + if not api_key: + return {"error": "OLLAMA_API_KEY not set", "model": model, "content": None, + "usage": {"input": 0, "output": 0}} + + url = f"{base_url.rstrip('/')}/chat/completions" + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "temperature": temperature, + "max_tokens": max_tokens, + "response_format": {"type": "json_object"}, + } + + try: + resp = requests.post(url, headers=headers, json=payload, timeout=timeout) + resp.raise_for_status() + data = resp.json() + choice = data.get("choices", [{}])[0] + usage = 
data.get("usage", {}) + return { + "error": None, "model": model, + "content": choice.get("message", {}).get("content", ""), + "usage": {"input": usage.get("prompt_tokens", 0), + "output": usage.get("completion_tokens", 0)}, + } + except requests.exceptions.Timeout: + return {"error": f"Timeout after {timeout}s", "model": model, + "content": None, "usage": {"input": 0, "output": 0}} + except requests.exceptions.RequestException as exc: + return {"error": str(exc), "model": model, + "content": None, "usage": {"input": 0, "output": 0}} + + +def select_model_for_task(config: dict, hat_id: str, task_type: str) -> str: + """Select the best model for a task, using task tier to pick model quality. + + Tier 1 tasks (generate_code, analyze, refactor) use Tier 1 models (e.g. glm-5.1). + Tier 2+ tasks (generate_docs, plan, test) use the hat's assigned primary_model. + """ + hats_cfg = config["hats"] + hat_def = hats_cfg.get(hat_id, {}) + profile = TASK_PROFILES.get(task_type, {}) + models_cfg = config.get("models", {}) + + # For Tier 1 tasks, pick the best available Tier 1 model + if profile.get("model_tier", 2) == 1: + # Prefer the hat's own primary if it's already Tier 1 + hat_model = hat_def.get("primary_model", "glm-5.1") + model_info = models_cfg.get(hat_model, {}) + if model_info.get("tier") == 1: + return hat_model + # Otherwise find a Tier 1 model from the config + for model_name, model_meta in models_cfg.items(): + if model_meta.get("tier") == 1: + return model_name + return "glm-5.1" # ultimate fallback + + # For Tier 2+ tasks, use the hat's assigned primary model + return hat_def.get("primary_model", "nemotron-3-super") + + +def build_task_prompt(config: dict, hat_id: str, task_type: str, + user_prompt: str, context_files: dict | None = None) -> tuple[str, str]: + """Build the system and user prompts for a task execution. + + Returns (system_prompt, user_prompt). 
+ """ + hats_cfg = config["hats"] + hat_def = hats_cfg.get(hat_id, {}) + profile = TASK_PROFILES.get(task_type, {}) + output_type = profile.get("output_type", "markdown") + + # System prompt: task mode prefix + hat persona + output schema + system = ( + _TASK_SYSTEM_PREFIX + "\n" + + hat_def.get("persona", "You are an expert software engineer.").strip() + "\n\n" + + _OUTPUT_SCHEMAS.get(output_type, _OUTPUT_SCHEMAS["markdown"]) + ) + + # User prompt: the actual task + any context files + user = f"## Task\n\n{user_prompt}\n" + if context_files: + user += "\n## Existing Project Files (for context)\n\n" + for filepath, content in context_files.items(): + user += f"### `{filepath}`\n```\n{content[:5000]}\n```\n\n" + + return system, user + + +def run_task_hat(config: dict, hat_id: str, task_type: str, + user_prompt: str, context_files: dict | None = None) -> dict: + """Execute a single hat in task mode.""" + model = select_model_for_task(config, hat_id, task_type) + hat_def = config["hats"].get(hat_id, {}) + system_prompt, full_user_prompt = build_task_prompt( + config, hat_id, task_type, user_prompt, context_files + ) + + start = time.time() + result = call_ollama( + config, model, system_prompt, full_user_prompt, + temperature=hat_def.get("temperature", 0.3), + max_tokens=8192, # Task mode needs more output room + timeout=hat_def.get("timeout_seconds", 300), + ) + elapsed = time.time() - start + + # Try fallback if primary fails + if result["error"] and hat_def.get("fallback_model"): + result = call_ollama( + config, hat_def["fallback_model"], system_prompt, full_user_prompt, + temperature=hat_def.get("temperature", 0.3), + max_tokens=8192, + timeout=hat_def.get("timeout_seconds", 300), + ) + elapsed = time.time() - start + + report = { + "hat_id": hat_id, + "hat_name": hat_def.get("name", hat_id), + "emoji": hat_def.get("emoji", "๐ŸŽฉ"), + "model_used": result["model"], + "latency_seconds": round(elapsed, 2), + "token_usage": result["usage"], + "error": 
result["error"], + "files": [], + "summary": "", + "notes": [], + } + + if result["content"]: + try: + parsed = json.loads(result["content"]) + report["files"] = parsed.get("files", []) + report["summary"] = parsed.get("summary", "") + report["notes"] = parsed.get("notes", []) + except json.JSONDecodeError: + # Wrap raw output as a single markdown file + report["files"] = [{ + "path": "output.md", + "content": result["content"], + "description": "Raw model output (JSON parsing failed)", + }] + report["summary"] = "Model returned unstructured output" + + return report + + +def run_task_pipeline(config: dict, task_type: str, user_prompt: str, + requested_hats: list[str] | None = None, + context_files: dict | None = None) -> dict: + """Run the full task pipeline. + + 1. Select hats based on task profile (or use requested hats) + 2. Primary hat generates the deliverable + 3. Supporting hats review/enhance the deliverable + 4. Gold Hat does final quality check + """ + profile = TASK_PROFILES.get(task_type) + if not profile: + print(f"โŒ Unknown task type: {task_type}", file=sys.stderr) + print(f" Available: {', '.join(TASK_PROFILES.keys())}", file=sys.stderr) + sys.exit(2) + + # Select hats + if requested_hats and len(requested_hats) > 0: + hat_ids = requested_hats + else: + hat_ids = profile["recommended_hats"] + + primary_hat = hat_ids[0] if requested_hats and len(requested_hats) > 0 else profile["primary_hat"] + supporting_hats = [h for h in hat_ids if h != primary_hat and h != "gold"] + + print(f"๐ŸŽฉ Task: {profile['description']}", file=sys.stderr) + print(f"๐ŸŽฉ Primary hat: {primary_hat}", file=sys.stderr) + print(f"๐ŸŽฉ Supporting hats: {', '.join(supporting_hats) or 'none'}", file=sys.stderr) + + # Step 1: Primary hat generates the main deliverable + print(f"\n๐Ÿ“ Phase 1: Generating with {primary_hat}...", file=sys.stderr) + primary_result = run_task_hat(config, primary_hat, task_type, user_prompt, context_files) + print(f" {primary_result['emoji']} 
{primary_result['hat_name']}: " + f"{len(primary_result['files'])} files, {primary_result['latency_seconds']:.1f}s" + + (f" โš ๏ธ {primary_result['error']}" if primary_result['error'] else ""), + file=sys.stderr) + + # Step 2: Supporting hats review/enhance (parallel) + supporting_results = [] + if supporting_hats and primary_result["files"]: + print(f"\n๐Ÿ” Phase 2: Review/enhance with {len(supporting_hats)} supporting hats...", + file=sys.stderr) + + # Build a review prompt from the primary output + review_context = json.dumps({ + "primary_hat": primary_hat, + "task_type": task_type, + "generated_files": primary_result["files"], + }, indent=2) + + review_prompt = ( + f"A {primary_hat} hat generated the following deliverable for this task:\n" + f"Original task: {user_prompt}\n\n" + f"Generated output:\n```json\n{review_context}\n```\n\n" + f"Review this output through YOUR hat's expertise lens. " + f"Suggest improvements, flag issues, or enhance the deliverable. " + f"If you have file improvements, include them in your response." 
+ ) + + max_workers = min(len(supporting_hats), config["execution"]["max_concurrent_hats"]) + with ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = { + pool.submit(run_task_hat, config, hat_id, "analyze", review_prompt, context_files): hat_id + for hat_id in supporting_hats + } + for future in as_completed(futures): + hat_id = futures[future] + try: + result = future.result() + except Exception as exc: + result = { + "hat_id": hat_id, + "hat_name": config["hats"].get(hat_id, {}).get("name", hat_id), + "emoji": config["hats"].get(hat_id, {}).get("emoji", "๐ŸŽฉ"), + "model_used": "N/A", + "latency_seconds": 0, + "token_usage": {"input": 0, "output": 0}, + "error": str(exc), + "files": [], "summary": "", "notes": [], + } + supporting_results.append(result) + print(f" {result['emoji']} {result['hat_name']}: " + f"{len(result['notes'])} notes, {result['latency_seconds']:.1f}s" + + (f" โš ๏ธ {result['error']}" if result['error'] else ""), + file=sys.stderr) + + # Step 3: Gold Hat final QA (if in hat list) + gold_result = None + if "gold" in hat_ids: + print(f"\nโœจ Phase 3: Gold Hat final QA...", file=sys.stderr) + gold_context = json.dumps({ + "task_type": task_type, + "primary_output": {"hat": primary_hat, "files": primary_result["files"]}, + "supporting_reviews": [ + {"hat": r["hat_id"], "summary": r["summary"], "notes": r["notes"]} + for r in supporting_results + ], + }, indent=2) + + gold_prompt = ( + f"Original task: {user_prompt}\n\n" + f"Review the complete task output and all supporting hat feedback:\n" + f"```json\n{gold_context}\n```\n\n" + f"Provide final quality assessment and any last improvements." 
+ ) + gold_result = run_task_hat(config, "gold", "analyze", gold_prompt, context_files) + print(f" {gold_result['emoji']} {gold_result['hat_name']}: " + f"{gold_result['latency_seconds']:.1f}s", file=sys.stderr) + + # Compile final output + all_notes = primary_result.get("notes", []) + for r in supporting_results: + all_notes.extend(r.get("notes", [])) + if gold_result: + all_notes.extend(gold_result.get("notes", [])) + + total_tokens = {"input": 0, "output": 0} + all_results = [primary_result] + supporting_results + ([gold_result] if gold_result else []) + for r in all_results: + total_tokens["input"] += r["token_usage"]["input"] + total_tokens["output"] += r["token_usage"]["output"] + + return { + "task_type": task_type, + "primary_hat": primary_hat, + "files": primary_result["files"], + "summary": primary_result["summary"], + "notes": all_notes, + "supporting_reviews": [ + {"hat": r["hat_id"], "summary": r["summary"], "notes": r["notes"]} + for r in supporting_results + ], + "gold_review": { + "summary": gold_result["summary"] if gold_result else "", + "notes": gold_result.get("notes", []) if gold_result else [], + }, + "stats": { + "hats_executed": len(all_results), + "total_tokens": total_tokens, + "total_latency_seconds": sum(r["latency_seconds"] for r in all_results), + }, + } + + +def write_output_files(task_result: dict, output_dir: str): + """Write generated files to the output directory.""" + out = Path(output_dir) + out.mkdir(parents=True, exist_ok=True) + + for file_entry in task_result.get("files", []): + filepath = out / file_entry["path"] + filepath.parent.mkdir(parents=True, exist_ok=True) + filepath.write_text(file_entry["content"], encoding="utf-8") + print(f" ๐Ÿ“„ {filepath}", file=sys.stderr) + + # Write summary + summary_lines = [f"# ๐ŸŽฉ Hats Task Output\n"] + summary_lines.append(f"**Task:** {task_result['task_type']}") + summary_lines.append(f"**Primary Hat:** {task_result['primary_hat']}") + summary_lines.append(f"**Summary:** 
{task_result['summary']}\n") + + if task_result["files"]: + summary_lines.append("## Generated Files\n") + for f in task_result["files"]: + summary_lines.append(f"- `{f['path']}` โ€” {f.get('description', '')}") + + if task_result["notes"]: + summary_lines.append("\n## Notes\n") + for note in task_result["notes"]: + summary_lines.append(f"- {note}") + + if task_result.get("supporting_reviews"): + summary_lines.append("\n## Supporting Hat Reviews\n") + for rev in task_result["supporting_reviews"]: + if rev["summary"]: + summary_lines.append(f"### {rev['hat']}\n{rev['summary']}\n") + + summary_path = out / "HATS_TASK_SUMMARY.md" + summary_path.write_text("\n".join(summary_lines), encoding="utf-8") + print(f" ๐Ÿ“‹ {summary_path}", file=sys.stderr) + + # Write JSON report + json_path = out / "hats_task_result.json" + json_path.write_text(json.dumps(task_result, indent=2), encoding="utf-8") + print(f" ๐Ÿ“Š {json_path}", file=sys.stderr) + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="๐ŸŽฉ Hats Task Runner โ€” execute agentic tasks using hat expertise" + ) + parser.add_argument( + "--task", required=True, + choices=list(TASK_PROFILES.keys()), + help="Type of task to execute" + ) + parser.add_argument( + "--prompt", required=True, + help="What you want done (natural language task description)" + ) + parser.add_argument( + "--config", default=str(DEFAULT_CONFIG), + help="Path to hat_configs.yml" + ) + parser.add_argument( + "--hats", default=None, + help="Comma-separated hat IDs to use (default: auto-select per task profile)" + ) + parser.add_argument( + "--context-dir", default=None, + help="Directory of existing project files to provide as context" + ) + parser.add_argument( + "--output", default="/tmp/hats-task-output", + help="Output directory for generated files" + ) + 
parser.add_argument( + "--json-file", default=None, + help="Path to write JSON result (in addition to output dir)" + ) + + args = parser.parse_args() + + # Preflight + api_key = os.environ.get("OLLAMA_API_KEY", "").strip() + if not api_key: + print("โŒ OLLAMA_API_KEY is not set.", file=sys.stderr) + print(" See FORK_SETUP.md for setup instructions.", file=sys.stderr) + sys.exit(2) + + config = load_config(args.config) + + # Load context files if provided + context_files = None + if args.context_dir: + context_dir = Path(args.context_dir) + if context_dir.is_dir(): + context_files = {} + for p in sorted(context_dir.rglob("*")): + if p.is_file() and p.stat().st_size < 50000: # Skip huge files + try: + context_files[str(p.relative_to(context_dir))] = p.read_text(encoding="utf-8") + except (UnicodeDecodeError, PermissionError): + pass + print(f"๐Ÿ“ Loaded {len(context_files)} context files from {context_dir}", + file=sys.stderr) + + requested_hats = None + if args.hats: + requested_hats = [h.strip() for h in args.hats.split(",")] + + # Run the task + result = run_task_pipeline( + config, args.task, args.prompt, + requested_hats=requested_hats, + context_files=context_files, + ) + + # Write outputs + print(f"\n๐Ÿ“ฆ Writing output to {args.output}/", file=sys.stderr) + write_output_files(result, args.output) + + if args.json_file: + with open(args.json_file, "w", encoding="utf-8") as fh: + json.dump(result, fh, indent=2) + + # GitHub Actions outputs + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + with open(github_output, "a", encoding="utf-8") as fh: + fh.write(f"task_type={result['task_type']}\n") + fh.write(f"files_generated={len(result['files'])}\n") + fh.write(f"hats_executed={result['stats']['hats_executed']}\n") + fh.write(f"output_dir={args.output}\n") + + print(f"\nโœ… Task complete: {len(result['files'])} files generated, " + f"{result['stats']['hats_executed']} hats used", file=sys.stderr) + + +if __name__ == "__main__": + main() diff 
--git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..eb8507b --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,2 @@ +pyyaml>=6.0,<7 +requests>=2.31,<3