From db907f26f972f9baa7b90590e7ad204b12f3242f Mon Sep 17 00:00:00 2001 From: Rafael Cordones Date: Mon, 4 May 2026 09:46:36 +0200 Subject: [PATCH 1/2] Render non-applicable AI checks as gray "skipped" Move per-rule applicability from the runner to the matrix-emitter `list` job so non-applicable shards skip at the GHA level (gray icon) rather than running and exiting 0 (green). Drop `skip` from the verdict enum; semantic non-applicability is now `pass` with a "rule does not apply" note. Closes #124. Co-Authored-By: Claude Opus 4.7 --- .ai-checks/asgi-wsgi-scott.md | 1 + .ai-checks/entity-creation-race-safety.md | 1 + .ai-checks/pipeline-step-sync.md | 1 + .ai-checks/qdrant-payload-slim.md | 1 + .github/scripts/list_ai_checks.py | 73 ++++++++++++++--- .github/scripts/run_ai_check.py | 42 +++++++--- .github/workflows/ai-checks.yml | 12 ++- CHANGELOG.md | 1 + doc/features/2026-05-04-ai-checks-skipped.md | 79 +++++++++++++++++++ doc/plans/2026-05-04-ai-checks-skipped.md | 64 +++++++++++++++ ...i-checks-skipped-implementation-session.md | 74 +++++++++++++++++ ...5-04-ai-checks-skipped-planning-session.md | 41 ++++++++++ 12 files changed, 368 insertions(+), 22 deletions(-) create mode 100644 doc/features/2026-05-04-ai-checks-skipped.md create mode 100644 doc/plans/2026-05-04-ai-checks-skipped.md create mode 100644 doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md create mode 100644 doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md diff --git a/.ai-checks/asgi-wsgi-scott.md b/.ai-checks/asgi-wsgi-scott.md index a5b257f..9e36660 100644 --- a/.ai-checks/asgi-wsgi-scott.md +++ b/.ai-checks/asgi-wsgi-scott.md @@ -1,6 +1,7 @@ --- name: ASGI vs WSGI Awareness for Scott description: Anything touching Scott's chat path must be tested under uvicorn (ASGI). runserver does not load asgi.py and will silently 404 the AG-UI endpoint. +paths: ragtime/asgi.py, chat/**, frontend/** --- # ASGI vs WSGI Awareness for Scott diff --git a/.ai-checks/entity-creation-race-safety.md b/.ai-checks/entity-creation-race-safety.md index 1eb9ade..ed1fa6d 100644 --- a/.ai-checks/entity-creation-race-safety.md +++ b/.ai-checks/entity-creation-race-safety.md @@ -1,6 +1,7 @@ --- name: Entity Creation Race Safety description: All Entity row creation must go through get_or_create with a sorted advisory lock — never bare Entity.objects.create(). +paths: episodes/** --- # Entity Creation Race Safety diff --git a/.ai-checks/pipeline-step-sync.md b/.ai-checks/pipeline-step-sync.md index a4f52cc..e8f7b90 100644 --- a/.ai-checks/pipeline-step-sync.md +++ b/.ai-checks/pipeline-step-sync.md @@ -1,6 +1,7 @@ --- name: Pipeline Step Documentation Sync description: When the 10-step ingestion pipeline changes, README.md, doc/README.md, and diagrams must be updated in lockstep. +paths: episodes/models.py, episodes/workflows.py, README.md, doc/README.md, doc/*.excalidraw --- # Pipeline Step Documentation Sync diff --git a/.ai-checks/qdrant-payload-slim.md b/.ai-checks/qdrant-payload-slim.md index 032323a..615b00c 100644 --- a/.ai-checks/qdrant-payload-slim.md +++ b/.ai-checks/qdrant-payload-slim.md @@ -1,6 +1,7 @@ --- name: Slim Qdrant Payload Discipline description: Qdrant chunk points must carry only filter fields. All display data is hydrated from Postgres at search time. +paths: episodes/vector_store.py --- # Slim Qdrant Payload Discipline diff --git a/.github/scripts/list_ai_checks.py b/.github/scripts/list_ai_checks.py index b88deba..f8a9b18 100644 --- a/.github/scripts/list_ai_checks.py +++ b/.github/scripts/list_ai_checks.py @@ -3,38 +3,91 @@ Output line format (written to $GITHUB_OUTPUT): - matrix={"include": [{"name": "", "path": ".ai-checks/foo.md"}, ...]} + matrix={"include": [{"name": "", "path": ".ai-checks/foo.md", "applies": true}, ...]} + +`applies` is determined by reading each rule's optional `paths:` frontmatter +(comma-separated `fnmatch` globs) and matching it against `git diff +--name-only $BASE_REF...HEAD`. Rules without `paths:` always have +`applies: true` (they're semantic and must be evaluated by the model). + +The workflow then gates each matrix shard on `if: ${{ matrix.applies }}` so +non-applicable rules render as gray "skipped" jobs in the PR Checks tab +rather than green "pass" — preserving the signal that the rule was scoped +out, not silently waved through. """ from __future__ import annotations +import fnmatch import json +import os import re +import subprocess import sys from pathlib import Path CHECKS_DIR = Path(".ai-checks") -def parse_name(path: Path) -> str: +def parse_frontmatter(path: Path) -> dict[str, str]: text = path.read_text() m = re.match(r"^---\r?\n(.*?)\r?\n---", text, re.DOTALL) if not m: - return path.stem + return {} + fm: dict[str, str] = {} for line in m.group(1).splitlines(): - if line.startswith("name:"): - return line.split(":", 1)[1].strip() - return path.stem + if ":" in line and not line.startswith("#"): + k, v = line.split(":", 1) + fm[k.strip()] = v.strip() + return fm + + +def changed_files(base: str) -> list[str]: + try: + out = subprocess.run( + ["git", "diff", "--name-only", f"{base}...HEAD"], + check=True, + capture_output=True, + text=True, + ).stdout + except subprocess.CalledProcessError: + return [] + return [line for line in out.splitlines() if line.strip()] + + +def matches_paths(files: list[str], patterns: list[str]) -> bool: + return any(fnmatch.fnmatch(f, p) for f in files for p in patterns) + + +def applies_for(fm: dict[str, str], files: list[str]) -> bool: + raw = fm.get("paths", "").strip() + if not raw: + return True + patterns = [p.strip() for p in raw.split(",") if p.strip()] + if not patterns: + return True + return matches_paths(files, patterns) def main() -> int: if not CHECKS_DIR.is_dir(): print(f"matrix={json.dumps({'include': []})}") return 0 - items = [ - {"name": parse_name(p), "path": str(p)} - for p in sorted(CHECKS_DIR.glob("*.md")) - ] + + base = os.environ.get("BASE_REF", "origin/main") + files = changed_files(base) + + items = [] + for p in sorted(CHECKS_DIR.glob("*.md")): + fm = parse_frontmatter(p) + items.append( + { + "name": fm.get("name", p.stem), + "path": str(p), + "applies": applies_for(fm, files), + } + ) + print(f"matrix={json.dumps({'include': items})}") return 0 diff --git a/.github/scripts/run_ai_check.py b/.github/scripts/run_ai_check.py index 3621820..45c724e 100644 --- a/.github/scripts/run_ai_check.py +++ b/.github/scripts/run_ai_check.py @@ -9,8 +9,14 @@ `gemini/gemini-2.0-flash`. LiteLLM auto-reads each provider's canonical env var (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, ...). -Exits 1 on `fail`, 0 on `pass` / `skip`. Each invocation is a single GH -Actions matrix job, so the job's pass/fail status becomes the PR check. +Exits 1 on `fail`, 0 on `pass`. Each invocation is a single GH Actions +matrix job, so the job's pass/fail status becomes the PR check. + +Path-scoped non-applicability is now handled at the workflow level: the +`list` job evaluates each rule's `paths:` frontmatter against the PR's +changed files and emits `applies: bool` per matrix include. Shards with +`applies: false` are skipped at the GHA level (gray icon, no runner). The +driver itself only ever returns `pass` or `fail`. """ from __future__ import annotations @@ -55,9 +61,10 @@ class ConfigError(RuntimeError): The rule defines what to check. After reading the diff, report exactly one verdict via the `report_verdict` function: -- pass: the diff complies with the rule, OR the rule does not apply to this diff in any meaningful way. +- pass: the diff complies with the rule, OR the rule does not apply to this diff in any meaningful way. When the rule does not apply, set `summary` to "Rule does not apply." and put a one-line "rule does not apply because …" in `details`. - fail: the diff clearly violates the rule, with concrete evidence visible in the diff. -- skip: the rule cannot be evaluated against this diff (e.g. no relevant files were changed and the rule is path-scoped). + +Path-scoped non-applicability is handled before you are invoked — if you are evaluating this diff at all, the workflow has decided the rule is in scope. Your job is to check compliance, not to second-guess scope. Be conservative. Return `fail` only when you can point at specific lines in the diff that violate the rule. Vague concerns, style nitpicks, or "could be improved" remarks are not failures — return `pass` and put your notes in the `details` field.""" @@ -85,7 +92,7 @@ class ConfigError(RuntimeError): "properties": { "verdict": { "type": "string", - "enum": ["pass", "fail", "skip"], + "enum": ["pass", "fail"], "description": "Overall verdict for this rule against the diff.", }, "summary": { @@ -141,15 +148,28 @@ def evaluate(check_path: Path, base: str, model: str) -> dict[str, str]: diff, files = get_diff(base) if not diff.strip(): - return {"verdict": "skip", "summary": "Empty diff.", "details": ""} + return { + "verdict": "pass", + "summary": "Rule does not apply.", + "details": "Rule does not apply because the diff is empty.", + } + # Workflow normally gates path-scoped rules at the matrix level via + # `matrix.applies`, so this branch is mostly a safety net for direct + # CLI invocation. Returns `pass` with an explanatory note rather than + # `skip` (which is no longer in the verdict enum — semantic + # non-applicability is now `pass`, path non-applicability is gray-skipped + # at the GHA job level). if "paths" in fm: patterns = [p.strip() for p in fm["paths"].split(",") if p.strip()] if patterns and not matches_paths(files, patterns): return { - "verdict": "skip", - "summary": "No matching files changed.", - "details": f"Rule scoped to: `{fm['paths']}`", + "verdict": "pass", + "summary": "Rule does not apply.", + "details": ( + f"Rule does not apply because no changed files match its " + f"path scope: `{fm['paths']}`." + ), } if len(diff) > MAX_DIFF_CHARS: @@ -203,7 +223,7 @@ def write_step_summary(check_name: str, result: dict[str, str]) -> None: summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if not summary_path: return - marker = {"pass": "PASS", "fail": "FAIL", "skip": "SKIP"}[result["verdict"]] + marker = {"pass": "PASS", "fail": "FAIL"}[result["verdict"]] md = ( f"# [{marker}] {check_name}\n\n" f"**Verdict:** `{result['verdict']}`\n\n" @@ -265,7 +285,7 @@ def main() -> int: print(f"::error title=AI check setup error ({name})::{e}") return 2 - marker = {"pass": "PASS", "fail": "FAIL", "skip": "SKIP"}[result["verdict"]] + marker = {"pass": "PASS", "fail": "FAIL"}[result["verdict"]] print(f"[{marker}] {name}") print(result["summary"]) if result.get("details"): diff --git a/.github/workflows/ai-checks.yml b/.github/workflows/ai-checks.yml index 63916f7..ef62deb 100644 --- a/.github/workflows/ai-checks.yml +++ b/.github/workflows/ai-checks.yml @@ -15,6 +15,8 @@ jobs: matrix: ${{ steps.list.outputs.matrix }} steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Preflight provider API key env: AI_CHECK_MODEL: ${{ vars.AI_CHECK_MODEL || 'openai/gpt-4o-mini' }} @@ -22,12 +24,20 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} run: python .github/scripts/preflight_key.py + - name: Fetch base ref + run: git fetch --no-tags --depth=200 origin "${{ github.event.pull_request.base.ref }}" - id: list + env: + BASE_REF: origin/${{ github.event.pull_request.base.ref }} run: python .github/scripts/list_ai_checks.py >> "$GITHUB_OUTPUT" check: needs: list - if: ${{ fromJson(needs.list.outputs.matrix).include[0] != null }} + # Gate per matrix shard: rules whose path scope didn't match any changed + # file render as gray "skipped" in the PR Checks tab — no runner, no model + # call, clearer signal than green "pass". Top-level `include[0] != null` + # guard avoids the "no jobs to run" error when the matrix is empty. + if: ${{ fromJson(needs.list.outputs.matrix).include[0] != null && matrix.applies }} runs-on: ubuntu-latest name: "AI Check: ${{ matrix.name }}" strategy: diff --git a/CHANGELOG.md b/CHANGELOG.md index 42ea2e2..d30275f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - AGENTS.md and the Feature PR Documentation Bundle AI check now recognize **agent-orchestrated sessions**. When a parallel implementation agent is launched from a parent Claude Code session (e.g. under Conductor) and has no direct user-to-implementation-agent messages, the transcript may use `### Parent agent (orchestrator)` headings *instead of* `### User`, provided the parent-agent's launching prompt is reproduced verbatim. The transcript must declare the session as agent-orchestrated at the top of `## Detailed conversation`. Same verbatim rule applies — summarized parent prompts are still rejected. This is a policy clarification only; no code changes. +- AI checks workflow now renders non-applicable rules as gray "skipped" icons instead of green "pass". `list_ai_checks.py` evaluates each rule's `paths:` frontmatter against `git diff --name-only $BASE_REF...HEAD` and emits `applies: bool` per matrix include; `.github/workflows/ai-checks.yml` gates each `check` shard on `if: ${{ ... && matrix.applies }}`, so non-applicable shards skip at the GHA level — no runner spin-up, no model call, no token cost. Four rules gain `paths:` frontmatter (`pipeline-step-sync`, `asgi-wsgi-scott`, `qdrant-payload-slim`, `entity-creation-race-safety`); the other five remain semantic. The driver's verdict tool drops `skip` from its enum: semantic non-applicability is now `pass` with `summary: "Rule does not apply."` and a one-line `details` explanation. Closes #124 — [plan](doc/plans/2026-05-04-ai-checks-skipped.md), [feature](doc/features/2026-05-04-ai-checks-skipped.md), [planning session](doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md), [implementation session](doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md) ## 2026-05-01 diff --git a/doc/features/2026-05-04-ai-checks-skipped.md b/doc/features/2026-05-04-ai-checks-skipped.md new file mode 100644 index 0000000..158b007 --- /dev/null +++ b/doc/features/2026-05-04-ai-checks-skipped.md @@ -0,0 +1,79 @@ +# Render non-applicable AI checks as gray "skipped" + +**Date:** 2026-05-04 + +## Problem + +`#122` shipped a self-hosted AI checks workflow that maps driver verdicts to GitHub job conclusions via exit code only. `skip` exited 0, identical to `pass`, so a non-applicable rule rendered as a green check — same icon as a rule that was checked and passed. No way for a reader of the PR Checks tab to tell the two cases apart. + +GitHub Actions only renders the gray "skipped" icon for jobs whose `if:` condition evaluates false before the job starts. To fix the icon for path-scoped rules, the applicability decision has to move out of the runner and into the matrix-emitter `list` job. + +## Changes + +- **Matrix emitter** (`.github/scripts/list_ai_checks.py`). Now reads each rule's optional `paths:` frontmatter, runs `git diff --name-only $BASE_REF...HEAD` once, and emits an `applies: bool` field per matrix include. Rules without `paths:` always get `applies: true` (semantic — can't be path-decided). Top-level `BASE_REF` env var consumed. +- **Workflow** (`.github/workflows/ai-checks.yml`). The `list` job now does an `actions/checkout@v4` with `fetch-depth: 0` (needed to compute the diff), pre-fetches the base ref, and exports `BASE_REF` to the matrix emitter. The `check` job's `if:` condition gains `&& matrix.applies` — non-applicable shards render gray-skipped at the GHA level (no runner, no model call, no token cost). +- **Path-scoped rules.** Four rules now carry `paths:` frontmatter: + - `pipeline-step-sync` → `episodes/models.py, episodes/workflows.py, README.md, doc/README.md, doc/*.excalidraw` + - `asgi-wsgi-scott` → `ragtime/asgi.py, chat/**, frontend/**` + - `qdrant-payload-slim` → `episodes/vector_store.py` + - `entity-creation-race-safety` → `episodes/**` + The other five rules (`branching-and-pr-strategy`, `comment-discipline`, `env-var-sync`, `feature-pr-docs`, `gh-api-shell-escaping`) stay unscoped because they're semantic — applicability requires reading the diff content, not just the file list. +- **Driver** (`.github/scripts/run_ai_check.py`): + - System prompt: `skip` is removed from the verdict description. `pass` now covers both "complies" and "does not apply" — the latter with `summary: "Rule does not apply."` and a one-line `details` explanation. + - Tool schema: `skip` removed from the `verdict` enum. The driver only ever returns `pass` or `fail`. + - In-driver path-scoped early-return is preserved as a safety net for direct CLI invocation, but now returns `pass` with a "rule does not apply because no changed files match its path scope" note instead of `skip`. + - Marker map updated; module docstring rewritten to reflect the new contract. + +## Behaviour after the change + +| Scenario | Old icon | New icon | +|---|---|---| +| Rule with `paths:` matches changed files, model returns `pass` | green | green | +| Rule with `paths:` matches changed files, model returns `fail` | red | red | +| Rule with `paths:` doesn't match any changed file | green (driver returned `skip`) | gray (job skipped at GHA level) | +| Rule without `paths:`, model returns `pass` | green | green | +| Rule without `paths:`, model returns `pass` with "does not apply" details | green | green (with explanatory details) | +| Rule without `paths:`, model returns `fail` | red | red | + +On a PR that touches only `.github/` and `.ai-checks/` (like this one), the matrix emitter outputs: + +```json +{"include": [ + {"name": "ASGI vs WSGI Awareness for Scott", "applies": false, ...}, + {"name": "Branching & PR Strategy", "applies": true, ...}, + {"name": "Comment Discipline", "applies": true, ...}, + {"name": "Entity Creation Race Safety", "applies": false, ...}, + {"name": "RAGTIME_* Env Var Sync", "applies": true, ...}, + {"name": "Feature PR Documentation Bundle", "applies": true, ...}, + {"name": "gh api Shell Escaping & Endpoints", "applies": true, ...}, + {"name": "Pipeline Step Documentation Sync", "applies": false, ...}, + {"name": "Slim Qdrant Payload Discipline", "applies": false, ...} +]} +``` + +Four shards skip to gray; five run as before. + +## Key parameters + +| Parameter | Value | Why | +|---|---|---| +| `paths:` glob syntax | Python `fnmatch` | Already what the driver used. `**` works because `*` matches `/`. | +| Default `applies` (no `paths:`) | `true` | Semantic rules can't be path-decided; better to evaluate than to silently skip. | +| `BASE_REF` default in `list` job | `origin/${{ github.event.pull_request.base.ref }}` | Same convention as the `check` job. | +| `fetch-depth` on `list` job | `0` | Required to compute `git diff` against base. | + +## Verification + +- `BASE_REF=main python .github/scripts/list_ai_checks.py` on this branch produces 4 `applies: false` and 5 `applies: true`. Matches issue acceptance criterion: "On a PR that touches none of `episodes/**`, `ragtime/asgi.py`, `chat/**`, `frontend/**`, at least 4 rules render as gray." +- `fnmatch.fnmatch` smoke-tested against `chat/sub/x.py vs chat/**`, `episodes/providers/base.py vs episodes/**`, `doc/foo.excalidraw vs doc/*.excalidraw` — all match. +- Project test suite (`uv run python manage.py test`) unaffected by these changes (touches only `.github/scripts/`, `.github/workflows/`, `.ai-checks/`). + +## Files modified + +- `.github/scripts/list_ai_checks.py` — emit `applies: bool` per matrix include from `paths:` frontmatter and `git diff --name-only`. +- `.github/scripts/run_ai_check.py` — drop `skip` from verdict enum and system prompt; rewrite path-scoped early-return and empty-diff branch to return `pass` with "does not apply" notes. +- `.github/workflows/ai-checks.yml` — add `fetch-depth: 0` and `BASE_REF` to `list` job; gate `check` matrix shards on `matrix.applies`. +- `.ai-checks/pipeline-step-sync.md` — add `paths:` frontmatter. +- `.ai-checks/asgi-wsgi-scott.md` — add `paths:` frontmatter. +- `.ai-checks/qdrant-payload-slim.md` — add `paths:` frontmatter. +- `.ai-checks/entity-creation-race-safety.md` — add `paths:` frontmatter. diff --git a/doc/plans/2026-05-04-ai-checks-skipped.md b/doc/plans/2026-05-04-ai-checks-skipped.md new file mode 100644 index 0000000..6ae4431 --- /dev/null +++ b/doc/plans/2026-05-04-ai-checks-skipped.md @@ -0,0 +1,64 @@ +## Plan: Render non-applicable AI checks as gray "skipped" instead of green "pass" + +**Date:** 2026-05-04 + +## Problem + +The AI checks workflow shipped in #122 maps driver verdicts to GitHub job conclusions via exit code only: + +| Driver verdict | Exit code | GitHub icon | +|---|---|---| +| `pass` | 0 | green | +| `fail` | 1 | red | +| `skip` | 0 | green (should be gray) | + +GitHub Actions has no exit code that maps to `skipped` — that conclusion only fires for jobs whose `if:` condition evaluated false *before* the job started. So `skip` verdicts currently render identically to `pass`, losing the distinction between "rule was checked and passed" and "rule didn't apply to the diff". + +## Approach + +Two coordinated changes. + +### 1. Path-scope non-applicability in `list` job, gate matrix shards with job-level `if:` + +Today `list_ai_checks.py` emits `{name, path}`. Enrich it to also evaluate per-rule applicability against the PR's changed files: + +- Read each rule's `paths:` frontmatter (comma-separated `fnmatch` globs). +- Run `git diff --name-only $BASE_REF...HEAD` once. +- For each rule with `paths:`, set `applies: true` iff at least one changed file matches at least one glob. Rules without `paths:` always get `applies: true`. +- Emit `{name, path, applies}` per matrix include. + +Then in `ai-checks.yml`, gate the `check` job on `matrix.applies` so non-applicable shards render gray-skipped at the GHA level — no runner spin-up, no model call, no tokens. + +Populate `paths:` frontmatter on the four path-scoped rules per the issue's table: + +| Rule | `paths:` | +|---|---| +| `pipeline-step-sync` | `episodes/models.py, episodes/workflows.py, README.md, doc/README.md, doc/*.excalidraw` | +| `asgi-wsgi-scott` | `ragtime/asgi.py, chat/**, frontend/**` | +| `qdrant-payload-slim` | `episodes/vector_store.py` | +| `entity-creation-race-safety` | `episodes/**` | + +The other 5 rules stay unscoped (semantic; can't be path-decided). + +### 2. Tighten the system prompt to remove `skip` for semantic non-applicability + +After (1), the only remaining source of `skip` verdicts would be the model deciding "no relevant changes" for a semantic rule. Update the system prompt in `run_ai_check.py`: + +- `pass`: diff complies with the rule, OR the rule does not apply. When non-applicable, set `summary` to "Rule does not apply." and put a one-line "rule does not apply because …" in `details`. +- `fail`: diff clearly violates the rule. +- ~~`skip`~~: removed from the verdict tool enum entirely. + +Path-scoped non-applicability is now handled before the model is invoked — the early-return inside `evaluate()` is kept as a safety net for direct CLI invocation but now returns `pass` with an explanatory note. + +## Acceptance criteria + +- Rules with `paths:` frontmatter that no changed file matches render as gray "skipped" icon, no runner. +- Rules without `paths:` continue to evaluate semantically and render green/red. +- No green "skip" icons remain. +- On a PR that touches none of `episodes/**`, `ragtime/asgi.py`, `chat/**`, `frontend/**`, at least 4 rules render gray. +- On a PR that touches `episodes/vector_store.py`, `qdrant-payload-slim` fires (green or red), not gray. + +## Out of scope + +- Posting check-runs via the Checks API (gives full skipped-icon coverage including model-driven skips, but doubles the rows in the Checks tab unless we restructure away from `strategy.matrix`). +- Changing the rule format itself (frontmatter schema is unchanged — `paths:` was already supported by the driver, just unused). diff --git a/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md b/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md new file mode 100644 index 0000000..be46e51 --- /dev/null +++ b/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md @@ -0,0 +1,74 @@ +# Implementation session: Render non-applicable AI checks as gray "skipped" + +**Date:** 2026-05-04 + +**Session ID:** unavailable + +## Summary + +Implemented #124 in a single session. Enriched `list_ai_checks.py` to evaluate per-rule applicability against the PR's changed files, gated `check` matrix shards on `matrix.applies` in the workflow, populated `paths:` frontmatter on the four path-scoped rules per the issue's table, and removed `skip` from the driver's verdict enum and system prompt (folded into `pass` with a "rule does not apply" note). Verified the matrix emitter against this PR's own changed files and confirmed the expected 4-skipped / 5-applied split. + +## Detailed conversation + +### User + +Detailed prompt from the parent agent: implement issue #124 end-to-end. Read AGENTS.md branching rules, run a staleness check on the existing AI checks scripts and rules, then do the four implementation steps from the issue (matrix-emitter applicability, workflow `if:` gate, rule frontmatter, prompt + tool tightening). Verify with the test suite and sample changed-file lists. Add a feature doc, plan, session transcripts, and changelog entry per CLAUDE.md "Documentation > Implementation phase". + +### Assistant + +**Step 0 — Branching.** Worktree was on `worktree-agent-abfd8394` at the same commit as `origin/main`. Created `rafacm/ai-checks-skipped` off main. + +**Step 1 — Staleness check.** Read `run_ai_check.py`, `list_ai_checks.py`, the workflow YAML, and all 9 rule files. Findings: + +- The driver (`run_ai_check.py`) already supports `paths:` frontmatter and short-circuits to `skip` when the file list doesn't match — but no rule file actually has `paths:` set yet. +- The matrix emitter (`list_ai_checks.py`) emits `{name, path}` per include, no `applies` field. +- The workflow has a top-level guard `fromJson(...).include[0] != null` to avoid empty-matrix errors but no per-shard `if: ${{ matrix.applies }}` gate. +- All 9 rule files have just `name:` and `description:` in their frontmatter. + +Verdict: issue is unimplemented. Proceed. + +**Step 2 — Implementation.** + +1. **Rewrote `list_ai_checks.py`.** Added a `parse_frontmatter` helper that returns the full dict (not just `name`), a `changed_files` helper running `git diff --name-only $BASE_REF...HEAD`, and an `applies_for(fm, files)` predicate that matches the rule's `paths:` globs against the file list (`fnmatch.fnmatch`, any-vs-any). Rules without `paths:` get `applies: true`. Output schema becomes `{name, path, applies}`. + +2. **Updated `.github/workflows/ai-checks.yml`.** Two changes: + - The `list` job now does `actions/checkout@v4` with `fetch-depth: 0`, fetches the base ref, and exports `BASE_REF=origin/${{ github.event.pull_request.base.ref }}` to the matrix emitter step. Required so `git diff --name-only` works. + - The `check` job's `if:` condition gains `&& matrix.applies` — non-applicable shards render gray-skipped at the GHA level. Added a comment explaining the semantics. + +3. **Populated `paths:` frontmatter on 4 rules** per the issue's table: + - `pipeline-step-sync.md` → `episodes/models.py, episodes/workflows.py, README.md, doc/README.md, doc/*.excalidraw` + - `asgi-wsgi-scott.md` → `ragtime/asgi.py, chat/**, frontend/**` + - `qdrant-payload-slim.md` → `episodes/vector_store.py` + - `entity-creation-race-safety.md` → `episodes/**` + +4. **Updated `run_ai_check.py`.** Removed `skip` from the verdict tool enum and from the system prompt. The system prompt now defines `pass` to cover both "complies" and "does not apply" cases — the latter with `summary: "Rule does not apply."` and an explanatory `details`. The empty-diff and path-scoped early-returns inside `evaluate()` were rewritten to return `pass` with a "rule does not apply because …" note instead of `skip`. Marker map (`{"pass": "PASS", "fail": "FAIL"}`) updated. Module docstring rewritten to reflect the new contract: "Path-scoped non-applicability is now handled at the workflow level." + +**Step 3 — Verification.** + +Ran `BASE_REF=main python .github/scripts/list_ai_checks.py` on the implementation branch. Output: + +``` +{"include": [ + {"name": "ASGI vs WSGI Awareness for Scott", ..., "applies": false}, + {"name": "Branching & PR Strategy", ..., "applies": true}, + {"name": "Comment Discipline", ..., "applies": true}, + {"name": "Entity Creation Race Safety", ..., "applies": false}, + {"name": "RAGTIME_* Env Var Sync", ..., "applies": true}, + {"name": "Feature PR Documentation Bundle", ..., "applies": true}, + {"name": "gh api Shell Escaping & Endpoints", ..., "applies": true}, + {"name": "Pipeline Step Documentation Sync", ..., "applies": false}, + {"name": "Slim Qdrant Payload Discipline", ..., "applies": false} +]} +``` + +4 path-scoped rules `applies: false`, 5 semantic rules `applies: true`. Matches the issue's acceptance criterion ("On a PR that touches none of `episodes/**`, `ragtime/asgi.py`, `chat/**`, `frontend/**`, at least 4 rules render gray"). + +Smoke-tested `fnmatch` semantics on `chat/sub/x.py vs chat/**`, `episodes/providers/base.py vs episodes/**`, `doc/foo.excalidraw vs doc/*.excalidraw`, `frontend/src/foo.tsx vs frontend/**` — all match. `**` works because Python's `fnmatch` lets `*` cross `/`. + +Walked through the workflow YAML manually. The `list` job now needs `fetch-depth: 0` (added). The `check` job's `if:` condition `${{ fromJson(needs.list.outputs.matrix).include[0] != null && matrix.applies }}` is syntactically valid GHA expression syntax — `matrix.applies` is a per-shard context, the `include[0] != null` guard handles the empty-matrix case from before, and `&&` short-circuits correctly. No issues. + +Ran `uv run python manage.py test --noinput`. 155 tests passed; 36 errors are environmental (missing `test_ragtime` Postgres database, MusicBrainz connection failures in the worktree env). None of the failures touch code paths modified by this PR — the changes live entirely in `.github/scripts/`, `.github/workflows/`, and `.ai-checks/`, which are not exercised by Django tests. + +**Step 4 — Documentation.** Wrote `doc/plans/2026-05-04-ai-checks-skipped.md`, `doc/features/2026-05-04-ai-checks-skipped.md`, this implementation transcript, and the planning transcript. Added a `## 2026-05-04` section to `CHANGELOG.md` under `### Changed` linking to all four docs and citing #124. + +**Step 5 — PR.** Pushed branch and opened PR per AGENTS.md. diff --git a/doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md b/doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md new file mode 100644 index 0000000..a7b7b10 --- /dev/null +++ b/doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md @@ -0,0 +1,41 @@ +# Planning session: Render non-applicable AI checks as gray "skipped" + +**Date:** 2026-05-04 + +**Session ID:** unavailable + +## Summary + +Issue #124 was filed by the project owner as a follow-up to the AI checks workflow shipped in #122. The plan was authored in the issue body itself rather than in a separate planning session — the two-part proposal (path-scope at `list` job + tighten the system prompt) is fully fleshed out, including the table of which rules to scope and which to leave semantic. Implementation followed the issue text directly. No separate planning conversation took place; this transcript captures the equivalent reasoning. + +## Detailed conversation + +### User + +Filed issue #124 with title "Render non-applicable AI checks as 'skipped' (gray icon), not 'success' (green)". Body included: + +- Background table mapping driver verdicts → exit codes → GitHub icons, identifying that `skip` exits 0 and renders identically to `pass`. +- Two-part proposal: (1) path-scope non-applicability in the `list` job and gate matrix shards with job-level `if: ${{ matrix.applies }}`; (2) tighten the system prompt to remove `skip` for semantic non-applicability. +- Suggested `paths:` frontmatter for 4 of the 9 rules. +- Acceptance criteria. +- Out-of-scope notes (Checks API approach deferred until path-scoping has been measured). + +### Assistant + +Reviewed the proposal end-to-end. Key reasoning steps: + +1. **Why job-level `if:` rather than step-level.** GHA's `skipped` conclusion only fires for jobs whose `if:` evaluates false before the job starts. Using step-level `if:` on each step keeps the job at "success" (all-skipped-but-passing) and renders green, which defeats the purpose. Job-level `if:` against `matrix.applies` is the correct gate. + +2. **Where to compute applicability.** Two places it could go: (a) inside the runner before the model call (today), (b) inside the matrix emitter (`list` job). Only (b) lets GHA short-circuit the job at the matrix level. Cost: the `list` job needs an `actions/checkout@v4` with `fetch-depth: 0` and a base-ref fetch, which it didn't have before — small overhead, one job, one shallow fetch. + +3. **Path glob choice.** Stick with `fnmatch` since the driver already uses it. `**` works because Python's `fnmatch.fnmatch` treats `*` as matching any character including `/`, so `chat/**` matches `chat/sub/x.py`. Smoke-tested before adopting. + +4. **Which rules to scope.** Took the issue's table verbatim — author had already done the analysis. The 5 rules left unscoped (`comment-discipline`, `branching-and-pr-strategy`, `env-var-sync`, `feature-pr-docs`, `gh-api-shell-escaping`) all need diff-content inspection (grep for `RAGTIME_*`, grep for `gh api`, PR-shape semantic judgment) — applicability isn't reducible to a path filter. + +5. **What to do about `skip` in the driver.** After the workflow handles path-scoped non-applicability, the only remaining source of `skip` is the model deciding "no relevant changes" for a semantic rule. Option A: keep `skip` in the verdict enum, accept that it'll keep rendering green-pass. Option B: remove `skip` from the enum entirely, fold semantic non-applicability into `pass` with an explanatory `details` field. Picked B because it makes the verdict semantics cleaner — `skip` would only mean "couldn't evaluate", which is rare enough that it doesn't need a slot in the API. + +6. **Safety net for direct CLI invocation.** The driver's existing path-scoped early-return inside `evaluate()` becomes redundant once the workflow gates on `applies`, but worth keeping for direct `python run_ai_check.py` runs. Adapted to return `pass` with a "rule does not apply" note. + +7. **Out of scope.** Checks API (option 2 in the original design discussion in #121) gives full skipped-icon coverage — including model-driven skips — but doubles the rows in the Checks tab unless the workflow restructures away from `strategy.matrix`. Defer until path-scoping is measured in practice. + +Plan accepted as written in the issue. Proceeded to implementation. From 16cae999e1765a3d0e210180403275792d52dd45 Mon Sep 17 00:00:00 2001 From: Rafael Cordones Date: Mon, 4 May 2026 11:20:57 +0200 Subject: [PATCH 2/2] Filter non-applicable AI checks at matrix emission (codex review followup) Addresses the blocker from codex review on PR #130: the previous design relied on `jobs.check.if: ${{ matrix.applies }}` to render non-applicable rules as gray "skipped" rows, but GitHub Actions evaluates job-level `if:` *before* expanding `strategy.matrix`, so `matrix.applies` is not visible at that scope and the workflow failed to expand any jobs at all (0-second workflow-validation failure, run 25307264491). Pivot to option 1B: filter non-applicable rules out of the matrix entirely at emission time. They simply don't appear in the PR Checks tab. This isn't the gray-skipped icon the issue originally asked for, but it does fix the original "indistinguishable from green pass" complaint and ships in a workflow that actually runs. True gray-skipped via the GitHub Checks API is tracked separately as #135 for future work. Other codex follow-ups addressed in the same commit: - `changed_files()` now fails closed (`sys.exit(1)`) on `git diff` errors instead of silently returning `[]`. - New unit tests in `.github/scripts/tests/test_list_ai_checks.py` (24 tests, stdlib-only) lock in parser, glob, fail-closed, and end-to-end matrix-emission behavior. The codex-flagged unsupported cases (quoted globs, YAML list form for `paths:`) are explicitly locked in by tests so a future "fix" doesn't silently change them. - Supported `paths:` syntax documented in docstrings on `parse_frontmatter` / `applies_for` and in the feature doc. Plan, feature doc, and implementation session transcript all carry a "Revision after codex review" section. Co-Authored-By: Claude Opus 4.7 --- .github/scripts/list_ai_checks.py | 96 ++++++- .github/scripts/tests/__init__.py | 0 .github/scripts/tests/test_list_ai_checks.py | 251 ++++++++++++++++++ .github/workflows/ai-checks.yml | 12 +- doc/features/2026-05-04-ai-checks-skipped.md | 74 ++++-- doc/plans/2026-05-04-ai-checks-skipped.md | 14 + ...i-checks-skipped-implementation-session.md | 22 ++ 7 files changed, 423 insertions(+), 46 deletions(-) create mode 100644 .github/scripts/tests/__init__.py create mode 100644 .github/scripts/tests/test_list_ai_checks.py diff --git a/.github/scripts/list_ai_checks.py b/.github/scripts/list_ai_checks.py index f8a9b18..5108ebe 100644 --- a/.github/scripts/list_ai_checks.py +++ b/.github/scripts/list_ai_checks.py @@ -3,17 +3,23 @@ Output line format (written to $GITHUB_OUTPUT): - matrix={"include": [{"name": "", "path": ".ai-checks/foo.md", "applies": true}, ...]} - -`applies` is determined by reading each rule's optional `paths:` frontmatter -(comma-separated `fnmatch` globs) and matching it against `git diff ---name-only $BASE_REF...HEAD`. Rules without `paths:` always have -`applies: true` (they're semantic and must be evaluated by the model). - -The workflow then gates each matrix shard on `if: ${{ matrix.applies }}` so -non-applicable rules render as gray "skipped" jobs in the PR Checks tab -rather than green "pass" — preserving the signal that the rule was scoped -out, not silently waved through. + matrix={"include": [{"name": "", "path": ".ai-checks/foo.md"}, ...]} + +Only rules that **apply** to the current PR's changed file set are included. +Applicability is determined by reading each rule's optional `paths:` +frontmatter (comma-separated `fnmatch` globs) and matching it against +`git diff --name-only $BASE_REF...HEAD`. Rules without `paths:` always +apply (they're semantic and must be evaluated by the model). + +Non-applicable rules are **omitted from the matrix entirely**, so they +simply don't appear in the PR Checks tab. We previously tried emitting +them with `applies: false` and gating the job on `if: ${{ matrix.applies }}` +to render gray "skipped" rows, but GitHub Actions evaluates job-level +`if:` *before* expanding `strategy.matrix`, so `matrix.applies` is not +visible at that scope and the workflow fails to expand any jobs at all. + +The future-work option for true gray-skipped rows is to publish them via +the GitHub Checks API from the `list` job (tracked as a separate issue). """ from __future__ import annotations @@ -30,6 +36,27 @@ def parse_frontmatter(path: Path) -> dict[str, str]: + """Extract a flat key/value mapping from a Markdown file's YAML-ish frontmatter. + + Supported frontmatter syntax (intentionally minimal — this is NOT a YAML parser): + + - Delimited by `---` lines at the top of the file. + - Each line is a single ``key: value`` pair. + - Values are returned as raw strings, with surrounding whitespace stripped. + - Comment lines starting with ``#`` are ignored. + + Explicitly NOT supported: + + - Multi-line YAML lists (e.g. ``paths:\\n - foo\\n - bar``) — the value + ends at the newline, so the key resolves to an empty string and any + following ``- foo`` lines are dropped. + - Quoted values — quote characters are preserved literally in the + returned string (e.g. ``paths: "foo"`` yields ``'"foo"'``, which then + fails to match anything via :func:`fnmatch`). + - Block scalars (``|``, ``>``), anchors, references, etc. + + Returns an empty dict if the file has no frontmatter delimiters. + """ text = path.read_text() m = re.match(r"^---\r?\n(.*?)\r?\n---", text, re.DOTALL) if not m: @@ -43,6 +70,15 @@ def parse_frontmatter(path: Path) -> dict[str, str]: def changed_files(base: str) -> list[str]: + """Return the list of files changed between ``base`` and ``HEAD``. + + Fails closed: if ``git diff`` errors out (e.g. unknown base ref, shallow + clone without the base, repo corruption), this exits the process with + status 1 and a clear message on stderr. Silently returning ``[]`` would + cause every path-scoped rule to be marked non-applicable and silently + dropped from the matrix, which is exactly the kind of "no signal" + failure mode we're trying to avoid. + """ try: out = subprocess.run( ["git", "diff", "--name-only", f"{base}...HEAD"], @@ -50,8 +86,13 @@ def changed_files(base: str) -> list[str]: capture_output=True, text=True, ).stdout - except subprocess.CalledProcessError: - return [] + except subprocess.CalledProcessError as exc: + stderr = (exc.stderr or "").strip() + sys.stderr.write( + f"ERROR: `git diff --name-only {base}...HEAD` failed: {stderr}. " + "Refusing to silently skip path-scoped rules.\n" + ) + sys.exit(1) return [line for line in out.splitlines() if line.strip()] @@ -60,6 +101,32 @@ def matches_paths(files: list[str], patterns: list[str]) -> bool: def applies_for(fm: dict[str, str], files: list[str]) -> bool: + """Decide whether a rule applies given its frontmatter and the changed files. + + A rule with no ``paths:`` field (or an empty/whitespace-only value) + always applies — semantic rules must always run. + + A rule with a ``paths:`` field is treated as a comma-separated list of + :mod:`fnmatch` glob patterns. The rule applies iff at least one changed + file matches at least one pattern. + + Glob semantics (inherited from :mod:`fnmatch`): + + - ``*`` matches any number of characters, **including** ``/`` — this is + not shell-segment-aware. ``doc/*.md`` matches ``doc/nested/foo.md``. + - ``**`` is **not** treated specially; it's just two consecutive ``*``. + In practice ``foo/**`` happens to match ``foo/bar/baz.py`` because + ``*`` already crosses ``/``. + - ``?`` matches exactly one character (also crossing ``/``). + - Character classes ``[...]`` are supported. + + Syntax constraints (see :func:`parse_frontmatter`): + + - Single line only. + - Comma-separated. + - Unquoted — quote characters become part of the pattern and won't match. + - YAML list form (``- foo``) is not supported. + """ raw = fm.get("paths", "").strip() if not raw: return True @@ -80,11 +147,12 @@ def main() -> int: items = [] for p in sorted(CHECKS_DIR.glob("*.md")): fm = parse_frontmatter(p) + if not applies_for(fm, files): + continue items.append( { "name": fm.get("name", p.stem), "path": str(p), - "applies": applies_for(fm, files), } ) diff --git a/.github/scripts/tests/__init__.py b/.github/scripts/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.github/scripts/tests/test_list_ai_checks.py b/.github/scripts/tests/test_list_ai_checks.py new file mode 100644 index 0000000..26c1e2a --- /dev/null +++ b/.github/scripts/tests/test_list_ai_checks.py @@ -0,0 +1,251 @@ +"""Unit tests for ``.github/scripts/list_ai_checks.py``. + +Run with:: + + python -m unittest discover .github/scripts/tests -v + +These tests use only the standard library — they're meant to run in a +plain Python runner (no project Django dep), the same way the workflow +does. +""" + +from __future__ import annotations + +import io +import json +import subprocess +import sys +import tempfile +import unittest +from contextlib import redirect_stderr, redirect_stdout +from pathlib import Path +from unittest import mock + +# Make the parent `.github/scripts` directory importable. +SCRIPTS_DIR = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(SCRIPTS_DIR)) + +import list_ai_checks # noqa: E402 (import after sys.path tweak) + + +def _write(path: Path, text: str) -> Path: + path.write_text(text) + return path + + +class ParseFrontmatterTests(unittest.TestCase): + def setUp(self): + self.tmp = tempfile.TemporaryDirectory() + self.addCleanup(self.tmp.cleanup) + self.dir = Path(self.tmp.name) + + def test_well_formed_frontmatter(self): + p = _write( + self.dir / "rule.md", + "---\nname: foo\npaths: episodes/**\n---\nbody\n", + ) + fm = list_ai_checks.parse_frontmatter(p) + self.assertEqual(fm, {"name": "foo", "paths": "episodes/**"}) + + def test_missing_frontmatter_delimiters(self): + p = _write(self.dir / "rule.md", "no frontmatter here\n") + self.assertEqual(list_ai_checks.parse_frontmatter(p), {}) + + def test_missing_fields(self): + p = _write(self.dir / "rule.md", "---\nname: foo\n---\n") + fm = list_ai_checks.parse_frontmatter(p) + self.assertEqual(fm, {"name": "foo"}) + self.assertNotIn("paths", fm) + + def test_multi_line_value_only_keeps_first_line(self): + # Multi-line YAML list form is NOT supported. The `paths:` value + # is empty (everything after the colon on that line) and the + # following `- ...` lines have no colon so they're dropped. + p = _write( + self.dir / "rule.md", + "---\nname: foo\npaths:\n - episodes/**\n - chat/**\n---\n", + ) + fm = list_ai_checks.parse_frontmatter(p) + self.assertEqual(fm.get("paths", ""), "") + + def test_quoted_values_are_not_unquoted(self): + # Quoted globs are NOT supported — the quote characters become + # part of the pattern. + p = _write( + self.dir / "rule.md", + '---\nname: foo\npaths: "episodes/**", "README.md"\n---\n', + ) + fm = list_ai_checks.parse_frontmatter(p) + self.assertIn('"', fm["paths"]) + + def test_comment_lines_ignored(self): + p = _write( + self.dir / "rule.md", + "---\n# this is a comment\nname: foo\n---\n", + ) + self.assertEqual(list_ai_checks.parse_frontmatter(p), {"name": "foo"}) + + +class AppliesForTests(unittest.TestCase): + def test_empty_paths_always_applies(self): + self.assertTrue(list_ai_checks.applies_for({}, [])) + self.assertTrue(list_ai_checks.applies_for({"paths": ""}, ["x.py"])) + self.assertTrue(list_ai_checks.applies_for({"paths": " "}, ["x.py"])) + + def test_comma_separated_globs(self): + fm = {"paths": "episodes/**, chat/**, README.md"} + self.assertTrue(list_ai_checks.applies_for(fm, ["chat/views.py"])) + self.assertTrue(list_ai_checks.applies_for(fm, ["README.md"])) + self.assertTrue(list_ai_checks.applies_for(fm, ["episodes/models.py"])) + self.assertFalse(list_ai_checks.applies_for(fm, ["frontend/src/x.tsx"])) + + def test_single_glob(self): + fm = {"paths": "episodes/vector_store.py"} + self.assertTrue(list_ai_checks.applies_for(fm, ["episodes/vector_store.py"])) + self.assertFalse(list_ai_checks.applies_for(fm, ["episodes/models.py"])) + + def test_no_matching_files(self): + fm = {"paths": "frontend/**"} + self.assertFalse(list_ai_checks.applies_for(fm, ["episodes/x.py", "doc/y.md"])) + + def test_multiple_matching_files(self): + fm = {"paths": "episodes/**"} + self.assertTrue( + list_ai_checks.applies_for( + fm, ["episodes/a.py", "episodes/b.py", "doc/z.md"] + ) + ) + + def test_empty_changed_files_path_scoped_rule_does_not_apply(self): + fm = {"paths": "episodes/**"} + self.assertFalse(list_ai_checks.applies_for(fm, [])) + + def test_empty_changed_files_semantic_rule_still_applies(self): + # No `paths:` -> always applies, even on an empty diff. + self.assertTrue(list_ai_checks.applies_for({}, [])) + + def test_quoted_paths_lock_in_unsupported_behavior(self): + # Quoted globs do not match anything (the pattern still contains + # literal quote characters). This locks in the codex-flagged + # behavior so a future "fix" doesn't silently change it. + fm = {"paths": '"episodes/**"'} + self.assertFalse(list_ai_checks.applies_for(fm, ["episodes/models.py"])) + + def test_yaml_list_paths_lock_in_unsupported_behavior(self): + # When the value comes from a multi-line YAML list, parse_frontmatter + # leaves `paths` empty, so the rule incorrectly applies to every + # diff. Lock that in via the parsed value being "". + self.assertTrue(list_ai_checks.applies_for({"paths": ""}, ["any.py"])) + + +class MatchesPathsTests(unittest.TestCase): + def test_nested_path_matching(self): + self.assertTrue( + list_ai_checks.matches_paths(["chat/views/foo.py"], ["chat/**"]) + ) + + def test_double_star_is_not_segment_aware(self): + # fnmatch treats ** as just two consecutive *, which already + # crosses /. So `foo/**` matches `foo/bar/baz`. + self.assertTrue(list_ai_checks.matches_paths(["foo/bar/baz"], ["foo/**"])) + + def test_star_crosses_slash(self): + # Document the codex-flagged surprising behavior: `*` crosses `/`. + self.assertTrue( + list_ai_checks.matches_paths( + ["doc/nested/foo.excalidraw"], ["doc/*.excalidraw"] + ) + ) + + def test_no_match(self): + self.assertFalse( + list_ai_checks.matches_paths(["episodes/x.py"], ["frontend/**"]) + ) + + +class ChangedFilesTests(unittest.TestCase): + def test_failure_exits_one(self): + # Replace subprocess.run with one that always raises CalledProcessError. + err = subprocess.CalledProcessError( + returncode=128, + cmd=["git", "diff"], + output="", + stderr="fatal: bad revision", + ) + with mock.patch.object(list_ai_checks.subprocess, "run", side_effect=err): + stderr_buf = io.StringIO() + with redirect_stderr(stderr_buf): + with self.assertRaises(SystemExit) as cm: + list_ai_checks.changed_files("origin/does-not-exist") + self.assertEqual(cm.exception.code, 1) + self.assertIn("Refusing to silently skip", stderr_buf.getvalue()) + + def test_success_returns_filtered_lines(self): + completed = mock.Mock() + completed.stdout = "a.py\n\nb.py\n" + with mock.patch.object(list_ai_checks.subprocess, "run", return_value=completed): + result = list_ai_checks.changed_files("origin/main") + self.assertEqual(result, ["a.py", "b.py"]) + + +class MainEndToEndTests(unittest.TestCase): + def setUp(self): + self.tmp = tempfile.TemporaryDirectory() + self.addCleanup(self.tmp.cleanup) + self.root = Path(self.tmp.name) + self.checks_dir = self.root / ".ai-checks" + self.checks_dir.mkdir() + + # Three rules: two path-scoped, one semantic. + _write( + self.checks_dir / "episodes-rule.md", + "---\nname: episodes-rule\npaths: episodes/**\n---\nbody\n", + ) + _write( + self.checks_dir / "frontend-rule.md", + "---\nname: frontend-rule\npaths: frontend/**\n---\nbody\n", + ) + _write( + self.checks_dir / "semantic-rule.md", + "---\nname: semantic-rule\n---\nbody\n", + ) + + def _run_main(self, changed): + # Patch CHECKS_DIR to point at our tempdir, and patch changed_files. + with mock.patch.object(list_ai_checks, "CHECKS_DIR", self.checks_dir): + with mock.patch.object( + list_ai_checks, "changed_files", return_value=changed + ): + buf = io.StringIO() + with redirect_stdout(buf): + rc = list_ai_checks.main() + self.assertEqual(rc, 0) + out = buf.getvalue().strip() + self.assertTrue(out.startswith("matrix=")) + return json.loads(out[len("matrix=") :]) + + def test_only_applicable_rules_appear(self): + matrix = self._run_main(["episodes/models.py"]) + names = [item["name"] for item in matrix["include"]] + self.assertIn("episodes-rule", names) + self.assertIn("semantic-rule", names) + self.assertNotIn("frontend-rule", names) + # `applies` field must NOT be emitted. + for item in matrix["include"]: + self.assertNotIn("applies", item) + + def test_empty_diff_keeps_only_semantic_rules(self): + matrix = self._run_main([]) + names = [item["name"] for item in matrix["include"]] + self.assertEqual(names, ["semantic-rule"]) + + def test_no_applicable_rules_emits_empty_matrix(self): + # Patch out the semantic rule so all remaining rules are + # path-scoped and won't match. + (self.checks_dir / "semantic-rule.md").unlink() + matrix = self._run_main(["doc/anything.md"]) + self.assertEqual(matrix, {"include": []}) + + +if __name__ == "__main__": + unittest.main() diff --git a/.github/workflows/ai-checks.yml b/.github/workflows/ai-checks.yml index ef62deb..e454b65 100644 --- a/.github/workflows/ai-checks.yml +++ b/.github/workflows/ai-checks.yml @@ -33,11 +33,13 @@ jobs: check: needs: list - # Gate per matrix shard: rules whose path scope didn't match any changed - # file render as gray "skipped" in the PR Checks tab — no runner, no model - # call, clearer signal than green "pass". Top-level `include[0] != null` - # guard avoids the "no jobs to run" error when the matrix is empty. - if: ${{ fromJson(needs.list.outputs.matrix).include[0] != null && matrix.applies }} + # Non-applicable rules (path scope didn't match any changed file) are + # filtered out of the matrix entirely by `list_ai_checks.py`, so they + # simply don't appear in the PR Checks tab. The `include[0] != null` + # guard avoids the "no jobs to run" error when zero rules apply. + # (We can't gate per-shard with `matrix.applies` because GitHub Actions + # evaluates job-level `if:` *before* expanding `strategy.matrix`.) + if: ${{ fromJson(needs.list.outputs.matrix).include[0] != null }} runs-on: ubuntu-latest name: "AI Check: ${{ matrix.name }}" strategy: diff --git a/doc/features/2026-05-04-ai-checks-skipped.md b/doc/features/2026-05-04-ai-checks-skipped.md index 158b007..fdd5f9a 100644 --- a/doc/features/2026-05-04-ai-checks-skipped.md +++ b/doc/features/2026-05-04-ai-checks-skipped.md @@ -1,4 +1,4 @@ -# Render non-applicable AI checks as gray "skipped" +# Hide non-applicable AI checks from the PR Checks tab **Date:** 2026-05-04 @@ -6,12 +6,14 @@ `#122` shipped a self-hosted AI checks workflow that maps driver verdicts to GitHub job conclusions via exit code only. `skip` exited 0, identical to `pass`, so a non-applicable rule rendered as a green check — same icon as a rule that was checked and passed. No way for a reader of the PR Checks tab to tell the two cases apart. -GitHub Actions only renders the gray "skipped" icon for jobs whose `if:` condition evaluates false before the job starts. To fix the icon for path-scoped rules, the applicability decision has to move out of the runner and into the matrix-emitter `list` job. +The original goal was to render non-applicable rules as the gray "skipped" GitHub Actions icon, by emitting an `applies: bool` field per matrix shard and gating the job on `if: ${{ matrix.applies }}`. The codex review on PR #130 caught a blocker: GitHub Actions evaluates `jobs..if:` *before* expanding `strategy.matrix`, so `matrix.applies` is not available at that scope and the workflow fails to expand any jobs at all (0-second workflow-validation failure). + +The smallest viable fix — and what this PR ships — is **filter non-applicable rules out of the matrix entirely** at emission time. Non-applicable rules simply don't appear in the PR Checks tab. That doesn't give us gray rows, but it does fix the original "indistinguishable from green pass" complaint, and it ships in a workflow YAML that actually runs. The future-work option (publishing gray rows via the GitHub Checks API from the `list` job) is tracked separately as #135. ## Changes -- **Matrix emitter** (`.github/scripts/list_ai_checks.py`). Now reads each rule's optional `paths:` frontmatter, runs `git diff --name-only $BASE_REF...HEAD` once, and emits an `applies: bool` field per matrix include. Rules without `paths:` always get `applies: true` (semantic — can't be path-decided). Top-level `BASE_REF` env var consumed. -- **Workflow** (`.github/workflows/ai-checks.yml`). The `list` job now does an `actions/checkout@v4` with `fetch-depth: 0` (needed to compute the diff), pre-fetches the base ref, and exports `BASE_REF` to the matrix emitter. The `check` job's `if:` condition gains `&& matrix.applies` — non-applicable shards render gray-skipped at the GHA level (no runner, no model call, no token cost). +- **Matrix emitter** (`.github/scripts/list_ai_checks.py`). Now reads each rule's optional `paths:` frontmatter, runs `git diff --name-only $BASE_REF...HEAD` once, and **omits non-applicable rules from the emitted matrix entirely**. Rules without `paths:` always apply (semantic — can't be path-decided). The previously-emitted `applies` field is gone. `git diff` failures now fail closed (`sys.exit(1)`) instead of silently returning `[]`, so a bad base ref or fetch error causes the `list` job to fail loudly rather than silently dropping every path-scoped rule. +- **Workflow** (`.github/workflows/ai-checks.yml`). The `list` job does an `actions/checkout@v4` with `fetch-depth: 0` (needed to compute the diff), pre-fetches the base ref, and exports `BASE_REF` to the matrix emitter. The `check` job's `if:` is just `${{ fromJson(needs.list.outputs.matrix).include[0] != null }}` — guards the "no jobs to run" error when zero rules apply. No per-shard gate. - **Path-scoped rules.** Four rules now carry `paths:` frontmatter: - `pipeline-step-sync` → `episodes/models.py, episodes/workflows.py, README.md, doc/README.md, doc/*.excalidraw` - `asgi-wsgi-scott` → `ragtime/asgi.py, chat/**, frontend/**` @@ -26,53 +28,71 @@ GitHub Actions only renders the gray "skipped" icon for jobs whose `if:` conditi ## Behaviour after the change -| Scenario | Old icon | New icon | +| Scenario | Old behavior | New behavior | |---|---|---| -| Rule with `paths:` matches changed files, model returns `pass` | green | green | -| Rule with `paths:` matches changed files, model returns `fail` | red | red | -| Rule with `paths:` doesn't match any changed file | green (driver returned `skip`) | gray (job skipped at GHA level) | -| Rule without `paths:`, model returns `pass` | green | green | -| Rule without `paths:`, model returns `pass` with "does not apply" details | green | green (with explanatory details) | -| Rule without `paths:`, model returns `fail` | red | red | +| Rule with `paths:` matches changed files, model returns `pass` | green check | green check | +| Rule with `paths:` matches changed files, model returns `fail` | red X | red X | +| Rule with `paths:` doesn't match any changed file | green check (driver returned `skip`) | rule absent from PR Checks tab | +| Rule without `paths:`, model returns `pass` | green check | green check | +| Rule without `paths:`, model returns `pass` with "does not apply" details | green check | green check (with explanatory details) | +| Rule without `paths:`, model returns `fail` | red X | red X | -On a PR that touches only `.github/` and `.ai-checks/` (like this one), the matrix emitter outputs: +On a PR that touches only `.github/` and `.ai-checks/` (like this one), the matrix emitter outputs only the five applicable rules: ```json {"include": [ - {"name": "ASGI vs WSGI Awareness for Scott", "applies": false, ...}, - {"name": "Branching & PR Strategy", "applies": true, ...}, - {"name": "Comment Discipline", "applies": true, ...}, - {"name": "Entity Creation Race Safety", "applies": false, ...}, - {"name": "RAGTIME_* Env Var Sync", "applies": true, ...}, - {"name": "Feature PR Documentation Bundle", "applies": true, ...}, - {"name": "gh api Shell Escaping & Endpoints", "applies": true, ...}, - {"name": "Pipeline Step Documentation Sync", "applies": false, ...}, - {"name": "Slim Qdrant Payload Discipline", "applies": false, ...} + {"name": "Branching & PR Strategy", "path": ".ai-checks/branching-and-pr-strategy.md"}, + {"name": "Comment Discipline", "path": ".ai-checks/comment-discipline.md"}, + {"name": "RAGTIME_* Env Var Sync", "path": ".ai-checks/env-var-sync.md"}, + {"name": "Feature PR Documentation Bundle", "path": ".ai-checks/feature-pr-docs.md"}, + {"name": "gh api Shell Escaping & Endpoints", "path": ".ai-checks/gh-api-shell-escaping.md"} ]} ``` -Four shards skip to gray; five run as before. +The four path-scoped rules (`pipeline-step-sync`, `asgi-wsgi-scott`, `qdrant-payload-slim`, `entity-creation-race-safety`) are absent from the output entirely. + +## Supported `paths:` syntax + +The frontmatter parser is intentionally minimal — it is **not** a full YAML parser. The locked-in syntax for `paths:` is: + +- **Single line.** Multi-line YAML lists (`paths:\n - foo\n - bar`) are not supported and are treated as an empty value (which means the rule incorrectly applies to every diff — don't write this form). +- **Comma-separated.** Split on `,`, with surrounding whitespace stripped from each pattern. +- **Unquoted.** Quote characters become part of the pattern. `paths: "foo"` will not match the file `foo`; write `paths: foo` instead. +- **Python `fnmatch` semantics**, not shell-segment-aware: + - `*` matches any number of characters **including** `/`. So `doc/*.excalidraw` matches `doc/nested/foo.excalidraw`. + - `**` is just two consecutive `*`; in practice `foo/**` matches `foo/bar/baz.py` because `*` already crosses `/`. + - `?` matches exactly one character (also crossing `/`). + - Character classes `[...]` are supported. + +These constraints are documented in the docstrings of `parse_frontmatter` and `applies_for` in `.github/scripts/list_ai_checks.py` and locked in by unit tests in `.github/scripts/tests/test_list_ai_checks.py`. ## Key parameters | Parameter | Value | Why | |---|---|---| | `paths:` glob syntax | Python `fnmatch` | Already what the driver used. `**` works because `*` matches `/`. | -| Default `applies` (no `paths:`) | `true` | Semantic rules can't be path-decided; better to evaluate than to silently skip. | +| Default applicability (no `paths:`) | applies | Semantic rules can't be path-decided; better to evaluate than to silently skip. | | `BASE_REF` default in `list` job | `origin/${{ github.event.pull_request.base.ref }}` | Same convention as the `check` job. | | `fetch-depth` on `list` job | `0` | Required to compute `git diff` against base. | +| `git diff` failure behavior | `sys.exit(1)` | Fails the `list` job loudly. Silently returning `[]` would silently drop every path-scoped rule. | ## Verification -- `BASE_REF=main python .github/scripts/list_ai_checks.py` on this branch produces 4 `applies: false` and 5 `applies: true`. Matches issue acceptance criterion: "On a PR that touches none of `episodes/**`, `ragtime/asgi.py`, `chat/**`, `frontend/**`, at least 4 rules render as gray." -- `fnmatch.fnmatch` smoke-tested against `chat/sub/x.py vs chat/**`, `episodes/providers/base.py vs episodes/**`, `doc/foo.excalidraw vs doc/*.excalidraw` — all match. +- `python -m unittest discover .github/scripts/tests -v` runs 24 unit tests covering `parse_frontmatter`, `applies_for`, `matches_paths`, `changed_files` (success + fail-closed paths), and `main()` end-to-end. +- `BASE_REF=origin/main python .github/scripts/list_ai_checks.py` on this branch emits exactly the 5 applicable rules (no `applies` field, no path-scoped entries). - Project test suite (`uv run python manage.py test`) unaffected by these changes (touches only `.github/scripts/`, `.github/workflows/`, `.ai-checks/`). +## Future work + +- **#135** — Publish gray "skipped" rows for non-applicable rules via the GitHub Checks API from the `list` job. This is the only way to render true gray-skipped icons given GHA's job-level `if:` evaluation order. Tracked as a separate issue because it needs a different shape (Checks API auth, conclusion publishing) than the matrix emitter. + ## Files modified -- `.github/scripts/list_ai_checks.py` — emit `applies: bool` per matrix include from `paths:` frontmatter and `git diff --name-only`. +- `.github/scripts/list_ai_checks.py` — filter non-applicable rules out of the matrix entirely; fail closed on `git diff` errors; document supported `paths:` syntax in docstrings. +- `.github/scripts/tests/__init__.py` — new test package marker. +- `.github/scripts/tests/test_list_ai_checks.py` — 24 new unit tests covering parser, glob matching, fail-closed diff behavior, and end-to-end matrix emission. - `.github/scripts/run_ai_check.py` — drop `skip` from verdict enum and system prompt; rewrite path-scoped early-return and empty-diff branch to return `pass` with "does not apply" notes. -- `.github/workflows/ai-checks.yml` — add `fetch-depth: 0` and `BASE_REF` to `list` job; gate `check` matrix shards on `matrix.applies`. +- `.github/workflows/ai-checks.yml` — `list` job has `fetch-depth: 0` and exports `BASE_REF`; `check` job's `if:` is just the `include[0] != null` empty-matrix guard (no per-shard `matrix.applies`, which doesn't work). - `.ai-checks/pipeline-step-sync.md` — add `paths:` frontmatter. - `.ai-checks/asgi-wsgi-scott.md` — add `paths:` frontmatter. - `.ai-checks/qdrant-payload-slim.md` — add `paths:` frontmatter. diff --git a/doc/plans/2026-05-04-ai-checks-skipped.md b/doc/plans/2026-05-04-ai-checks-skipped.md index 6ae4431..7ce0c9b 100644 --- a/doc/plans/2026-05-04-ai-checks-skipped.md +++ b/doc/plans/2026-05-04-ai-checks-skipped.md @@ -62,3 +62,17 @@ Path-scoped non-applicability is now handled before the model is invoked — the - Posting check-runs via the Checks API (gives full skipped-icon coverage including model-driven skips, but doubles the rows in the Checks tab unless we restructure away from `strategy.matrix`). - Changing the rule format itself (frontmatter schema is unchanged — `paths:` was already supported by the driver, just unused). + +## Revision after codex review (2026-05-04) + +The codex review on PR #130 caught a blocker: GitHub Actions evaluates `jobs..if:` *before* expanding `strategy.matrix`, so a job-level `if: ${{ matrix.applies }}` gate is not visible at that scope and the workflow fails to expand any jobs at all (0-second workflow-validation failure). + +Pivoted to **option 1B**: filter non-applicable rules out of the matrix entirely at emission time in `list_ai_checks.py`. Non-applicable rules simply don't appear in the PR Checks tab. This is not the gray-skipped icon the issue originally asked for, but it does fix the original "indistinguishable from green pass" complaint and ships in a workflow that actually runs. + +Option 1C — publish gray rows via the GitHub Checks API from the `list` job — is the only way to render true gray-skipped icons given GHA's evaluation order. Tracked separately as **#135** for future work. + +Other follow-ups landed in the same revision: + +- `changed_files()` now fails closed (`sys.exit(1)`) on `git diff` errors instead of silently returning `[]`. +- New unit tests in `.github/scripts/tests/test_list_ai_checks.py` lock in parser, glob, and fail-closed behavior — including the explicitly-unsupported quoted-glob and YAML-list-form behaviors. +- `paths:` syntax is documented in docstrings on `parse_frontmatter` / `applies_for` and in the "Supported `paths:` syntax" section of the feature doc. diff --git a/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md b/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md index be46e51..e7618ff 100644 --- a/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md +++ b/doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md @@ -72,3 +72,25 @@ Ran `uv run python manage.py test --noinput`. 155 tests passed; 36 errors are en **Step 4 — Documentation.** Wrote `doc/plans/2026-05-04-ai-checks-skipped.md`, `doc/features/2026-05-04-ai-checks-skipped.md`, this implementation transcript, and the planning transcript. Added a `## 2026-05-04` section to `CHANGELOG.md` under `### Changed` linking to all four docs and citing #124. **Step 5 — PR.** Pushed branch and opened PR per AGENTS.md. + +## Revision after codex review + +### User +> You are adding a follow-up commit to PR #130 in the rafacm/ragtime repo, addressing the blocking concern from the codex review. (...) The user has decided to ship **option 1B** (filter `applies: false` entries out of the matrix entirely) as the smallest viable fix. Option 1C (Checks API publishing) is captured separately as #135 for future work. + +### Assistant + +The codex review on PR #130 (review id `4218279237`) flagged the workflow design as blocking: `jobs.check.if: ${{ matrix.applies }}` does not work because GitHub Actions evaluates job-level `if:` *before* expanding `strategy.matrix`. The empirical signal was workflow run `25307264491` failing in 0 seconds with no jobs expanded. + +Pivoted to **option 1B**: + +1. `list_ai_checks.py` now omits non-applicable rules from the matrix entirely — no `applies` field on emitted entries. The `applies_for` helper is unchanged in semantics; `main()` simply `continue`s past rules where it returns `False`. +2. `ai-checks.yml` drops `&& matrix.applies` from the `check` job's `if:`, keeping only the `include[0] != null` empty-matrix guard. Updated the comment block to explain why per-shard gating doesn't work and to point at #135 as the future-work option. +3. `changed_files()` now fails closed: a `subprocess.CalledProcessError` triggers `sys.exit(1)` with a clear stderr message ("Refusing to silently skip path-scoped rules"), matching the codex review's "fail closed on a bad base ref" suggestion. +4. Added `.github/scripts/tests/__init__.py` and `.github/scripts/tests/test_list_ai_checks.py` — 24 stdlib-only `unittest` tests covering well-formed/missing/multi-line/quoted/comment frontmatter, empty/single/comma-separated/nonmatching/multi-matching `applies_for` cases, the codex-flagged `*`-crosses-`/` and `**`-not-segment-aware glob semantics, the new `sys.exit(1)` fail-closed path on `git diff` errors, and the end-to-end `main()` matrix-emission flow asserting only applicable rules appear and that no `applies` field is emitted. The quoted-glob and YAML-list-form unsupported behaviors are explicitly locked in by tests so a future "fix" doesn't silently change them. +5. Added docstrings on `parse_frontmatter` and `applies_for` documenting the supported `paths:` syntax (single-line, comma-separated, unquoted, `fnmatch` semantics — not segment-aware). Added a "Supported `paths:` syntax" section to the feature doc. +6. Updated the plan doc with a "Revision after codex review" section and updated the feature doc's design narrative to reflect the matrix-filter approach. + +**Verification.** All 24 unit tests pass (`python -m unittest discover .github/scripts/tests -v`). Locally simulated matrix emission with `BASE_REF=origin/main python .github/scripts/list_ai_checks.py` — output contains only the 5 applicable rules with no `applies` field. + +**Out of scope per the user's instructions.** Did not change verdict-enum work (already correct), did not change the four `paths:` frontmatter (already correct), did not tighten `entity-creation-race-safety` scope (deferred per codex), did not address the `## Plan...` first-line nit or the verbatim-user-messages nit on the original session transcripts (the user can address separately if they want).