Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ai-checks/asgi-wsgi-scott.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
name: ASGI vs WSGI Awareness for Scott
description: Anything touching Scott's chat path must be tested under uvicorn (ASGI). runserver does not load asgi.py and will silently 404 the AG-UI endpoint.
paths: ragtime/asgi.py, chat/**, frontend/**
---

# ASGI vs WSGI Awareness for Scott
Expand Down
1 change: 1 addition & 0 deletions .ai-checks/entity-creation-race-safety.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
name: Entity Creation Race Safety
description: All Entity row creation must go through get_or_create with a sorted advisory lock — never bare Entity.objects.create().
paths: episodes/**
---

# Entity Creation Race Safety
Expand Down
1 change: 1 addition & 0 deletions .ai-checks/pipeline-step-sync.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
name: Pipeline Step Documentation Sync
description: When the 10-step ingestion pipeline changes, README.md, doc/README.md, and diagrams must be updated in lockstep.
paths: episodes/models.py, episodes/workflows.py, README.md, doc/README.md, doc/*.excalidraw
---

# Pipeline Step Documentation Sync
Expand Down
1 change: 1 addition & 0 deletions .ai-checks/qdrant-payload-slim.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
name: Slim Qdrant Payload Discipline
description: Qdrant chunk points must carry only filter fields. All display data is hydrated from Postgres at search time.
paths: episodes/vector_store.py
---

# Slim Qdrant Payload Discipline
Expand Down
141 changes: 131 additions & 10 deletions .github/scripts/list_ai_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,159 @@

Output line format (written to $GITHUB_OUTPUT):

matrix={"include": [{"name": "<rule name>", "path": ".ai-checks/foo.md"}, ...]}
matrix={"include": [{"name": "<rule>", "path": ".ai-checks/foo.md"}, ...]}

Only rules that **apply** to the current PR's changed file set are included.
Applicability is determined by reading each rule's optional `paths:`
frontmatter (comma-separated `fnmatch` globs) and matching it against
`git diff --name-only $BASE_REF...HEAD`. Rules without `paths:` always
apply (they're semantic and must be evaluated by the model).

Non-applicable rules are **omitted from the matrix entirely**, so they
simply don't appear in the PR Checks tab. We previously tried emitting
them with `applies: false` and gating the job on `if: ${{ matrix.applies }}`
to render gray "skipped" rows, but GitHub Actions evaluates job-level
`if:` *before* expanding `strategy.matrix`, so `matrix.applies` is not
visible at that scope and the workflow fails to expand any jobs at all.

The future-work option for true gray-skipped rows is to publish them via
the GitHub Checks API from the `list` job (tracked as a separate issue).
"""

from __future__ import annotations

import fnmatch
import json
import os
import re
import subprocess
import sys
from pathlib import Path

CHECKS_DIR = Path(".ai-checks")


def parse_name(path: Path) -> str:
def parse_frontmatter(path: Path) -> dict[str, str]:
"""Extract a flat key/value mapping from a Markdown file's YAML-ish frontmatter.

Supported frontmatter syntax (intentionally minimal — this is NOT a YAML parser):

- Delimited by `---` lines at the top of the file.
- Each line is a single ``key: value`` pair.
- Values are returned as raw strings, with surrounding whitespace stripped.
- Comment lines starting with ``#`` are ignored.

Explicitly NOT supported:

- Multi-line YAML lists (e.g. ``paths:\\n - foo\\n - bar``) — the value
ends at the newline, so the key resolves to an empty string and any
following ``- foo`` lines are dropped.
- Quoted values — quote characters are preserved literally in the
returned string (e.g. ``paths: "foo"`` yields ``'"foo"'``, which then
fails to match anything via :func:`fnmatch`).
- Block scalars (``|``, ``>``), anchors, references, etc.

Returns an empty dict if the file has no frontmatter delimiters.
"""
text = path.read_text()
m = re.match(r"^---\r?\n(.*?)\r?\n---", text, re.DOTALL)
if not m:
return path.stem
return {}
fm: dict[str, str] = {}
for line in m.group(1).splitlines():
if line.startswith("name:"):
return line.split(":", 1)[1].strip()
return path.stem
if ":" in line and not line.startswith("#"):
k, v = line.split(":", 1)
fm[k.strip()] = v.strip()
return fm


def changed_files(base: str) -> list[str]:
"""Return the list of files changed between ``base`` and ``HEAD``.

Fails closed: if ``git diff`` errors out (e.g. unknown base ref, shallow
clone without the base, repo corruption), this exits the process with
status 1 and a clear message on stderr. Silently returning ``[]`` would
cause every path-scoped rule to be marked non-applicable and silently
dropped from the matrix, which is exactly the kind of "no signal"
failure mode we're trying to avoid.
"""
try:
out = subprocess.run(
["git", "diff", "--name-only", f"{base}...HEAD"],
check=True,
capture_output=True,
text=True,
).stdout
except subprocess.CalledProcessError as exc:
stderr = (exc.stderr or "").strip()
sys.stderr.write(
f"ERROR: `git diff --name-only {base}...HEAD` failed: {stderr}. "
"Refusing to silently skip path-scoped rules.\n"
)
sys.exit(1)
return [line for line in out.splitlines() if line.strip()]


def matches_paths(files: list[str], patterns: list[str]) -> bool:
return any(fnmatch.fnmatch(f, p) for f in files for p in patterns)


def applies_for(fm: dict[str, str], files: list[str]) -> bool:
"""Decide whether a rule applies given its frontmatter and the changed files.

A rule with no ``paths:`` field (or an empty/whitespace-only value)
always applies — semantic rules must always run.

A rule with a ``paths:`` field is treated as a comma-separated list of
:mod:`fnmatch` glob patterns. The rule applies iff at least one changed
file matches at least one pattern.

Glob semantics (inherited from :mod:`fnmatch`):

- ``*`` matches any number of characters, **including** ``/`` — this is
not shell-segment-aware. ``doc/*.md`` matches ``doc/nested/foo.md``.
- ``**`` is **not** treated specially; it's just two consecutive ``*``.
In practice ``foo/**`` happens to match ``foo/bar/baz.py`` because
``*`` already crosses ``/``.
- ``?`` matches exactly one character (also crossing ``/``).
- Character classes ``[...]`` are supported.

Syntax constraints (see :func:`parse_frontmatter`):

- Single line only.
- Comma-separated.
- Unquoted — quote characters become part of the pattern and won't match.
- YAML list form (``- foo``) is not supported.
"""
raw = fm.get("paths", "").strip()
if not raw:
return True
patterns = [p.strip() for p in raw.split(",") if p.strip()]
if not patterns:
return True
return matches_paths(files, patterns)


def main() -> int:
if not CHECKS_DIR.is_dir():
print(f"matrix={json.dumps({'include': []})}")
return 0
items = [
{"name": parse_name(p), "path": str(p)}
for p in sorted(CHECKS_DIR.glob("*.md"))
]

base = os.environ.get("BASE_REF", "origin/main")
files = changed_files(base)

items = []
for p in sorted(CHECKS_DIR.glob("*.md")):
fm = parse_frontmatter(p)
if not applies_for(fm, files):
continue
items.append(
{
"name": fm.get("name", p.stem),
"path": str(p),
}
)

print(f"matrix={json.dumps({'include': items})}")
return 0

Expand Down
42 changes: 31 additions & 11 deletions .github/scripts/run_ai_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
`gemini/gemini-2.0-flash`. LiteLLM auto-reads each provider's canonical env
var (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, ...).

Exits 1 on `fail`, 0 on `pass` / `skip`. Each invocation is a single GH
Actions matrix job, so the job's pass/fail status becomes the PR check.
Exits 1 on `fail`, 0 on `pass`. Each invocation is a single GH Actions
matrix job, so the job's pass/fail status becomes the PR check.

Path-scoped non-applicability is now handled at the workflow level: the
`list` job evaluates each rule's `paths:` frontmatter against the PR's
changed files and emits `applies: bool` per matrix include. Shards with
`applies: false` are skipped at the GHA level (gray icon, no runner). The
driver itself only ever returns `pass` or `fail`.
"""

from __future__ import annotations
Expand Down Expand Up @@ -55,9 +61,10 @@ class ConfigError(RuntimeError):

The rule defines what to check. After reading the diff, report exactly one verdict via the `report_verdict` function:

- pass: the diff complies with the rule, OR the rule does not apply to this diff in any meaningful way.
- pass: the diff complies with the rule, OR the rule does not apply to this diff in any meaningful way. When the rule does not apply, set `summary` to "Rule does not apply." and put a one-line "rule does not apply because …" in `details`.
- fail: the diff clearly violates the rule, with concrete evidence visible in the diff.
- skip: the rule cannot be evaluated against this diff (e.g. no relevant files were changed and the rule is path-scoped).

Path-scoped non-applicability is handled before you are invoked — if you are evaluating this diff at all, the workflow has decided the rule is in scope. Your job is to check compliance, not to second-guess scope.

Be conservative. Return `fail` only when you can point at specific lines in the diff that violate the rule. Vague concerns, style nitpicks, or "could be improved" remarks are not failures — return `pass` and put your notes in the `details` field."""

Expand Down Expand Up @@ -85,7 +92,7 @@ class ConfigError(RuntimeError):
"properties": {
"verdict": {
"type": "string",
"enum": ["pass", "fail", "skip"],
"enum": ["pass", "fail"],
"description": "Overall verdict for this rule against the diff.",
},
"summary": {
Expand Down Expand Up @@ -141,15 +148,28 @@ def evaluate(check_path: Path, base: str, model: str) -> dict[str, str]:
diff, files = get_diff(base)

if not diff.strip():
return {"verdict": "skip", "summary": "Empty diff.", "details": ""}
return {
"verdict": "pass",
"summary": "Rule does not apply.",
"details": "Rule does not apply because the diff is empty.",
}

# Workflow normally gates path-scoped rules at the matrix level via
# `matrix.applies`, so this branch is mostly a safety net for direct
# CLI invocation. Returns `pass` with an explanatory note rather than
# `skip` (which is no longer in the verdict enum — semantic
# non-applicability is now `pass`, path non-applicability is gray-skipped
# at the GHA job level).
if "paths" in fm:
patterns = [p.strip() for p in fm["paths"].split(",") if p.strip()]
if patterns and not matches_paths(files, patterns):
return {
"verdict": "skip",
"summary": "No matching files changed.",
"details": f"Rule scoped to: `{fm['paths']}`",
"verdict": "pass",
"summary": "Rule does not apply.",
"details": (
f"Rule does not apply because no changed files match its "
f"path scope: `{fm['paths']}`."
),
}

if len(diff) > MAX_DIFF_CHARS:
Expand Down Expand Up @@ -203,7 +223,7 @@ def write_step_summary(check_name: str, result: dict[str, str]) -> None:
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if not summary_path:
return
marker = {"pass": "PASS", "fail": "FAIL", "skip": "SKIP"}[result["verdict"]]
marker = {"pass": "PASS", "fail": "FAIL"}[result["verdict"]]
md = (
f"# [{marker}] {check_name}\n\n"
f"**Verdict:** `{result['verdict']}`\n\n"
Expand Down Expand Up @@ -265,7 +285,7 @@ def main() -> int:
print(f"::error title=AI check setup error ({name})::{e}")
return 2

marker = {"pass": "PASS", "fail": "FAIL", "skip": "SKIP"}[result["verdict"]]
marker = {"pass": "PASS", "fail": "FAIL"}[result["verdict"]]
print(f"[{marker}] {name}")
print(result["summary"])
if result.get("details"):
Expand Down
Empty file.
Loading
Loading