diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3ef0f5..1cc4323 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,9 @@ jobs: - name: Compile run: python -m compileall -q src tests + - name: Verify generated schemas are up to date + run: python scripts/generate_schemas.py --check + - name: Test run: python -m pytest --cov=agents_shipgate --cov-report=term-missing --cov-fail-under=75 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b803922..5c891ab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,6 +26,27 @@ agents-shipgate list-checks - false-positive reduction tests; - report/schema compatibility tests. +## Schema Changes + +The JSON Schemas under `docs/` (`manifest-v0.1.json`, `checks.json`, +`report-schema.v0..json`, `packet-schema.v0..json`) are +**generated artifacts**, not hand-written. They are checked into the +repo so external consumers can validate against a stable URL. + +If you change a Pydantic model — adding/removing a field, bumping +`report_schema_version`, editing `CheckMetadata` — you must regenerate +the schemas and commit them in the same PR: + +```bash +python scripts/generate_schemas.py +git add docs/ && git commit +``` + +CI runs `python scripts/generate_schemas.py --check` and fails fast +with a unified diff if a committed schema drifts from the live model. +The same drift is also caught by `tests/test_schema_roundtrip.py`, so +your test suite will reject the change locally before CI does. + ## Check Contributions Checks should be deterministic, explainable, and covered by tests. Avoid LLM calls, network calls, user-code import, or runtime tool execution. diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py index 79addea..f4e3c38 100644 --- a/scripts/generate_schemas.py +++ b/scripts/generate_schemas.py @@ -2,25 +2,34 @@ Run from the repo root: - python scripts/generate_schemas.py + python scripts/generate_schemas.py # write + python scripts/generate_schemas.py --check # verify no drift; exit 1 on diff -Writes: +Writes / verifies: - docs/manifest-v0.1.json (from agents_shipgate.config.schema) -- docs/checks.json (from agents-shipgate list-checks --json) +- docs/checks.json (from agents_shipgate.checks.registry.check_catalog) - docs/report-schema.v0..json (from agents_shipgate.core.models.ReadinessReport; minor derived from report_schema_version default) +- docs/packet-schema.v0..json + (from agents_shipgate.packet.models.EvidencePacket) -CI calls this script and asserts the working tree is clean afterward, so -out-of-date generated files fail the build — drift protection for any -field changes on Finding (e.g., patches) or ReadinessReport -(e.g., manifest_dir). +``--check`` mode is the M4 trust-hardening gate: it generates each schema in +memory (running the same post-processing as ``write``) and compares it to the +committed file. Drift exits non-zero with a unified diff preview, so a Pydantic +model edit that forgets to regenerate fails CI fast with an actionable message. + +Tests should import ``build_*_schema`` directly — they return ``(Path, str)`` +tuples without touching disk, so unit tests stay subprocess-free. """ from __future__ import annotations +import argparse +import difflib import json import sys +from collections.abc import Callable from pathlib import Path REPO_ROOT = Path(__file__).resolve().parent.parent @@ -31,7 +40,62 @@ sys.path.insert(0, str(SRC)) -def write_manifest_schema() -> None: +# --- Shared helpers --------------------------------------------------------- + +# Canonical JSON form for every schema we emit. Matches the v0.x convention +# already on disk: 2-space indent, sorted keys, trailing newline. Tests and +# the --check path both consume this exact form, so any future field reorder +# in Pydantic stays diffable as one logical change. +def _canonical_json(payload: object) -> str: + return json.dumps(payload, indent=2, sort_keys=True) + "\n" + + +_DIFF_PREVIEW_LINES = 40 + + +def _emit(target: Path, content: str, *, check_only: bool, drift: list[str]) -> bool: + """Write ``content`` to ``target`` (write mode) or compare (check mode). + + In check mode, on mismatch, appends a short unified-diff preview to + ``drift`` and returns False; the caller aggregates and exits 1. In write + mode, always writes and returns True. + """ + try: + relative = target.relative_to(REPO_ROOT) + except ValueError: + # Target outside the repo (e.g., test fixture with monkeypatched DOCS). + # Fall back to the bare path so error messages stay readable. + relative = target + if check_only: + if not target.exists(): + drift.append(f"{relative}: missing (run scripts/generate_schemas.py)") + return False + existing = target.read_text(encoding="utf-8") + if existing == content: + return True + diff_lines = list( + difflib.unified_diff( + existing.splitlines(keepends=True), + content.splitlines(keepends=True), + fromfile=f"{relative} (committed)", + tofile=f"{relative} (generated)", + n=2, + ) + ) + preview = "".join(diff_lines[:_DIFF_PREVIEW_LINES]) + suffix = ( + f"\n... ({len(diff_lines) - _DIFF_PREVIEW_LINES} more diff lines truncated)\n" + if len(diff_lines) > _DIFF_PREVIEW_LINES + else "" + ) + drift.append(f"{relative}: drift detected\n{preview}{suffix}") + return False + target.write_text(content, encoding="utf-8") + print(f"Wrote {relative}") + return True + + +def build_manifest_schema() -> tuple[Path, str]: from agents_shipgate.config.schema import AgentsShipgateManifest schema = AgentsShipgateManifest.model_json_schema() @@ -46,11 +110,15 @@ def write_manifest_schema() -> None: "agents_shipgate.config.schema.AgentsShipgateManifest. Do not edit by hand." ) target = DOCS / "manifest-v0.1.json" - target.write_text(json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8") - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(schema) + +def write_manifest_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_manifest_schema() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) -def write_report_schema() -> None: + +def build_report_schema() -> tuple[Path, str]: """Generate docs/report-schema.v0..json from the Pydantic ReadinessReport model. @@ -667,13 +735,26 @@ def write_report_schema() -> None: } target = DOCS / f"report-schema.v{minor}.json" - target.write_text(json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8") - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(schema) + + +def write_report_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_report_schema() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) -def write_checks_catalog() -> None: +def build_checks_catalog() -> tuple[Path, str]: from agents_shipgate.checks.registry import check_catalog + # Plugins are explicitly disabled here. The committed docs/checks.json + # is the built-in catalog only — if a developer has + # ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` set in their shell and any + # third-party check plugin installed, the default ``check_catalog()`` + # would augment the result with that plugin's metadata, and + # ``--check`` would then either falsely flag drift or, worse, + # silently overwrite the committed catalog with a plugin-augmented + # one. Force plugins off so the artifact is deterministic regardless + # of the host environment. payload = { "$id": ( "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/" @@ -684,14 +765,21 @@ def write_checks_catalog() -> None: "Machine-readable catalog of built-in checks. Generated from " "agents_shipgate.checks.registry.check_catalog(). Do not edit by hand." ), - "checks": [check.model_dump(mode="json") for check in check_catalog()], + "checks": [ + check.model_dump(mode="json") + for check in check_catalog(plugins_enabled=False) + ], } target = DOCS / "checks.json" - target.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(payload) -def write_packet_schema() -> None: +def write_checks_catalog(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_checks_catalog() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) + + +def build_packet_schema() -> tuple[Path, str]: """Generate docs/packet-schema.v0..json from EvidencePacket. Versioned independently from the report schema; bumping requires a @@ -714,18 +802,54 @@ def write_packet_schema() -> None: "agents_shipgate.packet.models.EvidencePacket. Do not edit by hand." ) target = DOCS / f"packet-schema.v{minor}.json" - target.write_text( - json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8" - ) - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(schema) + + +def write_packet_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_packet_schema() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) + +# Public ordered list of (name, builder) pairs. Tests and the CLI iterate this +# instead of hardcoding individual calls, so adding a new schema is one edit. +BUILDERS: tuple[tuple[str, Callable[[], tuple[Path, str]]], ...] = ( + ("manifest", build_manifest_schema), + ("checks_catalog", build_checks_catalog), + ("report", build_report_schema), + ("packet", build_packet_schema), +) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + prog="generate_schemas", + description=( + "Regenerate docs/*.json schemas (default) or verify they match the " + "current Pydantic models (--check)." + ), + ) + parser.add_argument( + "--check", + action="store_true", + help="Verify committed schemas match the generators; exit 1 on drift.", + ) + args = parser.parse_args(argv) -def main() -> int: DOCS.mkdir(parents=True, exist_ok=True) - write_manifest_schema() - write_checks_catalog() - write_report_schema() - write_packet_schema() + drift: list[str] = [] + for _name, builder in BUILDERS: + target, content = builder() + _emit(target, content, check_only=args.check, drift=drift) + + if args.check and drift: + sys.stderr.write("\n".join(drift)) + sys.stderr.write( + "\n\nSchema drift detected in " + f"{len(drift)} file(s). To resolve:\n" + " python scripts/generate_schemas.py\n" + " git add docs/ && git commit\n" + ) + return 1 return 0 diff --git a/tests/test_schema_roundtrip.py b/tests/test_schema_roundtrip.py new file mode 100644 index 0000000..a8cf51f --- /dev/null +++ b/tests/test_schema_roundtrip.py @@ -0,0 +1,243 @@ +"""Schema round-trip tests (M4 · trust hardening). + +Every committed schema under ``docs/`` (``manifest-v0.1.json``, +``checks.json``, the current-minor ``report-schema.v0.*.json``, and the +current-minor ``packet-schema.v0.*.json``) MUST match what +``scripts/generate_schemas.py`` produces from the live Pydantic models. + +These tests call the generator's builder functions directly — no +subprocess, no I/O — so a Pydantic edit that forgets to regenerate +fails the test locally with a clear diff before CI runs the same check. + +If a test here fails, run:: + + python scripts/generate_schemas.py + git add docs/ && git commit + +That is the same remediation surfaced by +``scripts/generate_schemas.py --check`` in CI. +""" + +from __future__ import annotations + +import difflib +import importlib.util +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +GENERATOR_PATH = REPO_ROOT / "scripts" / "generate_schemas.py" + + +def _load_generator(): + """Import scripts/generate_schemas.py without adding scripts/ to sys.path. + + importlib.util keeps the module local to this test file. The + generator's top-level ``sys.path.insert(0, str(SRC))`` then makes + ``agents_shipgate`` importable from a source checkout, the same as + when the script runs standalone. + """ + spec = importlib.util.spec_from_file_location( + "agents_shipgate_schema_generator", GENERATOR_PATH + ) + assert spec is not None and spec.loader is not None, ( + f"could not load {GENERATOR_PATH}" + ) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +@pytest.fixture(scope="module") +def generator(): + return _load_generator() + + +def _assert_match(target: Path, generated_content: str) -> None: + assert target.exists(), ( + f"{target.relative_to(REPO_ROOT)} is missing. " + "Run `python scripts/generate_schemas.py` to create it." + ) + committed = target.read_text(encoding="utf-8") + if committed == generated_content: + return + diff = "".join( + difflib.unified_diff( + committed.splitlines(keepends=True), + generated_content.splitlines(keepends=True), + fromfile=f"{target.relative_to(REPO_ROOT)} (committed)", + tofile=f"{target.relative_to(REPO_ROOT)} (generated)", + n=2, + ) + ) + pytest.fail( + f"{target.relative_to(REPO_ROOT)} drifted from the live Pydantic " + f"model. Run `python scripts/generate_schemas.py` and commit " + f"the result.\n\n{diff}" + ) + + +def test_manifest_schema_matches_committed_file(generator): + target, content = generator.build_manifest_schema() + _assert_match(target, content) + + +def test_report_schema_matches_committed_file(generator): + target, content = generator.build_report_schema() + _assert_match(target, content) + + +def test_packet_schema_matches_committed_file(generator): + target, content = generator.build_packet_schema() + _assert_match(target, content) + + +def test_checks_catalog_matches_committed_file(generator): + target, content = generator.build_checks_catalog() + _assert_match(target, content) + + +def test_check_mode_passes_on_current_repo(generator, capsys): + """End-to-end: `generate_schemas.py --check` exits 0 when artifacts match. + + Catches regressions in the --check wiring itself (e.g., a future + refactor that bypasses one of the four builders). + """ + exit_code = generator.main(["--check"]) + assert exit_code == 0, ( + "generate_schemas.py --check exited non-zero — at least one " + "schema file drifted. Run `python scripts/generate_schemas.py` " + "and commit." + ) + + +def test_check_mode_reports_drift(generator, tmp_path, monkeypatch, capsys): + """Negative control: a synthetic drift must trigger exit 1 with a diff. + + Asserts the failure path is wired correctly so check-mode never + silently passes on a real drift. Redirects DOCS to a temp tree so + one stale file plus three missing files exercise both drift shapes + (mismatch + missing). + """ + monkeypatch.setattr(generator, "DOCS", tmp_path) + stale_target = tmp_path / "manifest-v0.1.json" + stale_target.write_text('{"stale": true}\n', encoding="utf-8") + + exit_code = generator.main(["--check"]) + err = capsys.readouterr().err + + assert exit_code == 1 + assert "drift detected" in err, "expected unified-diff preview for stale file" + assert "missing" in err, "expected 'missing' marker for absent files" + assert "python scripts/generate_schemas.py" in err, ( + "expected remediation command in failure output" + ) + + +def test_builders_are_pure(generator): + """Build functions return ``(Path, str)`` and produce identical output + on repeated calls. + + Locks the M4 invariant that builders are I/O-free and deterministic + — the same model state must always produce byte-identical schema + text, which is what makes the round-trip test trustworthy. + """ + for builder in ( + generator.build_manifest_schema, + generator.build_report_schema, + generator.build_packet_schema, + generator.build_checks_catalog, + ): + target_a, content_a = builder() + target_b, content_b = builder() + assert isinstance(target_a, Path) + assert isinstance(content_a, str) + assert target_a == target_b + assert content_a == content_b, ( + f"{builder.__name__} produced different output on repeated call; " + "generator is not deterministic." + ) + assert content_a.endswith("\n"), ( + f"{builder.__name__} output missing trailing newline; canonical " + "form requires it for stable git diffs." + ) + + +def test_checks_catalog_ignores_enabled_plugins(generator, monkeypatch): + """Regression: the generated docs/checks.json must be the built-in + catalog only, regardless of ``AGENTS_SHIPGATE_ENABLE_PLUGINS``. + + Before the fix, ``build_checks_catalog()`` called + ``check_catalog()`` without an explicit ``plugins_enabled=False``. + With the env var set and any third-party plugin installed, the + function would resolve plugins from entry points and include them + in the generated catalog. ``--check`` would then either falsely + flag drift against the built-in-only committed + ``docs/checks.json``, or — on a `write` run — silently overwrite + the committed artifact with a plugin-augmented one. + + The fix forces ``plugins_enabled=False`` at the build site. This + test installs a fake plugin entry point with a distinctive + ``check_id`` and asserts: + + 1. the plugin's check_id is absent from the generator output, and + 2. the generator output is byte-identical to a clean run with + plugins env unset. + """ + from agents_shipgate.checks import registry + + def plugin(_context): # pragma: no cover — never executed + return [] + + plugin.AGENTS_SHIPGATE_METADATA = { + "id": "PLUGIN-DETERMINISM-CANARY", + "category": "custom", + "default_severity": "medium", + "description": "Canary plugin for generator determinism test.", + } + + class FakeEntryPoint: + value = "acme_shipgate_checks:run" + + def load(self): + return plugin + + # First, generate the baseline (plugins env unset). + monkeypatch.delenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", raising=False) + monkeypatch.setattr(registry, "entry_points", lambda group: []) + _baseline_target, baseline_content = generator.build_checks_catalog() + assert "PLUGIN-DETERMINISM-CANARY" not in baseline_content, ( + "sanity: baseline must not already contain the canary id" + ) + + # Now enable plugins AND install the canary plugin via a fake + # entry point. The generator must still produce byte-identical + # output — plugins must not leak into the deterministic artifact. + monkeypatch.setenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", "1") + monkeypatch.setattr(registry, "entry_points", lambda group: [FakeEntryPoint()]) + + # Cross-check the threat model: with plugins enabled at the + # registry call site, the catalog WOULD include the canary. If + # this assertion ever flips, the upstream check_catalog() + # plugin-resolution path no longer works the way the regression + # is testing for, and this test needs to be reworked rather than + # falsely passing. + augmented = registry.check_catalog() + assert any(check.id == "PLUGIN-DETERMINISM-CANARY" for check in augmented), ( + "test setup did not actually enable the plugin path; the " + "regression check below would pass vacuously" + ) + + _target, content = generator.build_checks_catalog() + assert "PLUGIN-DETERMINISM-CANARY" not in content, ( + "AGENTS_SHIPGATE_ENABLE_PLUGINS=1 leaked a plugin into the " + "generated docs/checks.json — generator must force " + "plugins_enabled=False so the committed artifact is " + "deterministic regardless of host environment." + ) + assert content == baseline_content, ( + "generator output diverged between plugins-off and " + "plugins-on runs; the committed artifact must be a pure " + "function of the built-in catalog." + )