From 94d5c63a32657f5f350a7d6012650f1ce6a21833 Mon Sep 17 00:00:00 2001 From: pengfei-threemoonslab Date: Fri, 15 May 2026 22:13:26 -0700 Subject: [PATCH 1/3] Add --check mode to generate_schemas.py + roundtrip tests (M4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends scripts/generate_schemas.py with a `--check` flag that verifies each committed docs/*.json schema is byte-identical to what the live Pydantic models produce — running the same post-processing as `write`, so v0.5's stable required-fields contract stays preserved. Drift exits non-zero with a unified-diff preview capped at 40 lines per file, plus the remediation command. Wires the check in CI before the test step so a Pydantic edit that forgets to regenerate fails fast with an actionable message. Refactors each `write_X_schema()` into a pure `build_X_schema() -> (Path, str)` and a thin write wrapper using a new `_emit()` helper. Tests call the builders directly via importlib.util — no subprocess — so a model edit failing the roundtrip is caught locally before CI. tests/test_schema_roundtrip.py (7 tests): - per-schema roundtrip for manifest, report, packet, checks catalog; - end-to-end `--check` exits 0 on a clean repo; - negative control: synthetic drift triggers exit 1 with diff preview; - builder purity: deterministic, returns (Path, str), trailing newline. CONTRIBUTING.md documents the model-edit → regen → commit workflow. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/ci.yml | 3 + CONTRIBUTING.md | 21 ++++ scripts/generate_schemas.py | 202 ++++++++++++++++++++++++++++----- tests/test_schema_roundtrip.py | 164 ++++++++++++++++++++++++++ 4 files changed, 363 insertions(+), 27 deletions(-) create mode 100644 tests/test_schema_roundtrip.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3ef0f5..1cc4323 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,9 @@ jobs: - name: Compile run: python -m compileall -q src tests + - name: Verify generated schemas are up to date + run: python scripts/generate_schemas.py --check + - name: Test run: python -m pytest --cov=agents_shipgate --cov-report=term-missing --cov-fail-under=75 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b803922..5c891ab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,6 +26,27 @@ agents-shipgate list-checks - false-positive reduction tests; - report/schema compatibility tests. +## Schema Changes + +The JSON Schemas under `docs/` (`manifest-v0.1.json`, `checks.json`, +`report-schema.v0.<minor>.json`, `packet-schema.v0.<minor>.json`) are +**generated artifacts**, not hand-written. They are checked into the +repo so external consumers can validate against a stable URL. + +If you change a Pydantic model — adding/removing a field, bumping +`report_schema_version`, editing `CheckMetadata` — you must regenerate +the schemas and commit them in the same PR: + +```bash +python scripts/generate_schemas.py +git add docs/ && git commit +``` + +CI runs `python scripts/generate_schemas.py --check` and fails fast +with a unified diff if a committed schema drifts from the live model. +The same drift is also caught by `tests/test_schema_roundtrip.py`, so +your test suite will reject the change locally before CI does. + ## Check Contributions Checks should be deterministic, explainable, and covered by tests. 
Avoid LLM calls, network calls, user-code import, or runtime tool execution. diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py index 79addea..b92e657 100644 --- a/scripts/generate_schemas.py +++ b/scripts/generate_schemas.py @@ -2,25 +2,34 @@ Run from the repo root: - python scripts/generate_schemas.py + python scripts/generate_schemas.py # write + python scripts/generate_schemas.py --check # verify no drift; exit 1 on diff -Writes: +Writes / verifies: - docs/manifest-v0.1.json (from agents_shipgate.config.schema) -- docs/checks.json (from agents-shipgate list-checks --json) +- docs/checks.json (from agents_shipgate.checks.registry.check_catalog) - docs/report-schema.v0.<minor>.json (from agents_shipgate.core.models.ReadinessReport; minor derived from report_schema_version default) +- docs/packet-schema.v0.<minor>.json + (from agents_shipgate.packet.models.EvidencePacket) -CI calls this script and asserts the working tree is clean afterward, so -out-of-date generated files fail the build — drift protection for any -field changes on Finding (e.g., patches) or ReadinessReport -(e.g., manifest_dir). +``--check`` mode is the M4 trust-hardening gate: it generates each schema in +memory (running the same post-processing as ``write``) and compares it to the +committed file. Drift exits non-zero with a unified diff preview, so a Pydantic +model edit that forgets to regenerate fails CI fast with an actionable message. + +Tests should import ``build_*_schema`` directly — they return ``(Path, str)`` +tuples without touching disk, so unit tests stay subprocess-free. 
""" from __future__ import annotations +import argparse +import difflib import json import sys +from collections.abc import Callable from pathlib import Path REPO_ROOT = Path(__file__).resolve().parent.parent @@ -31,7 +40,62 @@ sys.path.insert(0, str(SRC)) -def write_manifest_schema() -> None: +# --- Shared helpers --------------------------------------------------------- + +# Canonical JSON form for every schema we emit. Matches the v0.x convention +# already on disk: 2-space indent, sorted keys, trailing newline. Tests and +# the --check path both consume this exact form, so any future field reorder +# in Pydantic stays diffable as one logical change. +def _canonical_json(payload: object) -> str: + return json.dumps(payload, indent=2, sort_keys=True) + "\n" + + +_DIFF_PREVIEW_LINES = 40 + + +def _emit(target: Path, content: str, *, check_only: bool, drift: list[str]) -> bool: + """Write ``content`` to ``target`` (write mode) or compare (check mode). + + In check mode, on mismatch, appends a short unified-diff preview to + ``drift`` and returns False; the caller aggregates and exits 1. In write + mode, always writes and returns True. + """ + try: + relative = target.relative_to(REPO_ROOT) + except ValueError: + # Target outside the repo (e.g., test fixture with monkeypatched DOCS). + # Fall back to the bare path so error messages stay readable. + relative = target + if check_only: + if not target.exists(): + drift.append(f"{relative}: missing (run scripts/generate_schemas.py)") + return False + existing = target.read_text(encoding="utf-8") + if existing == content: + return True + diff_lines = list( + difflib.unified_diff( + existing.splitlines(keepends=True), + content.splitlines(keepends=True), + fromfile=f"{relative} (committed)", + tofile=f"{relative} (generated)", + n=2, + ) + ) + preview = "".join(diff_lines[:_DIFF_PREVIEW_LINES]) + suffix = ( + f"\n... 
({len(diff_lines) - _DIFF_PREVIEW_LINES} more diff lines truncated)\n" + if len(diff_lines) > _DIFF_PREVIEW_LINES + else "" + ) + drift.append(f"{relative}: drift detected\n{preview}{suffix}") + return False + target.write_text(content, encoding="utf-8") + print(f"Wrote {relative}") + return True + + +def build_manifest_schema() -> tuple[Path, str]: from agents_shipgate.config.schema import AgentsShipgateManifest schema = AgentsShipgateManifest.model_json_schema() @@ -46,11 +110,15 @@ def write_manifest_schema() -> None: "agents_shipgate.config.schema.AgentsShipgateManifest. Do not edit by hand." ) target = DOCS / "manifest-v0.1.json" - target.write_text(json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8") - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(schema) -def write_report_schema() -> None: +def write_manifest_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_manifest_schema() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) + + +def build_report_schema() -> tuple[Path, str]: """Generate docs/report-schema.v0.<minor>.json from the Pydantic ReadinessReport model. @@ -252,6 +320,10 @@ def write_report_schema() -> None: # the full block being present (Pydantic only marks fields without # defaults as required, but our consumers depend on the whole shape). if "ReleaseDecision" in defs: + # v0.17 adds contribution_rules — a deterministic per-finding + # audit of how each finding contributed to the decision. Required + # + always present (defaults to []) so consumers never need an + # existence check. defs["ReleaseDecision"]["required"] = sorted( [ "decision", @@ -261,6 +333,24 @@ def write_report_schema() -> None: "evidence_coverage", "baseline_delta", "fail_policy", + "contribution_rules", + ] + ) + if "ContributionRule" in defs: + # v0.17: pin the full audit-row contract. 
`fingerprint` is + # nullable but required-as-key (every emitted row carries the + # field; the value may be null for findings without a computed + # fingerprint). All other fields are required and non-nullable + # on the wire — build_release_decision emits one + # ContributionRule per report finding. + defs["ContributionRule"]["required"] = sorted( + [ + "finding_id", + "fingerprint", + "check_id", + "category", + "rule", + "rationale", ] ) if "ReleaseDecisionItem" in defs: @@ -597,7 +687,21 @@ def write_report_schema() -> None: "type": "object", "additionalProperties": True, "required": sorted( - ["name", "value", "distribution", "version", "check_id"] + # v0.17 (M5): plugin validation provenance is now required + # on every emitted loaded_plugins entry. ``validation_status`` + # is one of ``valid | load_failed | bad_signature | + # bad_metadata | id_collision | bad_floor`` and the two + # error lists are always present (empty for clean plugins). + [ + "name", + "value", + "distribution", + "version", + "check_id", + "validation_status", + "validation_errors", + "runtime_errors", + ] ), } @@ -667,11 +771,15 @@ def write_report_schema() -> None: } target = DOCS / f"report-schema.v{minor}.json" - target.write_text(json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8") - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(schema) + +def write_report_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_report_schema() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) -def write_checks_catalog() -> None: + +def build_checks_catalog() -> tuple[Path, str]: from agents_shipgate.checks.registry import check_catalog payload = { @@ -687,11 +795,15 @@ def write_checks_catalog() -> None: "checks": [check.model_dump(mode="json") for check in check_catalog()], } target = DOCS / "checks.json" - target.write_text(json.dumps(payload, 
indent=2, sort_keys=True) + "\n", encoding="utf-8") - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(payload) + +def write_checks_catalog(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_checks_catalog() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) -def write_packet_schema() -> None: + +def build_packet_schema() -> tuple[Path, str]: """Generate docs/packet-schema.v0.<minor>.json from EvidencePacket. Versioned independently from the report schema; bumping requires a @@ -714,18 +826,54 @@ def write_packet_schema() -> None: "agents_shipgate.packet.models.EvidencePacket. Do not edit by hand." ) target = DOCS / f"packet-schema.v{minor}.json" - target.write_text( - json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8" - ) - print(f"Wrote {target.relative_to(REPO_ROOT)}") + return target, _canonical_json(schema) + +def write_packet_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool: + target, content = build_packet_schema() + return _emit(target, content, check_only=check_only, drift=drift if drift is not None else []) + + +# Public ordered list of (name, builder) pairs. Tests and the CLI iterate this +# instead of hardcoding individual calls, so adding a new schema is one edit. +BUILDERS: tuple[tuple[str, Callable[[], tuple[Path, str]]], ...] = ( + ("manifest", build_manifest_schema), + ("checks_catalog", build_checks_catalog), + ("report", build_report_schema), + ("packet", build_packet_schema), +) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + prog="generate_schemas", + description=( + "Regenerate docs/*.json schemas (default) or verify they match the " + "current Pydantic models (--check)." 
+ ), + ) + parser.add_argument( + "--check", + action="store_true", + help="Verify committed schemas match the generators; exit 1 on drift.", + ) + args = parser.parse_args(argv) -def main() -> int: DOCS.mkdir(parents=True, exist_ok=True) - write_manifest_schema() - write_checks_catalog() - write_report_schema() - write_packet_schema() + drift: list[str] = [] + for _name, builder in BUILDERS: + target, content = builder() + _emit(target, content, check_only=args.check, drift=drift) + + if args.check and drift: + sys.stderr.write("\n".join(drift)) + sys.stderr.write( + "\n\nSchema drift detected in " + f"{len(drift)} file(s). To resolve:\n" + " python scripts/generate_schemas.py\n" + " git add docs/ && git commit\n" + ) + return 1 return 0 diff --git a/tests/test_schema_roundtrip.py b/tests/test_schema_roundtrip.py new file mode 100644 index 0000000..c0cabe0 --- /dev/null +++ b/tests/test_schema_roundtrip.py @@ -0,0 +1,164 @@ +"""Schema round-trip tests (M4 · trust hardening). + +Every committed schema under ``docs/`` (``manifest-v0.1.json``, +``checks.json``, the current-minor ``report-schema.v0.*.json``, and the +current-minor ``packet-schema.v0.*.json``) MUST match what +``scripts/generate_schemas.py`` produces from the live Pydantic models. + +These tests call the generator's builder functions directly — no +subprocess, no I/O — so a Pydantic edit that forgets to regenerate +fails the test locally with a clear diff before CI runs the same check. + +If a test here fails, run:: + + python scripts/generate_schemas.py + git add docs/ && git commit + +That is the same remediation surfaced by +``scripts/generate_schemas.py --check`` in CI. 
+""" + +from __future__ import annotations + +import difflib +import importlib.util +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +GENERATOR_PATH = REPO_ROOT / "scripts" / "generate_schemas.py" + + +def _load_generator(): + """Import scripts/generate_schemas.py without adding scripts/ to sys.path. + + importlib.util keeps the module local to this test file. The + generator's top-level ``sys.path.insert(0, str(SRC))`` then makes + ``agents_shipgate`` importable from a source checkout, the same as + when the script runs standalone. + """ + spec = importlib.util.spec_from_file_location( + "agents_shipgate_schema_generator", GENERATOR_PATH + ) + assert spec is not None and spec.loader is not None, ( + f"could not load {GENERATOR_PATH}" + ) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +@pytest.fixture(scope="module") +def generator(): + return _load_generator() + + +def _assert_match(target: Path, generated_content: str) -> None: + assert target.exists(), ( + f"{target.relative_to(REPO_ROOT)} is missing. " + "Run `python scripts/generate_schemas.py` to create it." + ) + committed = target.read_text(encoding="utf-8") + if committed == generated_content: + return + diff = "".join( + difflib.unified_diff( + committed.splitlines(keepends=True), + generated_content.splitlines(keepends=True), + fromfile=f"{target.relative_to(REPO_ROOT)} (committed)", + tofile=f"{target.relative_to(REPO_ROOT)} (generated)", + n=2, + ) + ) + pytest.fail( + f"{target.relative_to(REPO_ROOT)} drifted from the live Pydantic " + f"model. 
Run `python scripts/generate_schemas.py` and commit " + f"the result.\n\n{diff}" + ) + + +def test_manifest_schema_matches_committed_file(generator): + target, content = generator.build_manifest_schema() + _assert_match(target, content) + + +def test_report_schema_matches_committed_file(generator): + target, content = generator.build_report_schema() + _assert_match(target, content) + + +def test_packet_schema_matches_committed_file(generator): + target, content = generator.build_packet_schema() + _assert_match(target, content) + + +def test_checks_catalog_matches_committed_file(generator): + target, content = generator.build_checks_catalog() + _assert_match(target, content) + + +def test_check_mode_passes_on_current_repo(generator, capsys): + """End-to-end: `generate_schemas.py --check` exits 0 when artifacts match. + + Catches regressions in the --check wiring itself (e.g., a future + refactor that bypasses one of the four builders). + """ + exit_code = generator.main(["--check"]) + assert exit_code == 0, ( + "generate_schemas.py --check exited non-zero — at least one " + "schema file drifted. Run `python scripts/generate_schemas.py` " + "and commit." + ) + + +def test_check_mode_reports_drift(generator, tmp_path, monkeypatch, capsys): + """Negative control: a synthetic drift must trigger exit 1 with a diff. + + Asserts the failure path is wired correctly so check-mode never + silently passes on a real drift. Redirects DOCS to a temp tree so + one stale file plus three missing files exercise both drift shapes + (mismatch + missing). 
+ """ + monkeypatch.setattr(generator, "DOCS", tmp_path) + stale_target = tmp_path / "manifest-v0.1.json" + stale_target.write_text('{"stale": true}\n', encoding="utf-8") + + exit_code = generator.main(["--check"]) + err = capsys.readouterr().err + + assert exit_code == 1 + assert "drift detected" in err, "expected unified-diff preview for stale file" + assert "missing" in err, "expected 'missing' marker for absent files" + assert "python scripts/generate_schemas.py" in err, ( + "expected remediation command in failure output" + ) + + +def test_builders_are_pure(generator): + """Build functions return ``(Path, str)`` and produce identical output + on repeated calls. + + Locks the M4 invariant that builders are I/O-free and deterministic + — the same model state must always produce byte-identical schema + text, which is what makes the round-trip test trustworthy. + """ + for builder in ( + generator.build_manifest_schema, + generator.build_report_schema, + generator.build_packet_schema, + generator.build_checks_catalog, + ): + target_a, content_a = builder() + target_b, content_b = builder() + assert isinstance(target_a, Path) + assert isinstance(content_a, str) + assert target_a == target_b + assert content_a == content_b, ( + f"{builder.__name__} produced different output on repeated call; " + "generator is not deterministic." + ) + assert content_a.endswith("\n"), ( + f"{builder.__name__} output missing trailing newline; canonical " + "form requires it for stable git diffs." 
+ ) From 630d346167e6e85a2dc8e6630fd071def430d6bc Mon Sep 17 00:00:00 2001 From: pengfei-threemoonslab Date: Fri, 15 May 2026 22:27:07 -0700 Subject: [PATCH 2/3] Strip M5/M8 contamination from generator post-processing The initial M4 commit (94d5c63) included three required-field additions that referenced future fields not present on main: - ReleaseDecision.required gained "contribution_rules" (M8 audit) - A new ContributionRule.required block was added (M8) - loaded_plugins.required gained "validation_status", "validation_errors", "runtime_errors" (M5 plugin validation) These came from concurrent in-flight changes that contaminated the edit view of scripts/generate_schemas.py before commit. None of those fields exist on main's ReleaseDecision or loaded_plugins payloads, so `python scripts/generate_schemas.py --check` correctly reported drift in docs/report-schema.v0.16.json on CI. This commit restores the post-processing required lists to exactly what main has, so the M4 mechanism (--check, builder/write split, roundtrip tests) is the only contract change in this PR. The M5/M8 additions belong with their respective model changes in a future PR. Verified: `python scripts/generate_schemas.py --check` exits 0 against main's models; tests/test_schema_roundtrip.py all 7 tests pass; `git diff origin/main -- scripts/generate_schemas.py` shows only mechanism changes (helpers, --check flag, build/write split, BUILDERS, argparse). Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/generate_schemas.py | 38 +------------------------------------ 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py index b92e657..d33a42e 100644 --- a/scripts/generate_schemas.py +++ b/scripts/generate_schemas.py @@ -320,10 +320,6 @@ def build_report_schema() -> tuple[Path, str]: # the full block being present (Pydantic only marks fields without # defaults as required, but our consumers depend on the whole shape). 
if "ReleaseDecision" in defs: - # v0.17 adds contribution_rules — a deterministic per-finding - # audit of how each finding contributed to the decision. Required - # + always present (defaults to []) so consumers never need an - # existence check. defs["ReleaseDecision"]["required"] = sorted( [ "decision", @@ -333,24 +329,6 @@ def build_report_schema() -> tuple[Path, str]: "evidence_coverage", "baseline_delta", "fail_policy", - "contribution_rules", - ] - ) - if "ContributionRule" in defs: - # v0.17: pin the full audit-row contract. `fingerprint` is - # nullable but required-as-key (every emitted row carries the - # field; the value may be null for findings without a computed - # fingerprint). All other fields are required and non-nullable - # on the wire — build_release_decision emits one - # ContributionRule per report finding. - defs["ContributionRule"]["required"] = sorted( - [ - "finding_id", - "fingerprint", - "check_id", - "category", - "rule", - "rationale", ] ) if "ReleaseDecisionItem" in defs: @@ -687,21 +665,7 @@ def build_report_schema() -> tuple[Path, str]: "type": "object", "additionalProperties": True, "required": sorted( - # v0.17 (M5): plugin validation provenance is now required - # on every emitted loaded_plugins entry. ``validation_status`` - # is one of ``valid | load_failed | bad_signature | - # bad_metadata | id_collision | bad_floor`` and the two - # error lists are always present (empty for clean plugins). 
- [ - "name", - "value", - "distribution", - "version", - "check_id", - "validation_status", - "validation_errors", - "runtime_errors", - ] + ["name", "value", "distribution", "version", "check_id"] ), } From cd1b1cf1f2a37a1fc9bfebda99bfcb7405c5b3f8 Mon Sep 17 00:00:00 2001 From: pengfei-threemoonslab Date: Fri, 15 May 2026 22:34:38 -0700 Subject: [PATCH 3/3] Force plugins_enabled=False in checks-catalog build (P3 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reviewer flagged that ``build_checks_catalog()`` called ``check_catalog()`` without an explicit ``plugins_enabled=False``. With ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` plus any third-party check plugin installed on the host, the default ``check_catalog()`` resolves plugins from entry points and includes their metadata in the result. ``--check`` would then either: - falsely flag drift in the committed built-in-only ``docs/checks.json``, or - on a ``write`` run, silently overwrite the committed catalog with a plugin-augmented one. Either path breaks the "deterministic artifact, regardless of host environment" guarantee that the M4 mechanism is supposed to provide. Fix: pass ``plugins_enabled=False`` explicitly. Same value the implicit default would have on a clean machine, but immune to env contamination. Regression test ``test_checks_catalog_ignores_enabled_plugins``: - installs a fake plugin entry point with a distinctive check_id; - sets ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` via monkeypatch; - cross-checks the threat by calling ``registry.check_catalog()`` directly and asserting the canary IS present (so the test fails loudly if the upstream plugin path ever stops loading — guards against vacuous passes); - then asserts ``build_checks_catalog()`` output is byte-identical to a clean run with plugins env unset and does not contain the canary check_id. 
Docstring/description string unchanged — only the build call site gets the explicit kwarg, so the generated artifact stays byte- identical to ``main``'s committed ``docs/checks.json``. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/generate_schemas.py | 14 +++++- tests/test_schema_roundtrip.py | 79 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py index d33a42e..f4e3c38 100644 --- a/scripts/generate_schemas.py +++ b/scripts/generate_schemas.py @@ -746,6 +746,15 @@ def write_report_schema(*, check_only: bool = False, drift: list[str] | None = N def build_checks_catalog() -> tuple[Path, str]: from agents_shipgate.checks.registry import check_catalog + # Plugins are explicitly disabled here. The committed docs/checks.json + # is the built-in catalog only — if a developer has + # ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` set in their shell and any + # third-party check plugin installed, the default ``check_catalog()`` + # would augment the result with that plugin's metadata, and + # ``--check`` would then either falsely flag drift or, worse, + # silently overwrite the committed catalog with a plugin-augmented + # one. Force plugins off so the artifact is deterministic regardless + # of the host environment. payload = { "$id": ( "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/" @@ -756,7 +765,10 @@ def build_checks_catalog() -> tuple[Path, str]: "Machine-readable catalog of built-in checks. Generated from " "agents_shipgate.checks.registry.check_catalog(). Do not edit by hand." 
), - "checks": [check.model_dump(mode="json") for check in check_catalog()], + "checks": [ + check.model_dump(mode="json") + for check in check_catalog(plugins_enabled=False) + ], } target = DOCS / "checks.json" return target, _canonical_json(payload) diff --git a/tests/test_schema_roundtrip.py b/tests/test_schema_roundtrip.py index c0cabe0..a8cf51f 100644 --- a/tests/test_schema_roundtrip.py +++ b/tests/test_schema_roundtrip.py @@ -162,3 +162,82 @@ def test_builders_are_pure(generator): f"{builder.__name__} output missing trailing newline; canonical " "form requires it for stable git diffs." ) + + +def test_checks_catalog_ignores_enabled_plugins(generator, monkeypatch): + """Regression: the generated docs/checks.json must be the built-in + catalog only, regardless of ``AGENTS_SHIPGATE_ENABLE_PLUGINS``. + + Before the fix, ``build_checks_catalog()`` called + ``check_catalog()`` without an explicit ``plugins_enabled=False``. + With the env var set and any third-party plugin installed, the + function would resolve plugins from entry points and include them + in the generated catalog. ``--check`` would then either falsely + flag drift against the built-in-only committed + ``docs/checks.json``, or — on a `write` run — silently overwrite + the committed artifact with a plugin-augmented one. + + The fix forces ``plugins_enabled=False`` at the build site. This + test installs a fake plugin entry point with a distinctive + ``check_id`` and asserts: + + 1. the plugin's check_id is absent from the generator output, and + 2. the generator output is byte-identical to a clean run with + plugins env unset. 
+ """ + from agents_shipgate.checks import registry + + def plugin(_context): # pragma: no cover — never executed + return [] + + plugin.AGENTS_SHIPGATE_METADATA = { + "id": "PLUGIN-DETERMINISM-CANARY", + "category": "custom", + "default_severity": "medium", + "description": "Canary plugin for generator determinism test.", + } + + class FakeEntryPoint: + value = "acme_shipgate_checks:run" + + def load(self): + return plugin + + # First, generate the baseline (plugins env unset). + monkeypatch.delenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", raising=False) + monkeypatch.setattr(registry, "entry_points", lambda group: []) + _baseline_target, baseline_content = generator.build_checks_catalog() + assert "PLUGIN-DETERMINISM-CANARY" not in baseline_content, ( + "sanity: baseline must not already contain the canary id" + ) + + # Now enable plugins AND install the canary plugin via a fake + # entry point. The generator must still produce byte-identical + # output — plugins must not leak into the deterministic artifact. + monkeypatch.setenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", "1") + monkeypatch.setattr(registry, "entry_points", lambda group: [FakeEntryPoint()]) + + # Cross-check the threat model: with plugins enabled at the + # registry call site, the catalog WOULD include the canary. If + # this assertion ever flips, the upstream check_catalog() + # plugin-resolution path no longer works the way the regression + # is testing for, and this test needs to be reworked rather than + # falsely passing. 
+ augmented = registry.check_catalog() + assert any(check.id == "PLUGIN-DETERMINISM-CANARY" for check in augmented), ( + "test setup did not actually enable the plugin path; the " + "regression check below would pass vacuously" + ) + + _target, content = generator.build_checks_catalog() + assert "PLUGIN-DETERMINISM-CANARY" not in content, ( + "AGENTS_SHIPGATE_ENABLE_PLUGINS=1 leaked a plugin into the " + "generated docs/checks.json — generator must force " + "plugins_enabled=False so the committed artifact is " + "deterministic regardless of host environment." + ) + assert content == baseline_content, ( + "generator output diverged between plugins-off and " + "plugins-on runs; the committed artifact must be a pure " + "function of the built-in catalog." + )