diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b3ef0f5..1cc4323 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,6 +33,9 @@ jobs:
       - name: Compile
         run: python -m compileall -q src tests
 
+      - name: Verify generated schemas are up to date
+        run: python scripts/generate_schemas.py --check
+
       - name: Test
         run: python -m pytest --cov=agents_shipgate --cov-report=term-missing --cov-fail-under=75
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b803922..5c891ab 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -26,6 +26,27 @@ agents-shipgate list-checks
 - false-positive reduction tests;
 - report/schema compatibility tests.
 
+## Schema Changes
+
+The JSON Schemas under `docs/` (`manifest-v0.1.json`, `checks.json`,
+`report-schema.v0.<minor>.json`, `packet-schema.v0.<minor>.json`) are
+**generated artifacts**, not hand-written. They are checked into the
+repo so external consumers can validate against a stable URL.
+
+If you change a Pydantic model — adding/removing a field, bumping
+`report_schema_version`, editing `CheckMetadata` — you must regenerate
+the schemas and commit them in the same PR:
+
+```bash
+python scripts/generate_schemas.py
+git add docs/ && git commit
+```
+
+CI runs `python scripts/generate_schemas.py --check` and fails fast
+with a unified diff if a committed schema drifts from the live model.
+The same drift is also caught by `tests/test_schema_roundtrip.py`, so
+your test suite will reject the change locally before CI does.
+
 ## Check Contributions
 
 Checks should be deterministic, explainable, and covered by tests. Avoid LLM calls, network calls, user-code import, or runtime tool execution.
diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py
index 79addea..f4e3c38 100644
--- a/scripts/generate_schemas.py
+++ b/scripts/generate_schemas.py
@@ -2,25 +2,34 @@
 
 Run from the repo root:
 
-    python scripts/generate_schemas.py
+    python scripts/generate_schemas.py            # write
+    python scripts/generate_schemas.py --check    # verify no drift; exit 1 on diff
 
-Writes:
+Writes / verifies:
 - docs/manifest-v0.1.json       (from agents_shipgate.config.schema)
-- docs/checks.json              (from agents-shipgate list-checks --json)
+- docs/checks.json              (from agents_shipgate.checks.registry.check_catalog)
 - docs/report-schema.v0.<minor>.json
                                 (from agents_shipgate.core.models.ReadinessReport;
                                  minor derived from report_schema_version default)
+- docs/packet-schema.v0.<minor>.json
+                                (from agents_shipgate.packet.models.EvidencePacket)
 
-CI calls this script and asserts the working tree is clean afterward, so
-out-of-date generated files fail the build — drift protection for any
-field changes on Finding (e.g., patches) or ReadinessReport
-(e.g., manifest_dir).
+``--check`` mode is the M4 trust-hardening gate: it generates each schema in
+memory (running the same post-processing as ``write``) and compares it to the
+committed file. Drift exits non-zero with a unified diff preview, so a Pydantic
+model edit that forgets to regenerate fails CI fast with an actionable message.
+
+Tests should import ``build_*_schema`` directly — they return ``(Path, str)``
+tuples without touching disk, so unit tests stay subprocess-free.
 """
 
 from __future__ import annotations
 
+import argparse
+import difflib
 import json
 import sys
+from collections.abc import Callable
 from pathlib import Path
 
 REPO_ROOT = Path(__file__).resolve().parent.parent
@@ -31,7 +40,62 @@
 sys.path.insert(0, str(SRC))
 
 
-def write_manifest_schema() -> None:
+# --- Shared helpers ---------------------------------------------------------
+
+# Canonical JSON form for every schema we emit. Matches the v0.x convention
+# already on disk: 2-space indent, sorted keys, trailing newline. Tests and
+# the --check path both consume this exact form, so any future field reorder
+# in Pydantic stays diffable as one logical change.
+def _canonical_json(payload: object) -> str:
+    return json.dumps(payload, indent=2, sort_keys=True) + "\n"
+
+
+_DIFF_PREVIEW_LINES = 40
+
+
+def _emit(target: Path, content: str, *, check_only: bool, drift: list[str]) -> bool:
+    """Write ``content`` to ``target`` (write mode) or compare (check mode).
+
+    In check mode, on mismatch, appends a short unified-diff preview to
+    ``drift`` and returns False; the caller aggregates and exits 1. In write
+    mode, always writes and returns True.
+    """
+    try:
+        relative = target.relative_to(REPO_ROOT)
+    except ValueError:
+        # Target outside the repo (e.g., test fixture with monkeypatched DOCS).
+        # Fall back to the bare path so error messages stay readable.
+        relative = target
+    if check_only:
+        if not target.exists():
+            drift.append(f"{relative}: missing (run scripts/generate_schemas.py)")
+            return False
+        existing = target.read_text(encoding="utf-8")
+        if existing == content:
+            return True
+        diff_lines = list(
+            difflib.unified_diff(
+                existing.splitlines(keepends=True),
+                content.splitlines(keepends=True),
+                fromfile=f"{relative} (committed)",
+                tofile=f"{relative} (generated)",
+                n=2,
+            )
+        )
+        preview = "".join(diff_lines[:_DIFF_PREVIEW_LINES])
+        suffix = (
+            f"\n... ({len(diff_lines) - _DIFF_PREVIEW_LINES} more diff lines truncated)\n"
+            if len(diff_lines) > _DIFF_PREVIEW_LINES
+            else ""
+        )
+        drift.append(f"{relative}: drift detected\n{preview}{suffix}")
+        return False
+    target.write_text(content, encoding="utf-8")
+    print(f"Wrote {relative}")
+    return True
+
+
+def build_manifest_schema() -> tuple[Path, str]:
     from agents_shipgate.config.schema import AgentsShipgateManifest
 
     schema = AgentsShipgateManifest.model_json_schema()
@@ -46,11 +110,15 @@ def write_manifest_schema() -> None:
         "agents_shipgate.config.schema.AgentsShipgateManifest. Do not edit by hand."
     )
     target = DOCS / "manifest-v0.1.json"
-    target.write_text(json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8")
-    print(f"Wrote {target.relative_to(REPO_ROOT)}")
+    return target, _canonical_json(schema)
+
 
+def write_manifest_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool:
+    target, content = build_manifest_schema()
+    return _emit(target, content, check_only=check_only, drift=drift if drift is not None else [])
 
-def write_report_schema() -> None:
+
+def build_report_schema() -> tuple[Path, str]:
     """Generate docs/report-schema.v0.<minor>.json from the Pydantic
     ReadinessReport model.
 
@@ -667,13 +735,26 @@ def write_report_schema() -> None:
     }
 
     target = DOCS / f"report-schema.v{minor}.json"
-    target.write_text(json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8")
-    print(f"Wrote {target.relative_to(REPO_ROOT)}")
+    return target, _canonical_json(schema)
+
+
+def write_report_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool:
+    target, content = build_report_schema()
+    return _emit(target, content, check_only=check_only, drift=drift if drift is not None else [])
 
 
-def write_checks_catalog() -> None:
+def build_checks_catalog() -> tuple[Path, str]:
     from agents_shipgate.checks.registry import check_catalog
 
+    # Plugins are explicitly disabled here. The committed docs/checks.json
+    # is the built-in catalog only — if a developer has
+    # ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` set in their shell and any
+    # third-party check plugin installed, the default ``check_catalog()``
+    # would augment the result with that plugin's metadata, and
+    # ``--check`` would then either falsely flag drift or, worse,
+    # silently overwrite the committed catalog with a plugin-augmented
+    # one. Force plugins off so the artifact is deterministic regardless
+    # of the host environment.
     payload = {
         "$id": (
             "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/"
@@ -684,14 +765,21 @@ def write_checks_catalog() -> None:
             "Machine-readable catalog of built-in checks. Generated from "
             "agents_shipgate.checks.registry.check_catalog(). Do not edit by hand."
         ),
-        "checks": [check.model_dump(mode="json") for check in check_catalog()],
+        "checks": [
+            check.model_dump(mode="json")
+            for check in check_catalog(plugins_enabled=False)
+        ],
     }
     target = DOCS / "checks.json"
-    target.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
-    print(f"Wrote {target.relative_to(REPO_ROOT)}")
+    return target, _canonical_json(payload)
 
 
-def write_packet_schema() -> None:
+def write_checks_catalog(*, check_only: bool = False, drift: list[str] | None = None) -> bool:
+    target, content = build_checks_catalog()
+    return _emit(target, content, check_only=check_only, drift=drift if drift is not None else [])
+
+
+def build_packet_schema() -> tuple[Path, str]:
     """Generate docs/packet-schema.v0.<minor>.json from EvidencePacket.
 
     Versioned independently from the report schema; bumping requires a
@@ -714,18 +802,54 @@ def write_packet_schema() -> None:
         "agents_shipgate.packet.models.EvidencePacket. Do not edit by hand."
     )
     target = DOCS / f"packet-schema.v{minor}.json"
-    target.write_text(
-        json.dumps(schema, indent=2, sort_keys=True) + "\n", encoding="utf-8"
-    )
-    print(f"Wrote {target.relative_to(REPO_ROOT)}")
+    return target, _canonical_json(schema)
+
+
+def write_packet_schema(*, check_only: bool = False, drift: list[str] | None = None) -> bool:
+    target, content = build_packet_schema()
+    return _emit(target, content, check_only=check_only, drift=drift if drift is not None else [])
+
 
+# Public ordered list of (name, builder) pairs. Tests and the CLI iterate this
+# instead of hardcoding individual calls, so adding a new schema is one edit.
+BUILDERS: tuple[tuple[str, Callable[[], tuple[Path, str]]], ...] = (
+    ("manifest", build_manifest_schema),
+    ("checks_catalog", build_checks_catalog),
+    ("report", build_report_schema),
+    ("packet", build_packet_schema),
+)
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(
+        prog="generate_schemas",
+        description=(
+            "Regenerate docs/*.json schemas (default) or verify they match the "
+            "current Pydantic models (--check)."
+        ),
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Verify committed schemas match the generators; exit 1 on drift.",
+    )
+    args = parser.parse_args(argv)
 
-def main() -> int:
     DOCS.mkdir(parents=True, exist_ok=True)
-    write_manifest_schema()
-    write_checks_catalog()
-    write_report_schema()
-    write_packet_schema()
+    drift: list[str] = []
+    for _name, builder in BUILDERS:
+        target, content = builder()
+        _emit(target, content, check_only=args.check, drift=drift)
+
+    if args.check and drift:
+        sys.stderr.write("\n".join(drift))
+        sys.stderr.write(
+            "\n\nSchema drift detected in "
+            f"{len(drift)} file(s). To resolve:\n"
+            "  python scripts/generate_schemas.py\n"
+            "  git add docs/ && git commit\n"
+        )
+        return 1
     return 0
 
 
diff --git a/tests/test_schema_roundtrip.py b/tests/test_schema_roundtrip.py
new file mode 100644
index 0000000..a8cf51f
--- /dev/null
+++ b/tests/test_schema_roundtrip.py
@@ -0,0 +1,243 @@
+"""Schema round-trip tests (M4 · trust hardening).
+
+Every committed schema under ``docs/`` (``manifest-v0.1.json``,
+``checks.json``, the current-minor ``report-schema.v0.*.json``, and the
+current-minor ``packet-schema.v0.*.json``) MUST match what
+``scripts/generate_schemas.py`` produces from the live Pydantic models.
+
+These tests call the generator's builder functions directly — no
+subprocess, no I/O — so a Pydantic edit that forgets to regenerate
+fails the test locally with a clear diff before CI runs the same check.
+
+If a test here fails, run::
+
+    python scripts/generate_schemas.py
+    git add docs/ && git commit
+
+That is the same remediation surfaced by
+``scripts/generate_schemas.py --check`` in CI.
+"""
+
+from __future__ import annotations
+
+import difflib
+import importlib.util
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+GENERATOR_PATH = REPO_ROOT / "scripts" / "generate_schemas.py"
+
+
+def _load_generator():
+    """Import scripts/generate_schemas.py without adding scripts/ to sys.path.
+
+    importlib.util keeps the module local to this test file. The
+    generator's top-level ``sys.path.insert(0, str(SRC))`` then makes
+    ``agents_shipgate`` importable from a source checkout, the same as
+    when the script runs standalone.
+    """
+    spec = importlib.util.spec_from_file_location(
+        "agents_shipgate_schema_generator", GENERATOR_PATH
+    )
+    assert spec is not None and spec.loader is not None, (
+        f"could not load {GENERATOR_PATH}"
+    )
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+@pytest.fixture(scope="module")
+def generator():
+    return _load_generator()
+
+
+def _assert_match(target: Path, generated_content: str) -> None:
+    assert target.exists(), (
+        f"{target.relative_to(REPO_ROOT)} is missing. "
+        "Run `python scripts/generate_schemas.py` to create it."
+    )
+    committed = target.read_text(encoding="utf-8")
+    if committed == generated_content:
+        return
+    diff = "".join(
+        difflib.unified_diff(
+            committed.splitlines(keepends=True),
+            generated_content.splitlines(keepends=True),
+            fromfile=f"{target.relative_to(REPO_ROOT)} (committed)",
+            tofile=f"{target.relative_to(REPO_ROOT)} (generated)",
+            n=2,
+        )
+    )
+    pytest.fail(
+        f"{target.relative_to(REPO_ROOT)} drifted from the live Pydantic "
+        f"model. Run `python scripts/generate_schemas.py` and commit "
+        f"the result.\n\n{diff}"
+    )
+
+
+def test_manifest_schema_matches_committed_file(generator):
+    target, content = generator.build_manifest_schema()
+    _assert_match(target, content)
+
+
+def test_report_schema_matches_committed_file(generator):
+    target, content = generator.build_report_schema()
+    _assert_match(target, content)
+
+
+def test_packet_schema_matches_committed_file(generator):
+    target, content = generator.build_packet_schema()
+    _assert_match(target, content)
+
+
+def test_checks_catalog_matches_committed_file(generator):
+    target, content = generator.build_checks_catalog()
+    _assert_match(target, content)
+
+
+def test_check_mode_passes_on_current_repo(generator, capsys):
+    """End-to-end: `generate_schemas.py --check` exits 0 when artifacts match.
+
+    Catches regressions in the --check wiring itself (e.g., a future
+    refactor that bypasses one of the four builders).
+    """
+    exit_code = generator.main(["--check"])
+    assert exit_code == 0, (
+        "generate_schemas.py --check exited non-zero — at least one "
+        "schema file drifted. Run `python scripts/generate_schemas.py` "
+        "and commit."
+    )
+
+
+def test_check_mode_reports_drift(generator, tmp_path, monkeypatch, capsys):
+    """Negative control: a synthetic drift must trigger exit 1 with a diff.
+
+    Asserts the failure path is wired correctly so check-mode never
+    silently passes on a real drift. Redirects DOCS to a temp tree so
+    one stale file plus three missing files exercise both drift shapes
+    (mismatch + missing).
+    """
+    monkeypatch.setattr(generator, "DOCS", tmp_path)
+    stale_target = tmp_path / "manifest-v0.1.json"
+    stale_target.write_text('{"stale": true}\n', encoding="utf-8")
+
+    exit_code = generator.main(["--check"])
+    err = capsys.readouterr().err
+
+    assert exit_code == 1
+    assert "drift detected" in err, "expected unified-diff preview for stale file"
+    assert "missing" in err, "expected 'missing' marker for absent files"
+    assert "python scripts/generate_schemas.py" in err, (
+        "expected remediation command in failure output"
+    )
+
+
+def test_builders_are_pure(generator):
+    """Build functions return ``(Path, str)`` and produce identical output
+    on repeated calls.
+
+    Locks the M4 invariant that builders are I/O-free and deterministic
+    — the same model state must always produce byte-identical schema
+    text, which is what makes the round-trip test trustworthy.
+    """
+    for builder in (
+        generator.build_manifest_schema,
+        generator.build_report_schema,
+        generator.build_packet_schema,
+        generator.build_checks_catalog,
+    ):
+        target_a, content_a = builder()
+        target_b, content_b = builder()
+        assert isinstance(target_a, Path)
+        assert isinstance(content_a, str)
+        assert target_a == target_b
+        assert content_a == content_b, (
+            f"{builder.__name__} produced different output on repeated call; "
+            "generator is not deterministic."
+        )
+        assert content_a.endswith("\n"), (
+            f"{builder.__name__} output missing trailing newline; canonical "
+            "form requires it for stable git diffs."
+        )
+
+
+def test_checks_catalog_ignores_enabled_plugins(generator, monkeypatch):
+    """Regression: the generated docs/checks.json must be the built-in
+    catalog only, regardless of ``AGENTS_SHIPGATE_ENABLE_PLUGINS``.
+
+    Before the fix, ``build_checks_catalog()`` called
+    ``check_catalog()`` without an explicit ``plugins_enabled=False``.
+    With the env var set and any third-party plugin installed, the
+    function would resolve plugins from entry points and include them
+    in the generated catalog. ``--check`` would then either falsely
+    flag drift against the built-in-only committed
+    ``docs/checks.json``, or — on a `write` run — silently overwrite
+    the committed artifact with a plugin-augmented one.
+
+    The fix forces ``plugins_enabled=False`` at the build site. This
+    test installs a fake plugin entry point with a distinctive
+    ``check_id`` and asserts:
+
+    1. the plugin's check_id is absent from the generator output, and
+    2. the generator output is byte-identical to a clean run with
+       plugins env unset.
+    """
+    from agents_shipgate.checks import registry
+
+    def plugin(_context):  # pragma: no cover — never executed
+        return []
+
+    plugin.AGENTS_SHIPGATE_METADATA = {
+        "id": "PLUGIN-DETERMINISM-CANARY",
+        "category": "custom",
+        "default_severity": "medium",
+        "description": "Canary plugin for generator determinism test.",
+    }
+
+    class FakeEntryPoint:
+        value = "acme_shipgate_checks:run"
+
+        def load(self):
+            return plugin
+
+    # First, generate the baseline (plugins env unset).
+    monkeypatch.delenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", raising=False)
+    monkeypatch.setattr(registry, "entry_points", lambda group: [])
+    _baseline_target, baseline_content = generator.build_checks_catalog()
+    assert "PLUGIN-DETERMINISM-CANARY" not in baseline_content, (
+        "sanity: baseline must not already contain the canary id"
+    )
+
+    # Now enable plugins AND install the canary plugin via a fake
+    # entry point. The generator must still produce byte-identical
+    # output — plugins must not leak into the deterministic artifact.
+    monkeypatch.setenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", "1")
+    monkeypatch.setattr(registry, "entry_points", lambda group: [FakeEntryPoint()])
+
+    # Cross-check the threat model: with plugins enabled at the
+    # registry call site, the catalog WOULD include the canary. If
+    # this assertion ever flips, the upstream check_catalog()
+    # plugin-resolution path no longer works the way the regression
+    # is testing for, and this test needs to be reworked rather than
+    # falsely passing.
+    augmented = registry.check_catalog()
+    assert any(check.id == "PLUGIN-DETERMINISM-CANARY" for check in augmented), (
+        "test setup did not actually enable the plugin path; the "
+        "regression check below would pass vacuously"
+    )
+
+    _target, content = generator.build_checks_catalog()
+    assert "PLUGIN-DETERMINISM-CANARY" not in content, (
+        "AGENTS_SHIPGATE_ENABLE_PLUGINS=1 leaked a plugin into the "
+        "generated docs/checks.json — generator must force "
+        "plugins_enabled=False so the committed artifact is "
+        "deterministic regardless of host environment."
+    )
+    assert content == baseline_content, (
+        "generator output diverged between plugins-off and "
+        "plugins-on runs; the committed artifact must be a pure "
+        "function of the built-in catalog."
+    )