From 75ad577efeb92b657d8b8770a1ebb4ccda713bc1 Mon Sep 17 00:00:00 2001 From: klickd-agent Date: Tue, 2 Jun 2026 11:30:16 +0000 Subject: [PATCH] feat(supply-chain): tool-backed logical diff report for skill/pack candidates Add the logical-diff stage of the x.klickd skill/pack supply chain: a deterministic, offline CLI that compares a previous candidate (--before) against a new one (--after) and classifies governance/guardrail/memory/ evidence/claim/public-boundary changes rather than raw JSON lines. Hard-fails (exit 1) on guardrail lowering, claim-boundary violations, or public/private-boundary violations; exit 0 when no blocking finding. The deterministic_diff_id is a sha256 over input hashes + sorted findings, clock- and host-independent. Stdlib only, no network. Adds 10 before/after fixtures and 17 tests covering each blocking and non-blocking class plus determinism and CLI exit codes. Operator README under .internal-skills/supply-chain/diff/. No claim of full end-to-end automation; downstream stages remain planned. Co-Authored-By: Claude Opus 4.7 --- .internal-skills/supply-chain/diff/README.md | 95 +++ scripts/generate_supply_chain_diff.py | 751 ++++++++++++++++++ .../supply_chain_diff/after_benign.json | 103 +++ .../after_claim_violation.json | 99 +++ .../after_evidence_weakened.json | 98 +++ .../after_floor_removed.json | 97 +++ .../after_governance_violation.json | 98 +++ .../after_guardrail_lowered.json | 98 +++ .../after_memory_changed.json | 98 +++ .../after_public_violation.json | 98 +++ .../supply_chain_diff/after_unchanged.json | 98 +++ tests/fixtures/supply_chain_diff/before.json | 60 ++ tests/test_supply_chain_diff.py | 227 ++++++ 13 files changed, 2020 insertions(+) create mode 100644 .internal-skills/supply-chain/diff/README.md create mode 100644 scripts/generate_supply_chain_diff.py create mode 100644 tests/fixtures/supply_chain_diff/after_benign.json create mode 100644 tests/fixtures/supply_chain_diff/after_claim_violation.json create mode 100644 tests/fixtures/supply_chain_diff/after_evidence_weakened.json create mode 100644 tests/fixtures/supply_chain_diff/after_floor_removed.json create mode 100644 tests/fixtures/supply_chain_diff/after_governance_violation.json create mode 100644 tests/fixtures/supply_chain_diff/after_guardrail_lowered.json create mode 100644 tests/fixtures/supply_chain_diff/after_memory_changed.json create mode 100644 tests/fixtures/supply_chain_diff/after_public_violation.json create mode 100644 tests/fixtures/supply_chain_diff/after_unchanged.json create mode 100644 tests/fixtures/supply_chain_diff/before.json create mode 100644 tests/test_supply_chain_diff.py diff --git a/.internal-skills/supply-chain/diff/README.md b/.internal-skills/supply-chain/diff/README.md new file mode 100644 index 0000000..0488536 --- /dev/null +++ b/.internal-skills/supply-chain/diff/README.md @@ -0,0 +1,95 @@ +# Supply-chain logical diff (stage: diff report) + +This directory holds the output of the **logical diff** stage of the x.klickd +skill/pack supply chain. It is one tool-backed stage, not the full pipeline, +and makes **no claim of total automation**. + +## What it does + +`scripts/generate_supply_chain_diff.py` compares a previous version of a +skill/pack candidate (`--before`) against a new version (`--after`) and +classifies the changes that matter for governance, security and claim +discipline — not just raw JSON/text line changes. + +It is meant to help a human/agent reviewer decide: + +- candidate acceptable, +- premium pass required, +- immediate rejection, +- rollback / deprecation required. + +## Usage + +```bash +python scripts/generate_supply_chain_diff.py \ + --before path/to/before.json \ + --after path/to/after.json \ + --out .internal-skills/supply-chain/diff/report.json +``` + +- Prints the report to stdout (suppress with `--quiet`). +- Writes the report to `--out` when given. +- Standard library only. No network, no provider calls, no paid resources. + +## Change classification + +`added`, `removed`, `changed`, `unchanged`, `risk_raised`, +`guardrail_lowered`, `evidence_changed`, `governance_changed`, +`memory_policy_changed`, `public_boundary_changed`, `claim_boundary_changed`. + +## Exit codes + +| Code | Meaning | +|---|---| +| 0 | no blocking finding | +| 1 | at least one **blocking** finding (guardrail lowered, claim-boundary, or public/private-boundary violation) | +| 2 | usage / input error (missing or unparseable input) | + +## Blocking (hard-fail) conditions + +Per the supply-chain rules, any **lowering of a non-lowerable safeguard** is a +hard fail rather than a silent change. Blocking findings include: + +- a verification gate weakened (`block` → `confirm` → `silent`) or removed; +- a `human_veto.non_lowerable_floor` entry removed, or `raise_only` disabled; +- `evidence_policy.required_for_claims` / `pointer_only` turned off; +- `human_authority.final_decision_owner` moved off `human_carrier`; +- `_pack_metadata.claims_v41_ga` flipped to `true`, or `non_normative` dropped; +- a banned public claim introduced (e.g. "universal standard", "automatic + GDPR / EU AI Act compliance", "proven benchmark superiority"); +- an internal codename leaking into the candidate; +- `contains_real_pii` / `contains_secrets` flipped to `true`; +- the `encrypted` flag downgraded `true` → `false`; +- a `forbidden_fields` entry removed. + +Non-blocking but flagged: memory-policy changes, evidence-policy shape +changes, agent-role escalation (`risk_raised`), and generic added/removed/ +changed pack keys. + +## Determinism + +`deterministic_diff_id` is a `sha256:` over the before/after input hashes plus +the sorted, normalized findings. It does not depend on the clock, host, or run +order. Two runs over identical inputs produce an identical id. Any clock-based +marker a caller adds lives in `non_deterministic_zone` and is excluded from the +hash. + +## Tool-vs-planned matrix (this stage) + +| Capability | State | +|---|---| +| logical diff classification (governance/guardrail/memory/evidence/claim/public boundary) | **tool** (this stage) | +| deterministic diff id + report | **tool** (this stage) | +| hard-fail on guardrail lowering / claim / public-boundary violation | **tool** (this stage) | +| threat model, license check, source-freshness, full PII/secrets scanner | **planned** | +| candidate-skill generation, context-graph generation | **planned** | +| premium pass | **manual** (human/agent, post-diff) | + +## Known limits + +- The diff understands the documented x.klickd pack shape. Renamed or + restructured roots are reported as generic `changed` rather than mapped to a + semantic class. +- The banned-claim and codename checks are substring tripwires on the + candidate document, not a general-purpose PII/secrets scanner. +- This stage does not generate, promote, release, tag or publish anything. diff --git a/scripts/generate_supply_chain_diff.py b/scripts/generate_supply_chain_diff.py new file mode 100644 index 0000000..171e2ea --- /dev/null +++ b/scripts/generate_supply_chain_diff.py @@ -0,0 +1,751 @@ +#!/usr/bin/env python3 +"""Offline logical-diff CLI for x.klickd skill/pack candidates. + +This is the supply-chain *logical diff* stage. It compares a previous version +(``--before``) of a skill/pack candidate against a new version (``--after``) +and classifies the changes that *matter* for governance, security and claim +discipline -- not just raw JSON/text line changes. + +It is NOT a full end-to-end supply chain and makes no claim of total +automation. It is one tool-backed stage: it produces a deterministic report +that helps a human/agent reviewer decide whether a candidate is acceptable, +needs a premium pass, must be rejected, or requires rollback/deprecation. + +Scope of what it understands (semantic, not just textual): + + * governance -- human_veto / human_authority / gate policy + * guardrails -- gate levels (block > confirm > silent), raise_only, + claim_grounding_required, non_lowerable_floor + * memory policy -- memory_scope / memory_segments[].policy / + structured_memory.policy + * evidence / proofs -- evidence_policy (required_for_claims, pointer_only) + * claim boundary -- _pack_metadata.claims_v41_ga / non_normative / + contains_real_pii / contains_secrets + banned claim + strings introduced anywhere in the after document + * public/private -- internal codename leak, encrypted flag downgrade, + forbidden_fields removal + * competencies -- added/removed competency refs + * risk -- risk markers raised + +Change classification (per the brief): + + added, removed, changed, unchanged, risk_raised, guardrail_lowered, + evidence_changed, governance_changed, memory_policy_changed, + public_boundary_changed, claim_boundary_changed + +Exit codes: + + 0 no blocking finding + 1 at least one BLOCKING finding (guardrail lowered, claim-boundary + violation, or public/private-boundary violation) + 2 usage / input error (cannot read or parse an input) + +Determinism: the report's ``deterministic_diff_id`` is a sha256 over the +before/after input hashes plus the sorted, normalized findings. It does not +depend on the clock, host, or run order. A ``generated_at`` field, if present, +lives in ``non_deterministic_zone`` and is excluded from every hash. + +No network. No provider calls. No paid resources. Standard library only. +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = "supply-chain-diff/0.1" + +# --- claim discipline ------------------------------------------------------- +# Substrings that must never be *introduced* by a candidate. Matching is +# case-insensitive over the serialized after-document. These mirror the +# repo-wide banned-claim list; introducing any is a claim-boundary violation. +BANNED_CLAIM_SUBSTRINGS = ( + "universal standard", + "automatic gdpr compliance", + "automatic gdpr/eu ai act compliance", + "automatic eu ai act compliance", + "proven benchmark superiority", + "benchmark superiority", + "guaranteed compliance", +) + +# Internal codename that must never leak into a candidate artifact. +INTERNAL_CODENAME = "chimera" + +# Ordered guardrail strength. Lower index == stronger guardrail. Moving a gate +# to a higher index (weaker) is a guardrail-lowering event. +GATE_LEVEL_ORDER = {"block": 0, "confirm": 1, "silent": 2, "off": 3, "none": 3} + + +def _read_json(path: Path) -> dict[str, Any]: + data = path.read_bytes() + return json.loads(data.decode("utf-8")) + + +def _sha256_file(path: Path) -> str: + return hashlib.sha256(path.read_bytes()).hexdigest() + + +def _pack(doc: dict[str, Any]) -> dict[str, Any]: + """Return the x_klickd_pack body, or the document itself as a fallback.""" + pack = doc.get("x_klickd_pack") + return pack if isinstance(pack, dict) else doc + + +def _meta(doc: dict[str, Any]) -> dict[str, Any]: + meta = doc.get("_pack_metadata") + return meta if isinstance(meta, dict) else {} + + +def _gates_by_id(pack: dict[str, Any]) -> dict[str, dict[str, Any]]: + """Flatten verification_gates.gates into {id: gate}.""" + vg = pack.get("verification_gates") + out: dict[str, dict[str, Any]] = {} + if isinstance(vg, dict) and isinstance(vg.get("gates"), list): + for gate in vg["gates"]: + if isinstance(gate, dict) and gate.get("id"): + out[str(gate["id"])] = gate + return out + + +def _gate_strength(level: Any) -> int: + return GATE_LEVEL_ORDER.get(str(level).lower(), 1) + + +def _as_set(value: Any) -> set[str]: + if isinstance(value, list): + return {str(v) for v in value} + return set() + + +def _finding( + kind: str, + path: str, + *, + before: Any = None, + after: Any = None, + severity: str = "info", + blocking: bool = False, + detail: str = "", +) -> dict[str, Any]: + return { + "kind": kind, + "path": path, + "before": before, + "after": after, + "severity": severity, + "blocking": blocking, + "detail": detail, + } + + +def _diff_gates( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + bg = _gates_by_id(before_pack) + ag = _gates_by_id(after_pack) + + for gid in sorted(set(bg) | set(ag)): + b = bg.get(gid) + a = ag.get(gid) + path = f"verification_gates.gates[{gid}].level" + if b is not None and a is None: + # A removed gate is a removed guardrail -> lowering. + findings.append( + _finding( + "guardrail_lowered", + f"verification_gates.gates[{gid}]", + before=b.get("level"), + after=None, + severity="high", + blocking=True, + detail=f"gate {gid!r} removed", + ) + ) + continue + if a is not None and b is None: + findings.append( + _finding( + "added", + f"verification_gates.gates[{gid}]", + before=None, + after=a.get("level"), + severity="info", + detail=f"gate {gid!r} added", + ) + ) + continue + bl, al = b.get("level"), a.get("level") + if bl == al: + continue + if _gate_strength(al) > _gate_strength(bl): + findings.append( + _finding( + "guardrail_lowered", + path, + before=bl, + after=al, + severity="high", + blocking=True, + detail=f"gate {gid!r} weakened {bl!r} -> {al!r}", + ) + ) + else: + findings.append( + _finding( + "governance_changed", + path, + before=bl, + after=al, + severity="info", + detail=f"gate {gid!r} strengthened {bl!r} -> {al!r}", + ) + ) + return findings + + +def _diff_floor( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + """non_lowerable_floor entries must not disappear.""" + findings: list[dict[str, Any]] = [] + bv = before_pack.get("human_veto") or {} + av = after_pack.get("human_veto") or {} + b_floor = _as_set(bv.get("non_lowerable_floor")) + a_floor = _as_set(av.get("non_lowerable_floor")) + for removed in sorted(b_floor - a_floor): + findings.append( + _finding( + "guardrail_lowered", + "human_veto.non_lowerable_floor", + before=removed, + after=None, + severity="high", + blocking=True, + detail=f"non-lowerable floor entry {removed!r} removed", + ) + ) + for added in sorted(a_floor - b_floor): + findings.append( + _finding( + "governance_changed", + "human_veto.non_lowerable_floor", + before=None, + after=added, + severity="info", + detail=f"floor entry {added!r} added", + ) + ) + # raise_only must not go true -> false. + if bv.get("raise_only") is True and av.get("raise_only") is False: + findings.append( + _finding( + "guardrail_lowered", + "human_veto.raise_only", + before=True, + after=False, + severity="high", + blocking=True, + detail="raise_only disabled", + ) + ) + + # gates.verification_gates_default flags. + bd = (before_pack.get("gates") or {}).get("verification_gates_default") or {} + ad = (after_pack.get("gates") or {}).get("verification_gates_default") or {} + for flag in ("raise_only", "claim_grounding_required"): + if bd.get(flag) is True and ad.get(flag) is False: + findings.append( + _finding( + "guardrail_lowered", + f"gates.verification_gates_default.{flag}", + before=True, + after=False, + severity="high", + blocking=True, + detail=f"{flag} disabled", + ) + ) + return findings + + +def _diff_governance( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + ba = before_pack.get("human_authority") or {} + aa = after_pack.get("human_authority") or {} + # Final decision must remain human-owned. + b_owner = ba.get("final_decision_owner") + a_owner = aa.get("final_decision_owner") + if b_owner != a_owner: + lowered = b_owner == "human_carrier" and a_owner != "human_carrier" + findings.append( + _finding( + "guardrail_lowered" if lowered else "governance_changed", + "human_authority.final_decision_owner", + before=b_owner, + after=a_owner, + severity="high" if lowered else "info", + blocking=lowered, + detail="final decision owner changed", + ) + ) + # agent_role escalation from advisory to autonomous is risk-raising. + if ba.get("agent_role") != aa.get("agent_role"): + raised = ba.get("agent_role") == "advisory" and aa.get("agent_role") not in ( + "advisory", + None, + ) + findings.append( + _finding( + "risk_raised" if raised else "governance_changed", + "human_authority.agent_role", + before=ba.get("agent_role"), + after=aa.get("agent_role"), + severity="high" if raised else "info", + blocking=False, + detail="agent role changed", + ) + ) + return findings + + +def _diff_evidence( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + be = before_pack.get("evidence_policy") or {} + ae = after_pack.get("evidence_policy") or {} + if be == ae: + return findings + # Specific weakenings are blocking; other shape changes are just flagged. + for flag in ("required_for_claims", "pointer_only"): + if be.get(flag) is True and ae.get(flag) is False: + findings.append( + _finding( + "guardrail_lowered", + f"evidence_policy.{flag}", + before=True, + after=False, + severity="high", + blocking=True, + detail=f"evidence policy {flag} disabled", + ) + ) + findings.append( + _finding( + "evidence_changed", + "evidence_policy", + before=be, + after=ae, + severity="medium", + detail="evidence policy shape changed", + ) + ) + return findings + + +def _diff_memory( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + if before_pack.get("memory_scope") != after_pack.get("memory_scope"): + findings.append( + _finding( + "memory_policy_changed", + "memory_scope", + before=before_pack.get("memory_scope"), + after=after_pack.get("memory_scope"), + severity="medium", + detail="memory scope changed", + ) + ) + b_seg = { + s.get("id"): s.get("policy") + for s in before_pack.get("memory_segments", []) + if isinstance(s, dict) + } + a_seg = { + s.get("id"): s.get("policy") + for s in after_pack.get("memory_segments", []) + if isinstance(s, dict) + } + for sid in sorted(set(b_seg) | set(a_seg), key=lambda x: str(x)): + if b_seg.get(sid) != a_seg.get(sid): + findings.append( + _finding( + "memory_policy_changed", + f"memory_segments[{sid}].policy", + before=b_seg.get(sid), + after=a_seg.get(sid), + severity="medium", + detail=f"memory segment {sid!r} policy changed", + ) + ) + bsm = (before_pack.get("structured_memory") or {}).get("policy") + asm = (after_pack.get("structured_memory") or {}).get("policy") + if bsm != asm: + findings.append( + _finding( + "memory_policy_changed", + "structured_memory.policy", + before=bsm, + after=asm, + severity="medium", + detail="structured memory policy changed", + ) + ) + return findings + + +def _diff_competencies( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + b = { + c.get("competency_ref") + for c in before_pack.get("competencies", []) + if isinstance(c, dict) and c.get("competency_ref") + } + a = { + c.get("competency_ref") + for c in after_pack.get("competencies", []) + if isinstance(c, dict) and c.get("competency_ref") + } + for ref in sorted(a - b): + findings.append( + _finding("added", f"competencies[{ref}]", after=ref, detail="competency added") + ) + for ref in sorted(b - a): + findings.append( + _finding( + "removed", f"competencies[{ref}]", before=ref, detail="competency removed" + ) + ) + return findings + + +def _diff_claim_boundary( + before_doc: dict[str, Any], after_doc: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + bm = _meta(before_doc) + am = _meta(after_doc) + + # claims_v41_ga flipping false -> true is a claim-boundary violation. + if bm.get("claims_v41_ga") in (False, None) and am.get("claims_v41_ga") is True: + findings.append( + _finding( + "claim_boundary_changed", + "_pack_metadata.claims_v41_ga", + before=bm.get("claims_v41_ga"), + after=True, + severity="high", + blocking=True, + detail="candidate now claims v4.1 GA", + ) + ) + # non_normative true -> false escalates a candidate to a normative claim. + if bm.get("non_normative") is True and am.get("non_normative") is False: + findings.append( + _finding( + "claim_boundary_changed", + "_pack_metadata.non_normative", + before=True, + after=False, + severity="high", + blocking=True, + detail="candidate dropped non_normative flag", + ) + ) + + # Banned claim substrings newly introduced anywhere in the after document. + after_blob = json.dumps(after_doc, ensure_ascii=False).lower() + before_blob = json.dumps(before_doc, ensure_ascii=False).lower() + for needle in BANNED_CLAIM_SUBSTRINGS: + if needle in after_blob and needle not in before_blob: + findings.append( + _finding( + "claim_boundary_changed", + "document", + after=needle, + severity="high", + blocking=True, + detail=f"banned claim introduced: {needle!r}", + ) + ) + return findings + + +def _diff_public_boundary( + before_doc: dict[str, Any], after_doc: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + before_pack = _pack(before_doc) + after_pack = _pack(after_doc) + bm = _meta(before_doc) + am = _meta(after_doc) + + # Internal codename newly leaking into the candidate. + after_blob = json.dumps(after_doc, ensure_ascii=False).lower() + before_blob = json.dumps(before_doc, ensure_ascii=False).lower() + if INTERNAL_CODENAME in after_blob and INTERNAL_CODENAME not in before_blob: + findings.append( + _finding( + "public_boundary_changed", + "document", + after=INTERNAL_CODENAME, + severity="high", + blocking=True, + detail="internal codename leaked into candidate", + ) + ) + + # PII / secrets markers flipping false -> true. + for flag in ("contains_real_pii", "contains_secrets"): + if bm.get(flag) in (False, None) and am.get(flag) is True: + findings.append( + _finding( + "public_boundary_changed", + f"_pack_metadata.{flag}", + before=bm.get(flag), + after=True, + severity="high", + blocking=True, + detail=f"{flag} now true", + ) + ) + + # encrypted true -> false is a confidentiality downgrade. + if before_doc.get("encrypted") is True and after_doc.get("encrypted") is False: + findings.append( + _finding( + "public_boundary_changed", + "encrypted", + before=True, + after=False, + severity="high", + blocking=True, + detail="encryption flag downgraded", + ) + ) + + # forbidden_fields entries removed weaken the private/public boundary. + b_ff = _as_set(before_pack.get("forbidden_fields")) + a_ff = _as_set(after_pack.get("forbidden_fields")) + for removed in sorted(b_ff - a_ff): + findings.append( + _finding( + "public_boundary_changed", + "forbidden_fields", + before=removed, + after=None, + severity="high", + blocking=True, + detail=f"forbidden_fields entry {removed!r} removed", + ) + ) + return findings + + +def _generic_changed( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + """Coarse top-level changed/added/removed over pack keys we don't model + semantically above. Keeps the summary honest without re-flagging the keys + already covered by dedicated analyzers.""" + covered = { + "verification_gates", + "human_veto", + "human_authority", + "gates", + "evidence_policy", + "memory_scope", + "memory_segments", + "structured_memory", + "competencies", + } + findings: list[dict[str, Any]] = [] + keys = sorted((set(before_pack) | set(after_pack)) - covered) + for k in keys: + in_b = k in before_pack + in_a = k in after_pack + if in_b and not in_a: + findings.append(_finding("removed", f"x_klickd_pack.{k}", before="")) + elif in_a and not in_b: + findings.append(_finding("added", f"x_klickd_pack.{k}", after="")) + elif before_pack.get(k) != after_pack.get(k): + findings.append(_finding("changed", f"x_klickd_pack.{k}")) + return findings + + +def _normalize_for_hash(finding: dict[str, Any]) -> str: + return json.dumps( + {k: finding[k] for k in ("kind", "path", "before", "after", "blocking")}, + sort_keys=True, + ensure_ascii=False, + default=str, + ) + + +def build_report(before_path: Path, after_path: Path) -> dict[str, Any]: + before_doc = _read_json(before_path) + after_doc = _read_json(after_path) + before_pack = _pack(before_doc) + after_pack = _pack(after_doc) + + findings: list[dict[str, Any]] = [] + findings += _diff_gates(before_pack, after_pack) + findings += _diff_floor(before_pack, after_pack) + findings += _diff_governance(before_pack, after_pack) + findings += _diff_evidence(before_pack, after_pack) + findings += _diff_memory(before_pack, after_pack) + findings += _diff_competencies(before_pack, after_pack) + findings += _diff_claim_boundary(before_doc, after_doc) + findings += _diff_public_boundary(before_doc, after_doc) + findings += _generic_changed(before_pack, after_pack) + + # Deterministic ordering: sort by (kind, path, detail). + findings.sort(key=lambda f: (f["kind"], f["path"], f["detail"])) + + before_hash = _sha256_file(before_path) + after_hash = _sha256_file(after_path) + + summary: dict[str, int] = {} + for f in findings: + summary[f["kind"]] = summary.get(f["kind"], 0) + 1 + if not findings: + summary["unchanged"] = 1 + + blocked = [f for f in findings if f["blocking"]] + high_risk = [f for f in findings if f["severity"] == "high" and not f["blocking"]] + + diff_id_material = json.dumps( + { + "before_hash": before_hash, + "after_hash": after_hash, + "findings": [_normalize_for_hash(f) for f in findings], + }, + sort_keys=True, + ensure_ascii=False, + ) + deterministic_diff_id = "sha256:" + hashlib.sha256( + diff_id_material.encode("utf-8") + ).hexdigest() + + recommendations = _recommend(blocked, high_risk, findings) + + return { + "schema_version": SCHEMA_VERSION, + "before_path": str(before_path), + "after_path": str(after_path), + "before_hash": before_hash, + "after_hash": after_hash, + "deterministic_diff_id": deterministic_diff_id, + "summary": dict(sorted(summary.items())), + "changed_paths": sorted({f["path"] for f in findings if f["kind"] != "unchanged"}), + "findings": findings, + "high_risk_findings": high_risk, + "blocked_findings": blocked, + "recommendations": recommendations, + "non_deterministic_zone": { + "note": ( + "Fields here are excluded from deterministic_diff_id. None are " + "emitted by default; a generated_at marker may be added by a " + "caller without affecting the diff id." + ) + }, + } + + +def _recommend( + blocked: list[dict[str, Any]], + high_risk: list[dict[str, Any]], + findings: list[dict[str, Any]], +) -> list[str]: + recs: list[str] = [] + if blocked: + recs.append("REJECT_OR_ROLLBACK: blocking finding(s) present; do not promote.") + kinds = sorted({f["kind"] for f in blocked}) + if "guardrail_lowered" in kinds: + recs.append( + "guardrail_lowered detected: a non-lowerable safeguard was weakened " + "or removed. Requires explicit human veto review." + ) + if "claim_boundary_changed" in kinds: + recs.append( + "claim_boundary_changed detected: candidate introduces a claim that " + "is not proven/bounded. Strip the claim before any further stage." + ) + if "public_boundary_changed" in kinds: + recs.append( + "public_boundary_changed detected: private/internal content or PII/" + "secrets risk leaking. Quarantine the candidate." + ) + elif high_risk: + recs.append( + "PREMIUM_PASS_REQUIRED: high-risk (non-blocking) changes need human/agent " + "review before promotion." + ) + elif findings: + recs.append("ACCEPTABLE_WITH_REVIEW: only non-critical changes detected.") + else: + recs.append("UNCHANGED: no logical change detected between before and after.") + return recs + + +def main(argv: list[str]) -> int: + parser = argparse.ArgumentParser( + description="Logical diff between two x.klickd skill/pack candidates." + ) + parser.add_argument("--before", required=True, help="path to previous version JSON") + parser.add_argument("--after", required=True, help="path to candidate version JSON") + parser.add_argument( + "--out", + default=None, + help="optional path to write the deterministic JSON report", + ) + parser.add_argument( + "--quiet", + action="store_true", + help="suppress the report on stdout (still writes --out if given)", + ) + args = parser.parse_args(argv[1:]) + + before_path = Path(args.before) + after_path = Path(args.after) + for p in (before_path, after_path): + if not p.exists(): + print(f"error: input not found: {p}", file=sys.stderr) + return 2 + try: + report = build_report(before_path, after_path) + except (json.JSONDecodeError, UnicodeDecodeError) as exc: + print(f"error: could not parse input JSON: {exc}", file=sys.stderr) + return 2 + + rendered = json.dumps(report, indent=2, ensure_ascii=False, sort_keys=False) + if args.out: + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(rendered + "\n", encoding="utf-8") + if not args.quiet: + print(rendered) + + if report["blocked_findings"]: + print( + f"BLOCKED: {len(report['blocked_findings'])} blocking finding(s).", + file=sys.stderr, + ) + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/tests/fixtures/supply_chain_diff/after_benign.json b/tests/fixtures/supply_chain_diff/after_benign.json new file mode 100644 index 0000000..5de1330 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_benign.json @@ -0,0 +1,103 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.2.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + }, + { + "competency_ref": "esco:S2.0", + "scheme": "esco", + "prefLabel": "gamma" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_claim_violation.json b/tests/fixtures/supply_chain_diff/after_claim_violation.json new file mode 100644 index 0000000..98931be --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_claim_violation.json @@ -0,0 +1,99 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": true, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ], + "target_user": "Now a universal standard with automatic GDPR compliance." + } +} diff --git a/tests/fixtures/supply_chain_diff/after_evidence_weakened.json b/tests/fixtures/supply_chain_diff/after_evidence_weakened.json new file mode 100644 index 0000000..3c5dc86 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_evidence_weakened.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": false, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_floor_removed.json b/tests/fixtures/supply_chain_diff/after_floor_removed.json new file mode 100644 index 0000000..28ed86c --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_floor_removed.json @@ -0,0 +1,97 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": false, + "non_lowerable_floor": [ + "risk_class_below_evidence" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_governance_violation.json b/tests/fixtures/supply_chain_diff/after_governance_violation.json new file mode 100644 index 0000000..953243f --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_governance_violation.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "agent", + "agent_role": "autonomous", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_guardrail_lowered.json b/tests/fixtures/supply_chain_diff/after_guardrail_lowered.json new file mode 100644 index 0000000..912d0ae --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_guardrail_lowered.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "confirm", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_memory_changed.json b/tests/fixtures/supply_chain_diff/after_memory_changed.json new file mode 100644 index 0000000..346fa00 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_memory_changed.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "store_inline" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "shared_global", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_public_violation.json b/tests/fixtures/supply_chain_diff/after_public_violation.json new file mode 100644 index 0000000..a0f1365 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_public_violation.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": true, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "scoring_rubric", + "tone_rules" + ], + "internal_note": "see chimera internal docs" + } +} diff --git a/tests/fixtures/supply_chain_diff/after_unchanged.json b/tests/fixtures/supply_chain_diff/after_unchanged.json new file mode 100644 index 0000000..bfde8b2 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_unchanged.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/before.json b/tests/fixtures/supply_chain_diff/before.json new file mode 100644 index 0000000..ab267d5 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/before.json @@ -0,0 +1,60 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + {"competency_ref": "esco:S1.0", "scheme": "esco", "prefLabel": "alpha"}, + {"competency_ref": "esco:S1.5", "scheme": "esco", "prefLabel": "beta"} + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + {"id": "works", "scope": "memory.x_klickd.example.works", "policy": "pointer_only_attribution"} + ], + "structured_memory": {"scope": "memory.x_klickd.example", "policy": "pack_scoped_only", "entries": []}, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": ["risk_class_below_evidence", "skip_counsel_for_definitive_decision"] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + {"id": "compliance-attestation", "action_class": "compliance_attestation", "level": "confirm", "reason": "confirm required"}, + {"id": "risk-class-downgrade", "action_class": "risk_class_below_evidence", "level": "block", "reason": "blocked"}, + {"id": "counsel-escalation-skip", "action_class": "skip_counsel_for_definitive_decision", "level": "block", "reason": "blocked"} + ] + }, + "forbidden_fields": ["system_prompt_overrides", "scoring_rubric", "tone_rules"] + } +} diff --git a/tests/test_supply_chain_diff.py b/tests/test_supply_chain_diff.py new file mode 100644 index 0000000..5df64fb --- /dev/null +++ b/tests/test_supply_chain_diff.py @@ -0,0 +1,227 @@ +"""Tests for scripts/generate_supply_chain_diff.py. + +The logical-diff stage compares a previous skill/pack candidate against a new +one and classifies the changes that matter for governance, security and claim +discipline. These tests exercise the static fixtures under +tests/fixtures/supply_chain_diff/ plus a couple of in-memory edge cases. +""" +from __future__ import annotations + +import importlib.util +import json +import sys +from pathlib import Path + +import pytest + +REPO = Path(__file__).resolve().parent.parent +SCRIPT = REPO / "scripts" / "generate_supply_chain_diff.py" +FIX = REPO / "tests" / "fixtures" / "supply_chain_diff" + + +def _load_module(): + spec = importlib.util.spec_from_file_location("generate_supply_chain_diff", SCRIPT) + assert spec is not None and spec.loader is not None + mod = importlib.util.module_from_spec(spec) + sys.modules["generate_supply_chain_diff"] = mod + spec.loader.exec_module(mod) + return mod + + +diff = _load_module() + +BEFORE = FIX / "before.json" + + +def _report(after_name: str) -> dict: + return diff.build_report(BEFORE, FIX / after_name) + + +def _kinds(report: dict) -> set[str]: + return {f["kind"] for f in report["findings"]} + + +# --- structural ------------------------------------------------------------ + + +def test_report_is_valid_json_and_has_required_fields(): + report = _report("after_benign.json") + rendered = json.dumps(report) # must serialize + parsed = json.loads(rendered) + for field in ( + "schema_version", + "before_path", + "after_path", + "before_hash", + "after_hash", + "deterministic_diff_id", + "summary", + "changed_paths", + "findings", + "high_risk_findings", + "blocked_findings", + "recommendations", + "non_deterministic_zone", + ): + assert field in parsed, f"missing field {field}" + assert parsed["deterministic_diff_id"].startswith("sha256:") + + +def test_unchanged_file_yields_stable_empty_diff(): + report = _report("after_unchanged.json") + assert report["findings"] == [] + assert report["changed_paths"] == [] + assert report["summary"] == {"unchanged": 1} + assert report["blocked_findings"] == [] + assert "UNCHANGED" in report["recommendations"][0] + + +def test_added_removed_changed_detected(): + report = _report("after_benign.json") + # added competency esco:S2.0 ; pack_version changed (generic) ; non-blocking + assert "added" in _kinds(report) + assert report["blocked_findings"] == [] + paths = report["changed_paths"] + assert any("competencies[esco:S2.0]" in p for p in paths) + assert any("pack_version" in p for p in paths) + + +# --- guardrails (blocking) ------------------------------------------------- + + +def test_guardrail_lowering_gate_level_blocks(): + report = _report("after_guardrail_lowered.json") + blocked = report["blocked_findings"] + assert blocked, "expected a blocking finding" + assert all(f["blocking"] for f in blocked) + assert any(f["kind"] == "guardrail_lowered" for f in blocked) + assert "REJECT_OR_ROLLBACK" in report["recommendations"][0] + + +def test_non_lowerable_floor_removal_and_raise_only_block(): + report = _report("after_floor_removed.json") + details = [f["detail"] for f in report["blocked_findings"]] + assert any("non-lowerable floor" in d for d in details) + assert any("raise_only disabled" in d for d in details) + + +def test_evidence_weakening_blocks_and_flags_change(): + report = _report("after_evidence_weakened.json") + kinds = _kinds(report) + assert "evidence_changed" in kinds + assert any( + f["kind"] == "guardrail_lowered" and "evidence" in f["detail"] + for f in report["blocked_findings"] + ) + + +def test_governance_owner_move_off_human_blocks(): + report = _report("after_governance_violation.json") + assert any( + f["kind"] == "guardrail_lowered" + and "final_decision_owner" in f["path"] + for f in report["blocked_findings"] + ) + # agent_role advisory -> autonomous is risk_raised (non-blocking) + assert any(f["kind"] == "risk_raised" for f in report["findings"]) + + +# --- claim / public boundary (blocking) ------------------------------------ + + +def test_claim_boundary_violation_detected(): + report = _report("after_claim_violation.json") + details = [f["detail"] for f in report["blocked_findings"]] + assert any("claims v4.1 GA" in d for d in details) + assert any("banned claim introduced" in d for d in details) + assert "claim_boundary_changed" in _kinds(report) + + +def test_public_boundary_violation_detected(): + report = _report("after_public_violation.json") + details = [f["detail"] for f in report["blocked_findings"]] + assert any("codename leaked" in d for d in details) + assert any("contains_real_pii now true" in d for d in details) + assert any("forbidden_fields entry" in d for d in details) + + +def test_memory_policy_change_flagged_non_blocking(): + report = _report("after_memory_changed.json") + assert "memory_policy_changed" in _kinds(report) + assert report["blocked_findings"] == [] + assert "PREMIUM_PASS_REQUIRED" not in report["recommendations"][0] + + +# --- determinism ----------------------------------------------------------- + + +def test_diff_id_is_deterministic_across_runs(): + r1 = _report("after_guardrail_lowered.json") + r2 = _report("after_guardrail_lowered.json") + assert r1["deterministic_diff_id"] == r2["deterministic_diff_id"] + assert r1["findings"] == r2["findings"] + + +def test_diff_id_changes_when_content_changes(): + r1 = _report("after_benign.json") + r2 = _report("after_guardrail_lowered.json") + assert r1["deterministic_diff_id"] != r2["deterministic_diff_id"] + + +# --- CLI exit codes -------------------------------------------------------- + + +def test_cli_exit_zero_on_unchanged(): + rc = diff.main( + ["prog", "--before", str(BEFORE), "--after", str(FIX / "after_unchanged.json"), "--quiet"] + ) + assert rc == 0 + + +def test_cli_exit_one_on_blocking(): + rc = diff.main( + [ + "prog", + "--before", + str(BEFORE), + "--after", + str(FIX / "after_guardrail_lowered.json"), + "--quiet", + ] + ) + assert rc == 1 + + +def test_cli_exit_two_on_missing_input(tmp_path): + rc = diff.main(["prog", "--before", str(BEFORE), "--after", str(tmp_path / "nope.json"), "--quiet"]) + assert rc == 2 + + +def test_cli_writes_out_file(tmp_path): + out = tmp_path / "nested" / "report.json" + diff.main( + [ + "prog", + "--before", + str(BEFORE), + "--after", + str(FIX / "after_benign.json"), + "--out", + str(out), + "--quiet", + ] + ) + assert out.exists() + parsed = json.loads(out.read_text()) + assert parsed["schema_version"] == diff.SCHEMA_VERSION + + +# --- artifacts hygiene ----------------------------------------------------- + + +def test_no_codename_leak_in_script_output_for_clean_input(): + # A clean before/after pair must not surface the internal codename in the + # report unless the candidate itself leaked it. + report = _report("after_benign.json") + blob = json.dumps(report).lower() + assert diff.INTERNAL_CODENAME not in blob