diff --git a/STABILITY.md b/STABILITY.md index 6b84e86..c75ddd0 100644 --- a/STABILITY.md +++ b/STABILITY.md @@ -14,7 +14,7 @@ These commands and flags are stable across all `0.x.y` releases. They will only | Command | Stable flags | |---|---| -| `agents-shipgate scan` | `-c`, `--config`, `--out`, `--format`, `--ci-mode`, `--fail-on`, `--baseline`, `--diff-from`, `--no-plugins`, `--verbose`, `--workspace`, `--packet`/`--no-packet`, `--packet-format` | +| `agents-shipgate scan` | `-c`, `--config`, `--out`, `--format`, `--ci-mode`, `--fail-on`, `--baseline`, `--diff-from`, `--no-plugins`, `--strict-plugins`, `--verbose`, `--workspace`, `--packet`/`--no-packet`, `--packet-format` | | `agents-shipgate evidence-packet` | `--from`, `--out`, `--format`, `--json` | | `agents-shipgate scenario suggest` | `--from`, `--out` | | `agents-shipgate init` | `--workspace`, `--write`, `--json` | @@ -100,6 +100,7 @@ In `agents-shipgate-reports/report.json`, the following are guaranteed: - `baseline.{matched_count, new_count, resolved_count, path}` (when `--baseline` is used) - `tool_inventory[].{name, source_type, source_ref, risk_tags, auth_scopes, owner, confidence}` - `loaded_plugins[].{name, value, distribution, version, check_id}` +- `loaded_plugins[].{validation_status, validation_errors, runtime_errors}` (v0.17+ / M5) — plugin validation provenance, required and present on every entry. `validation_status` is one of `valid | load_failed | bad_signature | bad_metadata | id_collision | bad_floor`; the two error lists are always present (and empty for clean plugins). Invalid plugins still appear in this array (with `check_id: null` for entries that failed before metadata parsing), so reviewers can see what was skipped without reading scanner logs. Plugin findings whose `check_id` does not match the declared metadata are dropped at runtime and recorded under `runtime_errors`. 
### Scenario Suggestion YAML @@ -188,6 +189,16 @@ The scanner does not, under any circumstances: Plugins are off by default. `AGENTS_SHIPGATE_ENABLE_PLUGINS=1` enables loading; `--no-plugins` overrides at the CLI level. When loaded, every plugin is enumerated in `report.loaded_plugins`. +Plugin validation (v0.17+ / M5). Every entry point is checked against five load-time gates before it can run: + +1. **load** — `entry_point.load()` must not raise. +2. **signature** — the loaded object must be callable and accept exactly one required positional parameter (`ScanContext`); extra defaulted positional / keyword-only parameters are allowed. +3. **metadata** — `AGENTS_SHIPGATE_METADATA` must be present and parseable as `CheckMetadata`. Both `id` and `check_id` are accepted as the identifier key (v0.17 alias); newer plugins should prefer `check_id` for symmetry with `Finding.check_id`. +4. **id_collision** — the plugin's check ID must not shadow a built-in (including legacy aliases) or a previously-registered plugin. +5. **bad_floor** — `floor_severity` must not exceed `default_severity` on the same metadata block. + +Plugins that pass every gate run with the same trust as built-ins. Runtime validation additionally drops findings whose `Finding.check_id` does not match the plugin's declared `id`/`check_id`, drops non-`Finding` items, and captures any exception raised during the plugin call into `loaded_plugins[].runtime_errors`. The scan continues regardless; `--strict-plugins` elevates any non-`valid` plugin or non-empty `runtime_errors` to exit code 4. 
+ ### Manifest Schema The manifest schema version (`version: "0.1"`) is independent of the CLI diff --git a/docs/checks.json b/docs/checks.json index ddddffa..80fb9e4 100644 --- a/docs/checks.json +++ b/docs/checks.json @@ -11,6 +11,7 @@ "change" ], "fires_when": "Base action approval.required was true and head no longer requires approval.", + "floor_severity": null, "id": "SHIP-ACTION-APPROVAL-REMOVED", "rationale": "Removing approval weakens the release boundary for an existing action.", "recommendation": "Restore action_surface.actions[].approval.required: true or document the reviewed exception under action_surface.actions[].evidence.approval_ticket.", @@ -30,6 +31,7 @@ "declared" ], "fires_when": "action_surface.actions.approval or safeguards sets an inherited true control to false.", + "floor_severity": null, "id": "SHIP-ACTION-CONTROL-DOWNGRADE", "rationale": "Manifest-wide approval and safeguard controls are governance requirements; per-action metadata should not silently weaken them.", "recommendation": "Keep the inherited action_surface.actions[] approval/safeguard control enabled or remove the weakening declaration.", @@ -47,6 +49,7 @@ "missing" ], "fires_when": "An added destructive action lacks approval.required or safeguards.rollback.", + "floor_severity": null, "id": "SHIP-ACTION-DESTRUCTIVE-ROLLBACK-MISSING", "rationale": "Destructive actions need explicit approval and rollback evidence before release.", "recommendation": "Declare approval.required and safeguards.rollback or remove the destructive action.", @@ -65,6 +68,7 @@ "declared_effect" ], "fires_when": "action_surface.actions.effect is lower risk than the effect inferred from the loaded tool metadata.", + "floor_severity": null, "id": "SHIP-ACTION-EFFECT-DOWNGRADE-DECLARED", "rationale": "Per-action metadata should not be able to declare away a higher-risk operation inferred from the tool surface.", "recommendation": "Set action_surface.actions[].effect to the inferred operation effect or 
remove the weaker declaration.", @@ -81,6 +85,7 @@ "change" ], "fires_when": "An action changes to a higher-risk effect such as read to write or write to destructive.", + "floor_severity": null, "id": "SHIP-ACTION-EFFECT-ESCALATED", "rationale": "Effect escalation changes what the agent can do in the real world and needs explicit review.", "recommendation": "Review action_surface.actions[].effect; restore the prior effect or document approval/evidence for the escalation.", @@ -98,6 +103,7 @@ "missing" ], "fires_when": "An added external_communication action lacks safeguards.audit_log.", + "floor_severity": null, "id": "SHIP-ACTION-EXTERNAL-COMMUNICATION-AUDIT-MISSING", "rationale": "External communication changes agent blast radius and should be auditable.", "recommendation": "Declare safeguards.audit_log for the external communication action.", @@ -115,6 +121,7 @@ "missing" ], "fires_when": "An added action is financial_write and lacks approval.required, safeguards.audit_log, or safeguards.idempotency.", + "floor_severity": null, "id": "SHIP-ACTION-FINANCIAL-WRITE-CONTROL-MISSING", "rationale": "Financial write actions need approval, audit, and idempotency evidence before release.", "recommendation": "Declare approval.required, safeguards.audit_log, and safeguards.idempotency.", @@ -133,6 +140,7 @@ "missing" ], "fires_when": "A user-declared action_surface.policies rule matches an action and one or more required dot-path values are absent or different.", + "floor_severity": null, "id": "SHIP-ACTION-POLICY-VIOLATION", "rationale": "Action Surface Diff policies are the reviewer-facing release boundary for external action capability.", "recommendation": "Satisfy the action-surface policy requirements or remove/narrow the action.", @@ -149,6 +157,7 @@ "change" ], "fires_when": "A previously true action safeguard is false or absent in the head surface.", + "floor_severity": null, "id": "SHIP-ACTION-SAFEGUARD-REMOVED", "rationale": "Removing audit, idempotency, 
rollback, or dry-run safeguards expands blast radius.", "recommendation": "Restore the removed action_surface.actions[].safeguards field or document the reviewed exception under action_surface.actions[].evidence.", @@ -166,6 +175,7 @@ "tool_name" ], "fires_when": "action_surface.require_explicit_actions is true and a loaded tool has no matching action_surface.actions entry.", + "floor_severity": null, "id": "SHIP-ACTION-UNDECLARED", "rationale": "Action Surface Diff depends on reviewer-visible action metadata for release decisions.", "recommendation": "Add action_surface.actions metadata for the tool or disable require_explicit_actions.", @@ -184,6 +194,7 @@ "change" ], "fires_when": "An added action declares a broad scope, or a modified action expands into a broad scope.", + "floor_severity": null, "id": "SHIP-ACTION-WILDCARD-SCOPE", "rationale": "Wildcard scopes make action blast radius too broad for deterministic release review.", "recommendation": "Replace action_surface.actions[].scopes with operation-specific scopes; remove wildcard/admin scopes.", @@ -201,6 +212,7 @@ "explicit_inventory" ], "fires_when": "A Google ADK toolset is dynamic or unresolved and no explicit MCP/OpenAPI/tool inventory input is declared.", + "floor_severity": null, "id": "SHIP-ADK-DYNAMIC-TOOLSET-NOT-ENUMERABLE", "rationale": "Release review needs an explicit tool inventory; ADK MCP/OpenAPI toolsets may resolve tools dynamically at runtime.", "recommendation": "Provide explicit MCP/OpenAPI/tool inventory inputs for dynamic ADK toolsets.", @@ -218,6 +230,7 @@ "eval_file_count" ], "fires_when": "Google ADK inputs target production_like or production and no eval files are declared.", + "floor_severity": null, "id": "SHIP-ADK-EVAL-COVERAGE-MISSING", "rationale": "ADK releases should include response and tool-trajectory eval evidence before promotion.", "recommendation": "Declare ADK eval files for expected responses and tool-use trajectories.", @@ -235,6 +248,7 @@ "source_type" ], 
"fires_when": "A Google ADK function/config tool lacks description or parameter metadata.", + "floor_severity": null, "id": "SHIP-ADK-FUNCTION-TOOL-METADATA-MISSING", "rationale": "Static review depends on descriptions and parameter schemas because user ADK code is not imported.", "recommendation": "Add docstrings, annotations, or explicit local inventory metadata.", @@ -251,6 +265,7 @@ "tools" ], "fires_when": "High-risk ADK tools are present without callbacks, plugins, or equivalent manifest policy evidence.", + "floor_severity": null, "id": "SHIP-ADK-GUARDRAIL-EVIDENCE-MISSING", "rationale": "Callbacks and plugins are the static ADK surface where release reviewers can see guardrail intent.", "recommendation": "Attach ADK callbacks/plugins or manifest policies that document guardrails.", @@ -267,6 +282,7 @@ "output_schema" ], "fires_when": "A LongRunningFunctionTool lacks structured status/progress and operation id output evidence.", + "floor_severity": null, "id": "SHIP-ADK-LONGRUNNING-CONTRACT-MISSING", "rationale": "Long-running tools need explicit status and operation-id semantics for safe continuation.", "recommendation": "Document operation id, status/progress fields, and completion behavior.", @@ -285,6 +301,7 @@ "inventory_path" ], "fires_when": "A Google ADK McpToolset has no static tool_filter.", + "floor_severity": null, "id": "SHIP-ADK-MCP-TOOLSET-UNFILTERED", "rationale": "Unfiltered MCP toolsets can expose more tools than reviewers expect.", "recommendation": "Declare a tool_filter and provide a local MCP tool inventory.", @@ -302,6 +319,7 @@ "risk_tags" ], "fires_when": "An OpenAI API function lacks strict=true, object parameters, additionalProperties=false, complete required fields, or bounded risky fields.", + "floor_severity": null, "id": "SHIP-API-FUNCTION-SCHEMA-STRICTNESS", "rationale": "Strict schemas reduce ambiguous tool arguments and downstream side-effect risk.", "recommendation": "Use strict function schemas with explicit required 
fields and constrained risky parameters.", @@ -318,6 +336,7 @@ "legacy_check_id" ], "fires_when": "Not emitted by v0.4 scans; matching configuration expands to the v0.4 atomic OpenAI API operational readiness checks.", + "floor_severity": null, "id": "SHIP-API-OPERATIONAL-READINESS", "rationale": "v0.4 emits atomic OpenAI API readiness check IDs, but this ID remains available for existing suppressions, severity overrides, baselines, SARIF consumers, and explain/list-checks workflows during the deprecation window.", "recommendation": "Migrate suppressions, severity overrides, and baselines to the specific v0.4 SHIP-API-* readiness check IDs.", @@ -334,6 +353,7 @@ "tools" ], "fires_when": "Prompt text says read-only/advice-only while write tools are enabled, or high-risk tools lack approval/confirmation instructions.", + "floor_severity": null, "id": "SHIP-API-PROMPT-TOOL-SCOPE-MISMATCH", "rationale": "Prompt instructions should match the actual write/high-risk tool surface.", "recommendation": "Align prompt scope with enabled tools and add approval/confirmation instructions.", @@ -350,6 +370,7 @@ "high_risk_tools" ], "fires_when": "High-risk OpenAI API tools exist and no retry_policy is declared.", + "floor_severity": null, "id": "SHIP-API-RETRY-POLICY-MISSING", "rationale": "Retries need explicit policy metadata so reviewers can reason about duplicate side effects.", "recommendation": "Declare retry_policy in openai_api.policy_rules or model_config.", @@ -367,6 +388,7 @@ "risk_tags" ], "fires_when": "Retry policy is declared and a risky write tool lacks idempotency evidence.", + "floor_severity": null, "id": "SHIP-API-RETRY-WITHOUT-IDEMPOTENCY", "rationale": "Retries against non-idempotent writes can duplicate financial, destructive, or external side effects.", "recommendation": "Add idempotency evidence for retried risky OpenAI API tools or avoid retrying those side effects.", @@ -385,6 +407,7 @@ "high_risk_tools" ], "fires_when": "No response format exists, a 
response schema is too broad, decision/status fields lack enums, or refusal/needs_review/error modeling is absent.", + "floor_severity": null, "id": "SHIP-API-STRUCTURED-OUTPUT-READINESS", "rationale": "Downstream release decisions need explicit, structured success/refusal/review modeling.", "recommendation": "Declare a strict response format with decision/status enums, needs_review/refusal/error fields, and critical fields.", @@ -401,6 +424,7 @@ "high_risk_tools" ], "fires_when": "High-risk OpenAI API tools exist and no test cases are declared.", + "floor_severity": null, "id": "SHIP-API-TEST-CASES-MISSING", "rationale": "High-risk tool-call flows should have release evidence before promotion.", "recommendation": "Add simple OpenAI API test cases for high-risk tool-call flows.", @@ -417,6 +441,7 @@ "high_risk_tools" ], "fires_when": "High-risk OpenAI API tools exist and no timeout metadata is declared.", + "floor_severity": null, "id": "SHIP-API-TIMEOUT-MISSING", "rationale": "Timeouts define failure behavior and reduce ambiguous tool-call continuation.", "recommendation": "Declare tool-call timeout metadata for high-risk OpenAI API flows.", @@ -433,6 +458,7 @@ "tool_output_schemas" ], "fires_when": "A high-risk OpenAI API tool lacks declared success/failure output schema metadata.", + "floor_severity": null, "id": "SHIP-API-TOOL-OUTPUT-SCHEMA-MISSING", "rationale": "Tool output schemas help release reviewers reason about downstream failure handling.", "recommendation": "Declare success_fields and failure_fields for high-risk OpenAI API tools.", @@ -450,6 +476,7 @@ "approved" ], "fires_when": "A trace sample marks approved=false for a tool with approval policy evidence.", + "floor_severity": null, "id": "SHIP-API-TRACE-APPROVAL-MISSING", "rationale": "Trace samples should demonstrate approval behavior for tools that require approval.", "recommendation": "Require approval before calling policy-controlled OpenAI API tools.", @@ -467,6 +494,7 @@ "confirmed" ], 
"fires_when": "A trace sample marks confirmed=false for a tool with confirmation policy evidence.", + "floor_severity": null, "id": "SHIP-API-TRACE-CONFIRMATION-MISSING", "rationale": "Trace samples should demonstrate explicit confirmation for tools that require confirmation.", "recommendation": "Require explicit confirmation before calling policy-controlled OpenAI API tools.", @@ -483,6 +511,7 @@ "scopes" ], "fires_when": "permissions.scopes contains wildcard/admin-like scopes.", + "floor_severity": null, "id": "SHIP-AUTH-MANIFEST-BROAD-SCOPE", "rationale": "Broad manifest scopes weaken least-privilege review.", "recommendation": "Replace with operation-specific scopes.", @@ -499,6 +528,7 @@ "risk_tags" ], "fires_when": "A write or sensitive-data tool has no auth scopes.", + "floor_severity": null, "id": "SHIP-AUTH-MISSING-SCOPE", "rationale": "Reviewers cannot assess least privilege without scope metadata.", "recommendation": "Declare scopes in OpenAPI, MCP, or manifest metadata.", @@ -517,6 +547,7 @@ "missing_scopes" ], "fires_when": "A tool scope is absent from permissions.scopes and not covered by a wildcard.", + "floor_severity": null, "id": "SHIP-AUTH-SCOPE-COVERAGE-MISSING", "rationale": "The manifest should describe the actual permissions needed by the release.", "recommendation": "Add or reconcile required scopes.", @@ -533,6 +564,7 @@ "scopes" ], "fires_when": "A tool auth scope is wildcard/admin-like.", + "floor_severity": null, "id": "SHIP-AUTH-TOOL-BROAD-SCOPE", "rationale": "Tool-level broad scopes may grant more power than the operation needs.", "recommendation": "Use narrower tool scopes.", @@ -552,6 +584,7 @@ "path" ], "fires_when": "A Codex plugin declares an app connector without an explicit local enumerable tool surface.", + "floor_severity": null, "id": "SHIP-CODEX-PLUGIN-APP-SURFACE-NOT-ENUMERABLE", "rationale": "Connector-backed app capabilities are externally mediated and cannot be proven from local plugin metadata alone.", 
"recommendation": "Treat connector app capabilities as a review item or provide explicit local inventory/policy evidence.", @@ -571,6 +604,7 @@ "reason" ], "fires_when": "A plugin component path is missing, outside the plugin root, or outside the manifest directory.", + "floor_severity": null, "id": "SHIP-CODEX-PLUGIN-COMPONENT-PATH-MISSING", "rationale": "Release review cannot inspect declared skills, MCP servers, apps, or hooks when component paths are missing or escape the package.", "recommendation": "Fix component paths so every declared plugin artifact resolves locally.", @@ -589,6 +623,7 @@ "missing" ], "fires_when": "A marketplace plugin entry lacks policy.installation, policy.authentication, or category.", + "floor_severity": null, "id": "SHIP-CODEX-PLUGIN-MARKETPLACE-POLICY-MISSING", "rationale": "Marketplace installation and authentication policy are part of the release surface coding agents need to review.", "recommendation": "Add policy.installation, policy.authentication, and category to the marketplace entry.", @@ -608,6 +643,7 @@ "inventory_path" ], "fires_when": "A Codex plugin declares an MCP server and no matching codex_plugins.mcp_tool_inventories entry loaded successfully.", + "floor_severity": null, "id": "SHIP-CODEX-PLUGIN-MCP-SERVER-NOT-ENUMERABLE", "rationale": "Agents Shipgate does not execute MCP commands, so reviewer-visible tool metadata requires an explicit local inventory.", "recommendation": "Provide a local MCP tool inventory for the plugin server.", @@ -627,6 +663,7 @@ "issues" ], "fires_when": "A Codex plugin manifest is missing required identity metadata, its name does not match the package root, or duplicate plugin names are present.", + "floor_severity": null, "id": "SHIP-CODEX-PLUGIN-METADATA-MISSING", "rationale": "Plugin identity needs to be stable before publication or downstream agent adoption.", "recommendation": "Fill required plugin metadata and make plugin names unambiguous.", @@ -647,6 +684,7 @@ "duplicate" ], 
"fires_when": "A SKILL.md lacks name or description frontmatter, or two skills in the same plugin share a name.", + "floor_severity": null, "id": "SHIP-CODEX-PLUGIN-SKILL-METADATA-MISSING", "rationale": "Skill frontmatter is the static routing surface agents use to decide whether a skill applies.", "recommendation": "Give each skill a unique name and clear description frontmatter.", @@ -664,6 +702,7 @@ "explicit_inventory" ], "fires_when": "A CrewAI agent tool binding is dynamic or unresolved and no explicit local inventory is declared.", + "floor_severity": null, "id": "SHIP-CREWAI-DYNAMIC-TOOL-SURFACE-NOT-ENUMERABLE", "rationale": "CrewAI exposes ad hoc agent-bound tool lists rather than a consistent toolset abstraction, so the check names the broader tool surface instead of ADK's toolset.", "recommendation": "Provide explicit MCP-style tool inventory metadata for dynamic CrewAI tool lists.", @@ -681,6 +720,7 @@ "source_type" ], "fires_when": "A CrewAI @tool or BaseTool class lacks description or parameter metadata.", + "floor_severity": null, "id": "SHIP-CREWAI-FUNCTION-TOOL-METADATA-MISSING", "rationale": "Static review depends on descriptions and parameter schemas because user CrewAI code is not imported.", "recommendation": "Add docstrings, descriptions, type annotations, args_schema, or explicit local inventory metadata.", @@ -697,6 +737,7 @@ "matched" ], "fires_when": "Description text matches instruction override patterns. 
Severity is high only when multiple patterns match on a write/high-risk tool.", + "floor_severity": null, "id": "SHIP-DOC-INJECTION-RISK", "rationale": "Tool metadata can be placed into model context and should not contain prompt-like directives.", "recommendation": "Rewrite the description as neutral metadata.", @@ -713,6 +754,7 @@ "description_length" ], "fires_when": "A tool description is missing or shorter than the minimum.", + "floor_severity": null, "id": "SHIP-DOC-MISSING-DESCRIPTION", "rationale": "Poor tool descriptions increase wrong-tool and reviewer misunderstanding risk.", "recommendation": "Add a clear capability description.", @@ -729,6 +771,7 @@ "matched" ], "fires_when": "Description contains known key formats or labeled secret-like values. Severity is high only when multiple patterns match on a write/high-risk tool.", + "floor_severity": null, "id": "SHIP-DOC-SECRET-IN-DESCRIPTION", "rationale": "Credentials in tool metadata can leak into reports, prompts, or logs.", "recommendation": "Remove and rotate the exposed secret.", @@ -750,6 +793,7 @@ "source_provenance" ], "fires_when": "validation.required_evidence.approval_trace_required is true and no loaded local approval trace shows approved=true for an approval-required tool.", + "floor_severity": null, "id": "SHIP-EVIDENCE-APPROVAL-TRACE-MISSING", "rationale": "Limited automation review depends on reviewer-visible local evidence that approval-controlled actions were approved before the tool call; absence of local evidence does not prove the runtime control is absent.", "recommendation": "Add or fix local approval trace evidence, or change the validation review posture.", @@ -771,6 +815,7 @@ "source_provenance" ], "fires_when": "validation.required_evidence.high_risk_auto_approval_exclusion_required is true and a high-risk tool with declared approval policy is not listed in loaded high_risk_auto_approval_exclusions.", + "floor_severity": null, "id": "SHIP-EVIDENCE-HIGH-RISK-EXCLUSION-MISSING", 
"rationale": "High-risk tools that already declare approval policy need separate local evidence that they are excluded from auto-approval review posture; absence of local evidence does not prove the runtime control is absent.", "recommendation": "Document high-risk approval-controlled tools in local high_risk_auto_approval_exclusions with reasons.", @@ -792,6 +837,7 @@ "source_provenance" ], "fires_when": "validation.target_review_posture is limited_auto_approval and promotion criteria are absent, unloadable, or the canonical required-evidence flags are not true in the manifest and criteria file.", + "floor_severity": null, "id": "SHIP-EVIDENCE-HITL-PROMOTION-CRITERIA-MISSING", "rationale": "A limited auto-approval review posture needs local criteria evidence; Shipgate structures the missing evidence for reviewers but does not certify runtime enforcement.", "recommendation": "Add or fix local promotion criteria evidence documenting the review posture and required evidence flags.", @@ -812,6 +858,7 @@ "source_provenance" ], "fires_when": "validation.required_evidence.override_reason_required is true and override logs are absent, empty, unloadable, or contain events without non-empty reasons.", + "floor_severity": null, "id": "SHIP-EVIDENCE-OVERRIDE-REASON-MISSING", "rationale": "Override, bypass, and auto-approval events need reviewer-visible local reasons for governance review; absence of local evidence does not prove the runtime control is absent.", "recommendation": "Record non-empty reasons in local override, bypass, and auto-approval evidence.", @@ -828,6 +875,7 @@ "tools" ], "fires_when": "environment.target is production and tools include lower-confidence extraction.", + "floor_severity": null, "id": "SHIP-INVENTORY-LOW-CONFIDENCE-PRODUCTION-SURFACE", "rationale": "Production promotion should not depend primarily on best-effort SDK inference.", "recommendation": "Declare those tools through manifest, MCP, or OpenAPI inputs.", @@ -844,6 +892,7 @@ 
"tool_sources" ], "fires_when": "No tools are loaded from required manifest sources.", + "floor_severity": null, "id": "SHIP-INVENTORY-NOT-ENUMERABLE", "rationale": "A release gate must fail closed when it cannot see the agent's tools.", "recommendation": "Declare at least one local MCP JSON or OpenAPI tool source.", @@ -861,6 +910,7 @@ "threshold" ], "fires_when": "The normalized tool count exceeds the built-in threshold.", + "floor_severity": null, "id": "SHIP-INVENTORY-TOOL-SURFACE-TOO-LARGE", "rationale": "Large tool surfaces are harder to reason about during promotion.", "recommendation": "Split or reduce the tool surface before release.", @@ -878,6 +928,7 @@ "source_ref" ], "fires_when": "A source declares all tools or wildcard exposure.", + "floor_severity": null, "id": "SHIP-INVENTORY-WILDCARD-TOOLS", "rationale": "Wildcard tools make review and least-privilege reasoning impossible.", "recommendation": "Replace wildcard exposure with an explicit allowlist.", @@ -895,6 +946,7 @@ "explicit_inventory" ], "fires_when": "A LangChain or LangGraph tool binding is dynamic or unresolved and no explicit local inventory is declared.", + "floor_severity": null, "id": "SHIP-LANGCHAIN-DYNAMIC-TOOL-SURFACE-NOT-ENUMERABLE", "rationale": "LangChain and LangGraph expose ad hoc tool lists and agent-bound tools rather than a consistent toolset abstraction, so the check names the broader tool surface instead of ADK's toolset.", "recommendation": "Provide explicit MCP-style tool inventory metadata for dynamic LangChain tool lists.", @@ -912,6 +964,7 @@ "source_type" ], "fires_when": "A LangChain @tool or StructuredTool lacks description or parameter metadata.", + "floor_severity": null, "id": "SHIP-LANGCHAIN-FUNCTION-TOOL-METADATA-MISSING", "rationale": "Static review depends on descriptions and parameter schemas because user LangChain code is not imported.", "recommendation": "Add docstrings, descriptions, type annotations, args_schema, or explicit local inventory metadata.", 
@@ -929,6 +982,7 @@ "risk_tags" ], "fires_when": "environment.target is production_like or production and a high-risk tool lacks owner metadata.", + "floor_severity": null, "id": "SHIP-MANIFEST-HIGH-RISK-OWNER-MISSING", "rationale": "High-risk production tools need an accountable owning team for review and remediation.", "recommendation": "Declare an owner for each high-risk production tool.", @@ -946,6 +1000,7 @@ "tool" ], "fires_when": "A policy entry names a tool that is not loaded.", + "floor_severity": null, "id": "SHIP-MANIFEST-STALE-POLICY", "rationale": "Approval, confirmation, and idempotency policies should map to the actual release surface.", "recommendation": "Remove stale policy entries or update them to current tool names.", @@ -962,6 +1017,7 @@ "tool" ], "fires_when": "risk_overrides.tools contains a tool that is not loaded.", + "floor_severity": null, "id": "SHIP-MANIFEST-STALE-RISK-OVERRIDE", "rationale": "Risk overrides should not outlive the tool they describe.", "recommendation": "Remove stale risk overrides or update them to current tool names.", @@ -980,6 +1036,7 @@ "issues" ], "fires_when": "checks.ignore contains an unknown check_id or a tool name that is not loaded.", + "floor_severity": null, "id": "SHIP-MANIFEST-STALE-SUPPRESSION", "rationale": "Stale suppressions hide intent and make release review harder to audit.", "recommendation": "Remove stale suppressions or update them to current check IDs and tool names.", @@ -997,6 +1054,7 @@ "tool_scopes" ], "fires_when": "permissions.scopes includes a scope not required by any loaded tool.", + "floor_severity": null, "id": "SHIP-MANIFEST-UNUSED-SCOPE", "rationale": "Unused permissions weaken least-privilege review and often indicate stale config.", "recommendation": "Remove unused scopes or add tool metadata showing why they are required.", @@ -1014,6 +1072,7 @@ "source_type" ], "fires_when": "An n8n AI-exposed tool lacks description or static parameter metadata.", + "floor_severity": null, 
"id": "SHIP-N8N-AI-TOOL-METADATA-MISSING", "rationale": "Static review depends on descriptions and parameter schemas because Shipgate does not execute n8n workflows.", "recommendation": "Add n8n tool descriptions, $fromAI() parameter metadata, workflow input schemas, or explicit inventory metadata.", @@ -1031,6 +1090,7 @@ "credential_stub_file_count" ], "fires_when": "Production-like n8n workflows reference credentials but no local credential stubs are declared.", + "floor_severity": null, "id": "SHIP-N8N-CREDENTIAL-EVIDENCE-MISSING", "rationale": "Credential type evidence lets reviewers assess high-risk integrations without exposing secret values.", "recommendation": "Declare local n8n credential stubs.", @@ -1048,6 +1108,7 @@ "explicit_inventory" ], "fires_when": "An n8n workflow has an unresolved DB workflow id, runtime expression in a tool name, wildcard MCP Server/Client exposure without an inventory, or an uninventoried community/custom tool node.", + "floor_severity": null, "id": "SHIP-N8N-DYNAMIC-TOOL-SURFACE-NOT-ENUMERABLE", "rationale": "Release review needs an explicit local inventory when n8n workflow JSON uses runtime tool names, unresolved workflow references, wildcard MCP exposure, or community tool nodes without static metadata. 
This is high severity in every environment because static release evidence cannot prove the actual tool inventory.", "recommendation": "Provide explicit local n8n/MCP tool inventory metadata or replace runtime/wildcard n8n tool exposure.", @@ -1066,6 +1127,7 @@ "eval_file_count" ], "fires_when": "n8n inputs target production_like or production and no eval files are declared.", + "floor_severity": null, "id": "SHIP-N8N-EVAL-COVERAGE-MISSING", "rationale": "n8n AI workflow releases should include response and tool-trajectory eval evidence before promotion.", "recommendation": "Declare n8n eval files for expected responses and tool-use trajectories.", @@ -1085,6 +1147,7 @@ "explicit_inventory" ], "fires_when": "An n8n MCP Client Tool uses All or All Except selection and no explicit local inventory is declared.", + "floor_severity": null, "id": "SHIP-N8N-MCP-CLIENT-TOOLSET-UNFILTERED", "rationale": "All-tools and all-except MCP client selections can expose more tools than reviewers expect. The check is environment-sensitive because the selector is straightforward to narrow before production, while production-like use increases blast radius.", "recommendation": "Select an explicit MCP tool allowlist in n8n or provide a local MCP inventory.", @@ -1103,6 +1166,7 @@ "secret_kind" ], "fires_when": "A static n8n workflow parameter, node note, pinData entry, or staticData entry matches a secret-like token pattern. 
Evidence is redacted and contains only source location and secret kind, never a secret verifier.", + "floor_severity": null, "id": "SHIP-N8N-SECRET-IN-WORKFLOW-PARAMETER", "rationale": "Workflow JSON, pinned data, static data, and node notes can be committed or reported; hardcoded secret-like values should be moved into credentials or variables.", "recommendation": "Move secret values into n8n credentials or variables and rotate the exposed value.", @@ -1120,6 +1184,7 @@ "policy_match" ], "fires_when": "Financial/destructive/infrastructure/code-exec risk exists without approval policy.", + "floor_severity": null, "id": "SHIP-POLICY-APPROVAL-MISSING", "rationale": "High-risk actions need explicit approval before promotion.", "recommendation": "Declare an approval policy or remove the tool.", @@ -1137,6 +1202,7 @@ "policy_match" ], "fires_when": "Risk tags require confirmation but no confirmation policy matches.", + "floor_severity": null, "id": "SHIP-POLICY-CONFIRMATION-MISSING", "rationale": "Destructive and external actions should require explicit confirmation.", "recommendation": "Declare confirmation policy or remove the tool.", @@ -1154,6 +1220,7 @@ "type" ], "fires_when": "A write/action-like tool has free-form command/action/update-style parameters.", + "floor_severity": null, "id": "SHIP-SCHEMA-BROAD-FREE-TEXT", "rationale": "Broad action/body/update fields increase blast radius for write tools.", "recommendation": "Constrain the field with structured schema or enums.", @@ -1170,6 +1237,7 @@ "output_schema" ], "fires_when": "A tool output schema is string or an SDK function returns str.", + "floor_severity": null, "id": "SHIP-SCHEMA-FREEFORM-OUTPUT", "rationale": "Free-form tool output may carry prompt injection into later model context.", "recommendation": "Prefer structured output for model-consumed tool results.", @@ -1187,6 +1255,7 @@ "type" ], "fires_when": "A risky numeric parameter lacks a maximum.", + "floor_severity": null, "id": 
"SHIP-SCHEMA-MISSING-BOUNDS", "rationale": "Unbounded counts or financial amounts weaken blast-radius control.", "recommendation": "Add a maximum or equivalent policy limit.", @@ -1204,6 +1273,7 @@ "risk_tags" ], "fires_when": "Tool name/description/risk tags overlap prohibited_actions without a mitigating policy.", + "floor_severity": null, "id": "SHIP-SCOPE-PROHIBITED-TOOL-PRESENT", "rationale": "Prohibited actions should not be contradicted by attached tool capabilities.", "recommendation": "Remove or narrow the tool, or revise policy/scope text.", @@ -1221,6 +1291,7 @@ "risk_tags" ], "fires_when": "Purpose text is read-only but attached tools are write-capable.", + "floor_severity": null, "id": "SHIP-SCOPE-TOOL-OUTSIDE-PURPOSE", "rationale": "Declared purpose should constrain the attached tool surface.", "recommendation": "Remove the tool or update release scope.", @@ -1238,6 +1309,7 @@ "retry_policy_known" ], "fires_when": "Risky write tool lacks idempotency annotation, key, or policy.", + "floor_severity": null, "id": "SHIP-SIDEFX-IDEMPOTENCY-MISSING", "rationale": "Retries against non-idempotent writes can duplicate financial or external side effects.", "recommendation": "Add idempotency evidence or policy.", diff --git a/docs/checks.md b/docs/checks.md index e7c8cb0..40ebc06 100644 --- a/docs/checks.md +++ b/docs/checks.md @@ -565,7 +565,9 @@ agents-shipgate list-checks --json agents-shipgate explain SHIP-POLICY-APPROVAL-MISSING ``` -Third-party packages can register checks through the `agents_shipgate.checks` Python entry-point group. Plugins are disabled by default because loading them imports third-party Python modules. Set `AGENTS_SHIPGATE_ENABLE_PLUGINS=1` to opt in, or pass `--no-plugins` to force them off for a scan or catalog command. Reports include `loaded_plugins` provenance for every third-party check entry point that ran. 
A plugin check should expose a callable with the same `ScanContext -> list[Finding]` shape as built-ins and may attach `AGENTS_SHIPGATE_METADATA` as either a `CheckMetadata` instance or a compatible dictionary. Adapter artifacts are available through `context.framework_artifacts` or `context.artifact("openai_api", OpenAIApiArtifacts)`. Legacy `context.*_artifacts` read-only properties remain available for v0.11 plugin compatibility, raise `TypeError` on artifact type mismatch, and are scheduled for removal in v0.12. +Third-party packages can register checks through the `agents_shipgate.checks` Python entry-point group. Plugins are disabled by default because loading them imports third-party Python modules. Set `AGENTS_SHIPGATE_ENABLE_PLUGINS=1` to opt in, or pass `--no-plugins` to force them off for a scan or catalog command. Reports include `loaded_plugins` provenance for every third-party check entry point Shipgate discovered — including ones that failed validation. A plugin check should expose a callable with the same `ScanContext -> list[Finding]` shape as built-ins and attach `AGENTS_SHIPGATE_METADATA` as either a `CheckMetadata` instance or a compatible dictionary. Adapter artifacts are available through `context.framework_artifacts` or `context.artifact("openai_api", OpenAIApiArtifacts)`. Legacy `context.*_artifacts` read-only properties remain available for v0.11 plugin compatibility, raise `TypeError` on artifact type mismatch, and are scheduled for removal in v0.12. + +**Plugin validation (v0.17+).** Shipgate runs five load-time gates against every entry point — load, signature, metadata, ID-collision, and floor-consistency — before letting it produce findings. Metadata may use either `id` or `check_id` as the identifier key (the alias is symmetric with `Finding.check_id`); both names map to `CheckMetadata.id`. 
Plugins that fail validation surface in `loaded_plugins[]` with a non-`valid` `validation_status` and human-readable `validation_errors`, and they do not run. At runtime, findings whose `check_id` does not match the declared plugin metadata are dropped and recorded under `loaded_plugins[].runtime_errors` — a plugin cannot smuggle findings under another check ID. Default behavior is lenient (record failures, continue scanning). Pass `--strict-plugins` to exit non-zero (code 4) when any plugin has a non-`valid` status or non-empty `runtime_errors`. See [STABILITY.md § Trust-model invariants](../STABILITY.md#trust-model-invariants) for the contract. ## Declarative Policy Packs diff --git a/docs/report-schema.v0.17.json b/docs/report-schema.v0.17.json index 91a4c12..e468c12 100644 --- a/docs/report-schema.v0.17.json +++ b/docs/report-schema.v0.17.json @@ -3790,6 +3790,9 @@ "check_id", "distribution", "name", + "runtime_errors", + "validation_errors", + "validation_status", "value", "version" ], diff --git a/llms-full.txt b/llms-full.txt index 9264c47..dddd503 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -1512,7 +1512,9 @@ agents-shipgate list-checks --json agents-shipgate explain SHIP-POLICY-APPROVAL-MISSING ``` -Third-party packages can register checks through the `agents_shipgate.checks` Python entry-point group. Plugins are disabled by default because loading them imports third-party Python modules. Set `AGENTS_SHIPGATE_ENABLE_PLUGINS=1` to opt in, or pass `--no-plugins` to force them off for a scan or catalog command. Reports include `loaded_plugins` provenance for every third-party check entry point that ran. A plugin check should expose a callable with the same `ScanContext -> list[Finding]` shape as built-ins and may attach `AGENTS_SHIPGATE_METADATA` as either a `CheckMetadata` instance or a compatible dictionary. Adapter artifacts are available through `context.framework_artifacts` or `context.artifact("openai_api", OpenAIApiArtifacts)`. 
Legacy `context.*_artifacts` read-only properties remain available for v0.11 plugin compatibility, raise `TypeError` on artifact type mismatch, and are scheduled for removal in v0.12. +Third-party packages can register checks through the `agents_shipgate.checks` Python entry-point group. Plugins are disabled by default because loading them imports third-party Python modules. Set `AGENTS_SHIPGATE_ENABLE_PLUGINS=1` to opt in, or pass `--no-plugins` to force them off for a scan or catalog command. Reports include `loaded_plugins` provenance for every third-party check entry point Shipgate discovered — including ones that failed validation. A plugin check should expose a callable with the same `ScanContext -> list[Finding]` shape as built-ins and attach `AGENTS_SHIPGATE_METADATA` as either a `CheckMetadata` instance or a compatible dictionary. Adapter artifacts are available through `context.framework_artifacts` or `context.artifact("openai_api", OpenAIApiArtifacts)`. Legacy `context.*_artifacts` read-only properties remain available for v0.11 plugin compatibility, raise `TypeError` on artifact type mismatch, and are scheduled for removal in v0.12. + +**Plugin validation (v0.17+).** Shipgate runs five load-time gates against every entry point — load, signature, metadata, ID-collision, and floor-consistency — before letting it produce findings. Metadata may use either `id` or `check_id` as the identifier key (the alias is symmetric with `Finding.check_id`); both names map to `CheckMetadata.id`. Plugins that fail validation surface in `loaded_plugins[]` with a non-`valid` `validation_status` and human-readable `validation_errors`, and they do not run. At runtime, findings whose `check_id` does not match the declared plugin metadata are dropped and recorded under `loaded_plugins[].runtime_errors` — a plugin cannot smuggle findings under another check ID. Default behavior is lenient (record failures, continue scanning). 
Pass `--strict-plugins` to exit non-zero (code 4) when any plugin has a non-`valid` status or non-empty `runtime_errors`. See [STABILITY.md § Trust-model invariants](../STABILITY.md#trust-model-invariants) for the contract. ## Declarative Policy Packs diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py index 0b81421..2d634b2 100644 --- a/scripts/generate_schemas.py +++ b/scripts/generate_schemas.py @@ -685,8 +685,25 @@ def build_report_schema() -> tuple[Path, str]: properties["loaded_plugins"]["items"] = { "type": "object", "additionalProperties": True, + # v0.17 (M5): plugin validation provenance is required on + # every emitted loaded_plugins entry. ``validation_status`` + # is one of ``valid | load_failed | bad_signature | + # bad_metadata | id_collision | bad_floor`` and the two + # error lists are always present (empty for clean plugins). + # The v0.7 frozen schema preserves the original 5-field + # required list — those frozen-schema tests pin the + # pre-M5 shape; this required list is the current contract. "required": sorted( - ["name", "value", "distribution", "version", "check_id"] + [ + "name", + "value", + "distribution", + "version", + "check_id", + "validation_status", + "validation_errors", + "runtime_errors", + ] ), } diff --git a/src/agents_shipgate/checks/plugin_validation.py b/src/agents_shipgate/checks/plugin_validation.py new file mode 100644 index 0000000..a020019 --- /dev/null +++ b/src/agents_shipgate/checks/plugin_validation.py @@ -0,0 +1,373 @@ +"""Plugin validation gates for the `agents_shipgate.checks` entry-point group. + +Five validation gates, each producing a single ``ValidationFailure`` row +that lands in ``loaded_plugins[].validation_errors``: + +1. **load** — ``entry_point.load()`` raises (broken module import, syntax + error, missing dependency). Exception is captured; the plugin is + recorded with ``validation_status="load_failed"`` and skipped. +2. 
**signature** — the loaded object is not callable, or its call signature + does not accept exactly one positional parameter (``ScanContext``). +3. **metadata** — ``AGENTS_SHIPGATE_METADATA`` is missing, malformed, or + fails ``CheckMetadata.model_validate``. Both ``id`` and ``check_id`` + keys are accepted via the ``CheckMetadata`` alias. +4. **id_collision** — the plugin's check ID shadows a built-in (or one of + its legacy aliases), or is already registered by an earlier plugin + in the same scan. +5. **bad_floor** — already enforced inside ``CheckMetadata`` via a model + validator. Surfaced explicitly here so the failure shows up under a + stable label rather than as a generic metadata error. + +Runtime validation (``run_validated_plugin``) is a separate concern from +the load-time gates. It wraps the actual check call and ensures: + +* Exceptions during the call are captured (the scan continues), but the + plugin's record gains a ``runtime_errors`` entry. +* The returned object must be a ``list``; otherwise we drop the result. +* Each item must be a ``Finding``; non-Finding items are dropped with an + error row. +* Each ``Finding.check_id`` must match the plugin's declared metadata + ``id``. Mismatches are dropped — a plugin cannot smuggle findings under + another check ID. This is the load-bearing rule for plugin trust. + +A plugin landing here in ``valid`` state with empty ``runtime_errors`` +behaves exactly like a v0.x plugin. Everything else surfaces in +``report.loaded_plugins[]`` so the operator can see what was skipped and +why without reading scanner logs. 
+""" + +from __future__ import annotations + +import inspect +from collections.abc import Callable +from dataclasses import dataclass, field +from typing import Any + +from pydantic import ValidationError + +from agents_shipgate.core.context import ScanContext +from agents_shipgate.core.models import CheckMetadata, Finding + +PluginCheck = Callable[[ScanContext], list[Finding]] + +# Stable enum surfaced to consumers via ``loaded_plugins[].validation_status``. +# Ordering is informational; consumers should compare to literal strings. +ValidationStatus = str +VALID = "valid" +LOAD_FAILED = "load_failed" +BAD_SIGNATURE = "bad_signature" +BAD_METADATA = "bad_metadata" +ID_COLLISION = "id_collision" +BAD_FLOOR = "bad_floor" + + +@dataclass +class ValidatedPlugin: + """One row in the plugin-load result table. + + ``check`` is ``None`` when validation failed; ``run_checks`` skips + plugins with ``check is None`` but still surfaces them under + ``loaded_plugins[]``. + + ``info`` is the dict shape persisted into the JSON report (see + ``ReadinessReport.loaded_plugins``). The dict is intentionally + mutable so the post-call runtime wrapper can append + ``runtime_errors`` after the check has executed. + """ + + check: PluginCheck | None + metadata: CheckMetadata | None + info: dict[str, Any] + runtime_errors: list[str] = field(default_factory=list) + + @property + def valid(self) -> bool: + return self.info.get("validation_status") == VALID + + +def validate_entry_point( + entry_point: Any, + *, + builtin_ids: set[str], + already_registered_plugin_ids: set[str], +) -> ValidatedPlugin: + """Run the five load-time gates against a single entry point. + + Returns a ``ValidatedPlugin`` whose ``info`` dict carries the + decision (``validation_status``, ``validation_errors``, plus the + legacy ``name``/``value``/``distribution``/``version``/``check_id`` + fields a v0.x consumer expects). 
+ """ + + info = _base_info(entry_point) + + # Gate 1: load + try: + loaded = entry_point.load() + except Exception as exc: # noqa: BLE001 — capture is the point + return _fail(info, LOAD_FAILED, f"entry_point.load() raised: {exc!r}") + + # Gate 2: signature + sig_error = _signature_error(loaded) + if sig_error is not None: + return _fail(info, BAD_SIGNATURE, sig_error) + + # Gate 3: metadata + metadata_value = getattr(loaded, "AGENTS_SHIPGATE_METADATA", None) + if metadata_value is None: + return _fail(info, BAD_METADATA, "AGENTS_SHIPGATE_METADATA is missing") + try: + metadata = _coerce_metadata(metadata_value) + except (ValidationError, TypeError, ValueError) as exc: + # A floor-above-default failure raises ValidationError from + # CheckMetadata's model validator; we surface it under a more + # specific status below. + if _is_floor_error(exc): + return _fail(info, BAD_FLOOR, _format_validation_error(exc)) + return _fail(info, BAD_METADATA, _format_validation_error(exc)) + + info["check_id"] = metadata.id + + # Gate 4: ID collision + if metadata.id in builtin_ids: + return _fail( + info, + ID_COLLISION, + f"check_id {metadata.id!r} is reserved by a built-in check", + ) + if metadata.id in already_registered_plugin_ids: + return _fail( + info, + ID_COLLISION, + f"check_id {metadata.id!r} is already registered by another plugin", + ) + + # All gates pass. + info["validation_status"] = VALID + info["validation_errors"] = [] + return ValidatedPlugin(check=loaded, metadata=metadata, info=info) + + +def run_validated_plugin( + plugin: ValidatedPlugin, context: ScanContext +) -> list[Finding]: + """Execute a validated plugin and return only the findings that pass + runtime validation. + + Mutates ``plugin.info["runtime_errors"]`` in place. ``plugin.check`` + must not be ``None``; callers should skip plugins where + ``plugin.valid`` is ``False``. 
+ """ + + if plugin.check is None or plugin.metadata is None: # defensive + raise RuntimeError("run_validated_plugin called on an invalid plugin record") + + runtime_errors: list[str] = [] + + try: + raw = plugin.check(context) + except Exception as exc: # noqa: BLE001 — capture is the point + runtime_errors.append(f"plugin call raised: {exc!r}") + _set_runtime_errors(plugin, runtime_errors) + return [] + + if not isinstance(raw, list): + runtime_errors.append( + f"plugin must return list[Finding]; got {type(raw).__name__!r}" + ) + _set_runtime_errors(plugin, runtime_errors) + return [] + + declared_id = plugin.metadata.id + valid: list[Finding] = [] + for item in raw: + if not isinstance(item, Finding): + runtime_errors.append( + f"non-Finding item dropped: {type(item).__name__!r}" + ) + continue + if item.check_id != declared_id: + runtime_errors.append( + f"finding check_id={item.check_id!r} does not match plugin " + f"declared id={declared_id!r}; dropped" + ) + continue + valid.append(item) + + _set_runtime_errors(plugin, runtime_errors) + return valid + + +def strict_failure_messages(loaded_plugins: list[dict[str, Any]]) -> list[str]: + """Collect human-readable reasons strict-mode should fail on. + + Returns an empty list if every plugin is ``valid`` with empty + ``runtime_errors``. ``--strict-plugins`` callers should turn a + non-empty list into a non-zero exit. 
+ """ + + messages: list[str] = [] + for record in loaded_plugins: + status = record.get("validation_status") + if status and status != VALID: + errors = record.get("validation_errors") or [status] + for err in errors: + messages.append( + f"plugin {record.get('value') or record.get('name')!r}: {err}" + ) + runtime_errors = record.get("runtime_errors") or [] + for err in runtime_errors: + messages.append( + f"plugin {record.get('value') or record.get('name')!r}: {err}" + ) + return messages + + +# --- internals ------------------------------------------------------------- + + +def _base_info(entry_point: Any) -> dict[str, Any]: + dist = getattr(entry_point, "dist", None) + return { + "name": _maybe_str(getattr(entry_point, "name", None)), + "value": _maybe_str(getattr(entry_point, "value", None)), + "distribution": _distribution_name(dist), + "version": _distribution_version(dist), + "check_id": None, + "validation_status": VALID, + "validation_errors": [], + "runtime_errors": [], + } + + +def _fail(info: dict[str, Any], status: ValidationStatus, message: str) -> ValidatedPlugin: + info["validation_status"] = status + info["validation_errors"] = [message] + return ValidatedPlugin(check=None, metadata=None, info=info) + + +def _signature_error(loaded: Any) -> str | None: + """Decide if ``loaded`` has a callable signature compatible with + ``Callable[[ScanContext], list[Finding]]``. + + Returns a human-readable error if the plugin would always fail to + bind at call time. The validation contract is: a plugin whose + ``validation_status`` is ``valid`` will be called as + ``plugin(context)`` — no other arguments. Any required parameter + that cannot be satisfied by that single positional argument must + fail this gate, not slip through to ``runtime_errors`` later. 
+ """ + + if not callable(loaded): + return f"plugin object is not callable (got {type(loaded).__name__!r})" + try: + sig = inspect.signature(loaded) + except (TypeError, ValueError): + # Builtins / C extensions / objects inspect cannot introspect; accept + # them — the runtime wrapper will catch arity mismatches on call. + return None + positional = [ + p + for p in sig.parameters.values() + if p.kind + in ( + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.VAR_POSITIONAL, + ) + ] + # Need at least one positional slot for ``ScanContext``. Extra + # positional parameters with defaults are allowed — common shape for + # plugins that take optional config. Reject only zero-positional and + # ≥2 required-positional cases. + if not positional: + return "plugin must accept a ScanContext positional parameter" + required_positional = [ + p + for p in positional + if p.kind != inspect.Parameter.VAR_POSITIONAL + and p.default is inspect.Parameter.empty + ] + if len(required_positional) > 1: + return ( + "plugin must accept exactly one required positional parameter " + f"(ScanContext); got {len(required_positional)}" + ) + # Required keyword-only parameters cannot be filled by the + # ``plugin(context)`` call shape — accepting these as ``valid`` + # would lie about runtime callability. Optional kw-only params + # (with defaults) and ``**kwargs`` are fine.
+ required_kw_only = [ + p + for p in sig.parameters.values() + if p.kind == inspect.Parameter.KEYWORD_ONLY + and p.default is inspect.Parameter.empty + ] + if required_kw_only: + names = ", ".join(p.name for p in required_kw_only) + return ( + "plugin signature has required keyword-only parameter(s) " + f"({names}); the validation contract calls plugins as " + "plugin(context) with no keyword arguments" + ) + return None + + +def _coerce_metadata(value: Any) -> CheckMetadata: + if isinstance(value, CheckMetadata): + # Re-validate to enforce the floor invariant on objects that may + # have been constructed without going through model validation + # (e.g., test helpers using ``__init__``). + return CheckMetadata.model_validate(value.model_dump()) + if isinstance(value, dict): + return CheckMetadata.model_validate(value) + raise TypeError( + f"AGENTS_SHIPGATE_METADATA must be CheckMetadata or dict; " + f"got {type(value).__name__!r}" + ) + + +def _is_floor_error(exc: BaseException) -> bool: + return "floor_severity" in str(exc) + + +def _format_validation_error(exc: BaseException) -> str: + if isinstance(exc, ValidationError): + first = exc.errors()[0] + loc = ".".join(str(part) for part in first.get("loc", ())) or "" + return f"metadata invalid at {loc}: {first.get('msg', '')}" + return f"metadata invalid: {exc}" + + +def _set_runtime_errors(plugin: ValidatedPlugin, errors: list[str]) -> None: + plugin.runtime_errors = errors + plugin.info["runtime_errors"] = errors + + +def _maybe_str(value: Any) -> str | None: + if value is None: + return None + text = str(value) + return text or None + + +def _distribution_name(dist: Any) -> str | None: + if dist is None: + return None + metadata = getattr(dist, "metadata", None) + if metadata is not None: + try: + name = metadata.get("Name") + except AttributeError: + name = None + if isinstance(name, str) and name: + return name + name = getattr(dist, "name", None) + return str(name) if name else None + + +def 
_distribution_version(dist: Any) -> str | None: + if dist is None: + return None + version = getattr(dist, "version", None) + return str(version) if version else None diff --git a/src/agents_shipgate/checks/registry.py b/src/agents_shipgate/checks/registry.py index 4c9dca9..e83fc7d 100644 --- a/src/agents_shipgate/checks/registry.py +++ b/src/agents_shipgate/checks/registry.py @@ -23,7 +23,15 @@ schema, side_effects, ) -from agents_shipgate.core.check_ids import known_check_ids_with_legacy +from agents_shipgate.checks.plugin_validation import ( + ValidatedPlugin, + run_validated_plugin, + validate_entry_point, +) +from agents_shipgate.core.check_ids import ( + LEGACY_CHECK_ID_ALIASES, + known_check_ids_with_legacy, +) from agents_shipgate.core.context import ScanContext from agents_shipgate.core.models import CheckMetadata, Finding @@ -199,31 +207,48 @@ def _meta(**kwargs: object) -> CheckMetadata: ] +# Back-compat alias. v0.x callers (third-party tooling that imported +# ``LoadedPluginCheck`` for typing) get the same shape — a frozen pair +# of ``check`` callable and ``info`` dict. ``ValidatedPlugin`` is the +# richer record used internally by the registry post-M5. @dataclass(frozen=True) class LoadedPluginCheck: check: Callable[[ScanContext], list[Finding]] - info: dict[str, str | None] + info: dict[str, Any] def run_checks( context: ScanContext, *, plugins_enabled: bool | None = None, - loaded_plugins: list[dict[str, str | None]] | None = None, + loaded_plugins: list[dict[str, Any]] | None = None, extra_known_check_ids: set[str] | None = None, ) -> list[Finding]: findings: list[Finding] = [] - plugin_checks = _plugin_check_records(plugins_enabled=plugins_enabled) + plugin_records = _plugin_check_records(plugins_enabled=plugins_enabled) + # Order: surface plugin provenance into ``loaded_plugins`` first so + # invalid plugins still appear in the report even when they don't + # run. 
``run_validated_plugin`` will mutate ``record.info`` in place + # to append ``runtime_errors`` after the check fires. if loaded_plugins is not None: - loaded_plugins.extend(record.info for record in plugin_checks) - for check in [*BUILTIN_CHECKS, *(record.check for record in plugin_checks)]: + loaded_plugins.extend(record.info for record in plugin_records) + for check in BUILTIN_CHECKS: findings.extend(check(context)) + for record in plugin_records: + if not record.valid: + continue + findings.extend(run_validated_plugin(record, context)) findings.extend( manifest_consistency.run( context, known_check_ids=known_check_ids_with_legacy( { *(metadata.id for metadata in CHECK_METADATA), + *( + record.metadata.id + for record in plugin_records + if record.metadata is not None + ), *(extra_known_check_ids or set()), } ), @@ -234,11 +259,13 @@ def run_checks( def check_catalog(*, plugins_enabled: bool | None = None) -> list[CheckMetadata]: metadata = [*CHECK_METADATA] - for check in _plugin_checks(plugins_enabled=plugins_enabled): - plugin_metadata = getattr(check, "AGENTS_SHIPGATE_METADATA", None) - if plugin_metadata is None: + for record in _plugin_check_records(plugins_enabled=plugins_enabled): + if record.metadata is None: + # Invalid plugins (failed load/signature/metadata gates) do + # not appear in the catalog. Their provenance still shows up + # in ``report.loaded_plugins`` when a scan runs. 
continue - metadata.append(_metadata_from_plugin(plugin_metadata)) + metadata.append(record.metadata) for check in metadata: if check.docs_url is None: check.docs_url = f"docs/checks.md#{check.id.lower()}" @@ -248,37 +275,67 @@ def check_catalog(*, plugins_enabled: bool | None = None) -> list[CheckMetadata] def check_functions( *, plugins_enabled: bool | None = None ) -> list[Callable[[ScanContext], list[Finding]]]: - return [*BUILTIN_CHECKS, *_plugin_checks(plugins_enabled=plugins_enabled)] + return [ + *BUILTIN_CHECKS, + *( + record.check + for record in _plugin_check_records(plugins_enabled=plugins_enabled) + if record.valid and record.check is not None + ), + ] def _plugin_checks( *, plugins_enabled: bool | None = None ) -> list[Callable[[ScanContext], list[Finding]]]: + """Back-compat accessor for the bare list of valid plugin callables. + + Kept stable for ``tests/test_plugins.py`` and any external code that + imported the helper. Invalid plugins are filtered out — matching the + v0.x behavior of "callable plugins only". + """ + return [ record.check for record in _plugin_check_records(plugins_enabled=plugins_enabled) + if record.valid and record.check is not None ] def _plugin_check_records( *, plugins_enabled: bool | None = None, -) -> list[LoadedPluginCheck]: +) -> list[ValidatedPlugin]: + """Discover and validate every third-party plugin entry point. + + Returns one ``ValidatedPlugin`` per non-builtin entry point — including + those that failed validation. Invalid records carry ``check=None`` and + appear in ``report.loaded_plugins`` so the operator can see what was + skipped and why without reading scanner logs. 
+ """ + if not _plugins_enabled(plugins_enabled): return [] - checks: list[LoadedPluginCheck] = [] + + builtin_ids: set[str] = {metadata.id for metadata in CHECK_METADATA} + builtin_ids.update(LEGACY_CHECK_ID_ALIASES.keys()) + + records: list[ValidatedPlugin] = [] + already_registered: set[str] = set() + for entry_point in entry_points(group="agents_shipgate.checks"): if _is_builtin_entry_point(entry_point): continue - loaded = entry_point.load() - if callable(loaded): - checks.append( - LoadedPluginCheck( - check=loaded, - info=_plugin_info(entry_point, loaded), - ) - ) - return checks + record = validate_entry_point( + entry_point, + builtin_ids=builtin_ids, + already_registered_plugin_ids=already_registered, + ) + records.append(record) + if record.metadata is not None: + already_registered.add(record.metadata.id) + + return records def _plugins_enabled(override: bool | None = None) -> bool: @@ -300,52 +357,28 @@ def _is_builtin_entry_point(entry_point: Any) -> bool: return False -def _plugin_info( - entry_point: Any, - loaded: Callable[[ScanContext], list[Finding]], -) -> dict[str, str | None]: - metadata = getattr(loaded, "AGENTS_SHIPGATE_METADATA", None) - check_id: str | None = None - if isinstance(metadata, CheckMetadata): - check_id = metadata.id - elif isinstance(metadata, dict) and isinstance(metadata.get("id"), str): - check_id = metadata["id"] - dist = getattr(entry_point, "dist", None) - return { - "name": str(getattr(entry_point, "name", "")) or None, - "value": str(getattr(entry_point, "value", "")) or None, - "distribution": _distribution_name(dist), - "version": _distribution_version(dist), - "check_id": check_id, - } +def _normalize_distribution_name(value: str | None) -> str: + return (value or "").replace("_", "-").lower() def _distribution_name(dist: Any) -> str | None: + """Internal helper retained for ``_is_builtin_entry_point``. 
+ + The richer entry-point introspection moved to + ``plugin_validation._distribution_name``; this thin wrapper exists + so builtin detection logic doesn't need to import private symbols + from the validation module. + """ + if dist is None: return None metadata = getattr(dist, "metadata", None) if metadata is not None: - name = metadata.get("Name") + try: + name = metadata.get("Name") + except AttributeError: + name = None if isinstance(name, str): return name name = getattr(dist, "name", None) return str(name) if name else None - - -def _distribution_version(dist: Any) -> str | None: - if dist is None: - return None - version = getattr(dist, "version", None) - return str(version) if version else None - - -def _normalize_distribution_name(value: str | None) -> str: - return (value or "").replace("_", "-").lower() - - -def _metadata_from_plugin(value: Any) -> CheckMetadata: - if isinstance(value, CheckMetadata): - return value - if isinstance(value, dict): - return CheckMetadata.model_validate(value) - raise TypeError("AGENTS_SHIPGATE_METADATA must be CheckMetadata or dict") diff --git a/src/agents_shipgate/cli/_helpers.py b/src/agents_shipgate/cli/_helpers.py index 9729314..76e2d34 100644 --- a/src/agents_shipgate/cli/_helpers.py +++ b/src/agents_shipgate/cli/_helpers.py @@ -8,6 +8,7 @@ import typer from agents_shipgate import __version__ +from agents_shipgate.checks.plugin_validation import strict_failure_messages from agents_shipgate.cli.diagnostics import ( diagnose_invalid_manifest, diagnose_missing_manifest, @@ -20,6 +21,43 @@ logger = logging.getLogger(__name__) +# Exit code for ``--strict-plugins`` failures. Reuses the documented +# ``other_error`` slot (per ``.well-known/agents-shipgate.json``) so the +# stable exit-code surface stays narrow — strict-plugins is plugin +# infrastructure failure, not gate failure. 
+_STRICT_PLUGINS_EXIT_CODE = 4 + + +def _apply_strict_plugins( + report, + exit_code: int, + *, + strict_plugins: bool, + label: str | None = None, +) -> int: + """Apply ``--strict-plugins`` policy after a single scan completes. + + Returns the (possibly elevated) exit code. Emits one human-readable + line per failure to stderr. ``label`` prefixes each line in + multi-config scans so the operator can tell which manifest tripped. + """ + + if not strict_plugins: + return exit_code + messages = strict_failure_messages(report.loaded_plugins) + if not messages: + return exit_code + prefix = f"{label}: " if label else "" + typer.echo( + f"{prefix}--strict-plugins: {len(messages)} plugin issue(s) detected; " + "scan failed under strict-plugins policy.", + err=True, + ) + for message in messages: + typer.echo(f"{prefix}--strict-plugins: {message}", err=True) + return max(exit_code, _STRICT_PLUGINS_EXIT_CODE) + + def _parse_formats(value: str) -> list[str]: formats = [item.strip() for item in value.split(",") if item.strip()] invalid = [item for item in formats if item not in {"markdown", "json", "sarif"}] @@ -179,6 +217,7 @@ def _run_multi_scan( suggest_patches: bool = False, packet_enabled: bool | None = None, packet_formats: list[str] | None = None, + strict_plugins: bool = False, ) -> int: typer.echo(f"Agents Shipgate {__version__}") typer.echo(f"Scanning {len(config_paths)} manifests") @@ -221,6 +260,15 @@ def _run_multi_scan( logger.exception("unhandled exception while scanning %s", config_path) typer.echo(f"{config_path}: internal_error - {exc}", err=True) else: + # Apply --strict-plugins after the scan but before printing + # the per-config summary so the operator sees the elevated + # exit code reflected in the multi-scan tally. + scan_exit_code = _apply_strict_plugins( + report, + scan_exit_code, + strict_plugins=strict_plugins, + label=str(config_path), + ) # v0.8: lead with release_decision.decision (baseline-aware, # the recommended release-gate signal). 
Fall back to the # legacy summary.status only if the report somehow lacks diff --git a/src/agents_shipgate/cli/_register_scan.py b/src/agents_shipgate/cli/_register_scan.py index 4a32323..bb53dad 100644 --- a/src/agents_shipgate/cli/_register_scan.py +++ b/src/agents_shipgate/cli/_register_scan.py @@ -6,6 +6,7 @@ import typer from agents_shipgate.cli._helpers import ( + _apply_strict_plugins, _diagnose_config_error, _parse_fail_on, _parse_formats, @@ -88,6 +89,15 @@ def scan( "--no-plugins", help="Do not load third-party check plugins even when AGENTS_SHIPGATE_ENABLE_PLUGINS is set.", ), + strict_plugins: bool = typer.Option( + False, + "--strict-plugins", + help=( + "Exit non-zero (code 4) if any loaded plugin failed validation or " + "produced runtime errors. Default lenient mode records the failure " + "in report.loaded_plugins but proceeds with the scan." + ), + ), suggest_patches: bool = typer.Option( False, "--suggest-patches", @@ -171,6 +181,9 @@ def scan( packet_enabled=packet, packet_formats=parsed_packet_formats, ) + exit_code = _apply_strict_plugins( + report, exit_code, strict_plugins=strict_plugins + ) _print_cli_summary(report, ci_mode or "advisory", exit_code, verbose=verbose) raise typer.Exit(exit_code) exit_code = _run_multi_scan( @@ -189,6 +202,7 @@ def scan( suggest_patches=suggest_patches, packet_enabled=packet, packet_formats=parsed_packet_formats, + strict_plugins=strict_plugins, ) except ConfigError as exc: typer.echo(f"Config error: {exc}", err=True) diff --git a/src/agents_shipgate/core/models.py b/src/agents_shipgate/core/models.py index c148f02..17f6b87 100644 --- a/src/agents_shipgate/core/models.py +++ b/src/agents_shipgate/core/models.py @@ -2,7 +2,7 @@ from typing import Any, Literal, cast, get_args -from pydantic import BaseModel, ConfigDict, Field, model_validator +from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator from agents_shipgate.core.patches import Patch @@ -1430,7 +1430,14 @@ class 
LoadedToolSource(BaseModel): class CheckMetadata(BaseModel): - id: str + # Plugins may supply ``AGENTS_SHIPGATE_METADATA`` with either ``id`` or + # ``check_id`` as the identifier key. Built-ins continue to use ``id``; + # ``check_id`` is accepted for symmetry with the field name used in + # ``Finding.check_id`` so newer plugins can use a single spelling + # across metadata + emitted findings. + model_config = ConfigDict(populate_by_name=True) + + id: str = Field(validation_alias=AliasChoices("id", "check_id")) category: str default_severity: Severity description: str @@ -1446,6 +1453,26 @@ class CheckMetadata(BaseModel): autofix_safe: bool = False requires_human_review: bool = True suggested_patch_kind: SuggestedPatchKind = "manual" + # v0.17 (M5): the lowest severity that ``checks.severity_overrides`` + # is allowed to apply to this check, and the lowest severity a plugin + # may declare for findings under this check ID. ``None`` (default) + # means no floor — preserves v0.x behavior for every check that doesn't + # opt in. The M1 manifest-side floor enforcement consumes this field; + # M5 enforces plugin self-consistency by rejecting plugins whose + # ``floor_severity`` exceeds their own ``default_severity``. + floor_severity: Severity | None = None + + @model_validator(mode="after") + def _check_floor_not_above_default(self) -> CheckMetadata: + if self.floor_severity is None: + return self + order = list(get_args(Severity)) + if order.index(self.floor_severity) > order.index(self.default_severity): + raise ValueError( + f"floor_severity={self.floor_severity!r} must not exceed " + f"default_severity={self.default_severity!r} for {self.id!r}" + ) + return self def _string_list(value: Any) -> list[str]: diff --git a/tests/test_plugin_validation.py b/tests/test_plugin_validation.py new file mode 100644 index 0000000..fa0fa42 --- /dev/null +++ b/tests/test_plugin_validation.py @@ -0,0 +1,556 @@ +"""Tests for the M5 plugin validation pipeline. 
+ +Five load-time gates + runtime result validation + ``--strict-plugins`` +exit policy. Each test sets up a fake entry point via monkeypatch and +asserts the resulting ``loaded_plugins[]`` entry plus the visible scan +findings. + +Conventions: + +* Plugin tests run via ``run_scan`` (not ``_plugin_check_records`` in + isolation) so we exercise the production code path end-to-end. +* Every assertion on ``loaded_plugins`` uses ``len() == 1`` + dict + subscript so a future additive field doesn't churn the test diff. +* The shared fixture path is ``samples/clean_read_only_agent/shipgate.yaml`` + for parity with the existing v0.x plugin tests. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from agents_shipgate.checks import registry +from agents_shipgate.checks.plugin_validation import ( + BAD_FLOOR, + BAD_METADATA, + BAD_SIGNATURE, + ID_COLLISION, + LOAD_FAILED, + VALID, +) +from agents_shipgate.cli.scan import run_scan +from agents_shipgate.core.models import CheckMetadata, Finding + +CLEAN_FIXTURE = Path("samples/clean_read_only_agent/shipgate.yaml") + + +class _Dist: + metadata = {"Name": "test-plugin-dist"} + version = "9.9.9" + + +def _entry_point(load_fn, *, name="plugin", value="pkg.module:run", dist=_Dist()): + class EP: + pass + + EP.name = name + EP.value = value + EP.dist = dist + EP.load = staticmethod(load_fn) + return EP() + + +def _patch_entries(monkeypatch, entries): + monkeypatch.setenv("AGENTS_SHIPGATE_ENABLE_PLUGINS", "1") + monkeypatch.setattr(registry, "entry_points", lambda group: entries) + + +# --- Gate 1: load ----------------------------------------------------------- + + +def test_gate_load_failure_is_recorded(monkeypatch, tmp_path): + def boom(): + raise ImportError("simulated module load failure") + + _patch_entries(monkeypatch, [_entry_point(boom)]) + + report, exit_code = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + + assert 
exit_code == 0, "lenient default must not fail on plugin issues" + assert len(report.loaded_plugins) == 1 + record = report.loaded_plugins[0] + assert record["validation_status"] == LOAD_FAILED + assert any("simulated module load failure" in err for err in record["validation_errors"]) + assert record["check_id"] is None + assert record["runtime_errors"] == [] + + +# --- Gate 2: signature ------------------------------------------------------ + + +def test_gate_signature_rejects_non_callable(monkeypatch, tmp_path): + _patch_entries(monkeypatch, [_entry_point(lambda: "not a function")]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == BAD_SIGNATURE + assert record["check_id"] is None + + +def test_gate_signature_rejects_zero_arg_callable(monkeypatch, tmp_path): + def no_args(): + return [] + + no_args.AGENTS_SHIPGATE_METADATA = _good_metadata() + _patch_entries(monkeypatch, [_entry_point(lambda: no_args)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == BAD_SIGNATURE + assert any("ScanContext" in err for err in record["validation_errors"]) + + +def test_gate_signature_accepts_optional_extras(monkeypatch, tmp_path): + """Plugins with extra default-valued positional parameters are OK.""" + + def with_extras(context, *, config=None): + return [] + + with_extras.AGENTS_SHIPGATE_METADATA = _good_metadata() + _patch_entries(monkeypatch, [_entry_point(lambda: with_extras)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + assert report.loaded_plugins[0]["validation_status"] == VALID + + +def test_gate_signature_rejects_required_keyword_only(monkeypatch, tmp_path): + """Regression for the v1 plan: a 
plugin with a required keyword-only + parameter would always TypeError when called as ``plugin(context)``, + but was marked ``valid`` because the gate only counted positional + parameters. + + The validation contract is "valid means callable at runtime by the + scanner". A required kw-only param violates that contract. + """ + + def plugin_with_required_kwarg(context, *, required_config): + return [] + + plugin_with_required_kwarg.AGENTS_SHIPGATE_METADATA = _good_metadata( + "ACME-REQUIRED-KWARG" + ) + _patch_entries( + monkeypatch, [_entry_point(lambda: plugin_with_required_kwarg)] + ) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == BAD_SIGNATURE + assert any( + "required_config" in err for err in record["validation_errors"] + ), record["validation_errors"] + + +def test_gate_signature_accepts_optional_keyword_only(monkeypatch, tmp_path): + """Optional keyword-only parameters (with defaults) are fine — the + scanner never passes kwargs, so defaults apply.""" + + def plugin_with_optional_kwarg(context, *, optional_config=None): + return [] + + plugin_with_optional_kwarg.AGENTS_SHIPGATE_METADATA = _good_metadata( + "ACME-OPTIONAL-KWARG" + ) + _patch_entries( + monkeypatch, [_entry_point(lambda: plugin_with_optional_kwarg)] + ) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + assert report.loaded_plugins[0]["validation_status"] == VALID + + +# --- Gate 3: metadata ------------------------------------------------------- + + +def test_gate_metadata_missing(monkeypatch, tmp_path): + def plugin(context): + return [] + + # Intentionally no AGENTS_SHIPGATE_METADATA. 
+ _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == BAD_METADATA + assert any("missing" in err for err in record["validation_errors"]) + + +def test_gate_metadata_malformed(monkeypatch, tmp_path): + def plugin(context): + return [] + + plugin.AGENTS_SHIPGATE_METADATA = {"id": "ACME-BAD"} # missing required fields + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == BAD_METADATA + + +def test_gate_metadata_accepts_check_id_alias(monkeypatch, tmp_path): + """v0.17 M5: plugins may use ``check_id`` instead of ``id``.""" + + def plugin(context): + return [] + + plugin.AGENTS_SHIPGATE_METADATA = { + "check_id": "ACME-ALIAS", + "category": "custom", + "default_severity": "medium", + "description": "Custom plugin check via check_id alias.", + } + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == VALID + assert record["check_id"] == "ACME-ALIAS" + + +# --- Gate 4: id_collision --------------------------------------------------- + + +def test_gate_id_collision_with_builtin(monkeypatch, tmp_path): + def plugin(context): + return [] + + plugin.AGENTS_SHIPGATE_METADATA = { + "id": "SHIP-POLICY-APPROVAL-MISSING", # built-in ID + "category": "policy", + "default_severity": "info", + "description": "Shadow attempt.", + } + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + 
formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == ID_COLLISION + assert any("reserved" in err for err in record["validation_errors"]) + + +def test_gate_id_collision_between_plugins(monkeypatch, tmp_path): + def make_plugin(label): + def plugin(context): + return [] + + plugin.AGENTS_SHIPGATE_METADATA = { + "id": "ACME-DUPLICATE", + "category": "custom", + "default_severity": "medium", + "description": f"Plugin {label}.", + } + return plugin + + p1 = make_plugin("one") + p2 = make_plugin("two") + _patch_entries( + monkeypatch, + [ + _entry_point(lambda: p1, name="first", value="first:run"), + _entry_point(lambda: p2, name="second", value="second:run"), + ], + ) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + assert len(report.loaded_plugins) == 2 + assert report.loaded_plugins[0]["validation_status"] == VALID + assert report.loaded_plugins[1]["validation_status"] == ID_COLLISION + + +# --- Gate 5: bad_floor ------------------------------------------------------ + + +def test_gate_floor_higher_than_default_is_rejected(monkeypatch, tmp_path): + def plugin(context): + return [] + + plugin.AGENTS_SHIPGATE_METADATA = { + "id": "ACME-FLOOR", + "category": "custom", + "default_severity": "medium", + "floor_severity": "high", # higher than default — nonsensical + "description": "Floor higher than default.", + } + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == BAD_FLOOR + + +# --- Runtime validation ----------------------------------------------------- + + +def test_runtime_plugin_exception_is_captured(monkeypatch, tmp_path): + def plugin(context): + raise RuntimeError("simulated runtime crash") + + 
plugin.AGENTS_SHIPGATE_METADATA = _good_metadata("ACME-RUNTIME-CRASH") + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, exit_code = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert record["validation_status"] == VALID + # Plugin loaded fine, but the call raised — captured in runtime_errors. + assert any("simulated runtime crash" in err for err in record["runtime_errors"]) + # Other findings (built-ins) still ran. + assert exit_code == 0 + + +def test_runtime_plugin_must_return_list(monkeypatch, tmp_path): + def plugin(context): + return "not a list" + + plugin.AGENTS_SHIPGATE_METADATA = _good_metadata("ACME-WRONG-RETURN") + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert any("list[Finding]" in err for err in record["runtime_errors"]) + + +def test_runtime_plugin_cannot_smuggle_other_check_ids(monkeypatch, tmp_path): + """Critical trust rule: a plugin cannot emit findings under a check + ID other than the one it declared in metadata. Such findings are + dropped and recorded in runtime_errors. + """ + + def plugin(context): + # Plugin claims it owns ACME-DECLARED but tries to emit a + # finding under SHIP-POLICY-APPROVAL-MISSING (a built-in). 
+ return [ + Finding( + check_id="SHIP-POLICY-APPROVAL-MISSING", + category="policy", + severity="critical", + title="Smuggled finding", + recommendation="—", + provenance_kind="policy_pack", + ) + ] + + plugin.AGENTS_SHIPGATE_METADATA = _good_metadata("ACME-DECLARED") + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert any( + "does not match plugin declared id" in err for err in record["runtime_errors"] + ), record["runtime_errors"] + # The smuggled finding does not appear. + smuggled = [f for f in report.findings if f.title == "Smuggled finding"] + assert smuggled == [] + + +def test_runtime_plugin_non_finding_items_dropped(monkeypatch, tmp_path): + def plugin(context): + return [ + "not a finding", + Finding( + check_id="ACME-MIXED", + category="custom", + severity="medium", + title="A real finding", + recommendation="Land it.", + provenance_kind="policy_pack", + ), + ] + + plugin.AGENTS_SHIPGATE_METADATA = _good_metadata("ACME-MIXED") + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + report, _ = run_scan( + config_path=CLEAN_FIXTURE, + output_dir=tmp_path, + formats=["json"], + ci_mode="advisory", + ) + record = report.loaded_plugins[0] + assert any("non-Finding item dropped" in err for err in record["runtime_errors"]) + titles = [f.title for f in report.findings if f.check_id == "ACME-MIXED"] + assert titles == ["A real finding"] + + +# --- --strict-plugins exit policy ------------------------------------------- + + +def test_strict_plugins_exits_nonzero_on_validation_failure(monkeypatch, tmp_path): + """Lenient mode exits 0; ``--strict-plugins`` should exit ≥ 4 when + any plugin failed validation. + + Drives ``--strict-plugins`` through the CLI layer because the flag + is a CLI concern, not part of ``run_scan``'s contract. 
+ """ + + from typer.testing import CliRunner + + from agents_shipgate.cli.main import app + + def broken(context): + return [] + + # Metadata missing -> bad_metadata. + _patch_entries(monkeypatch, [_entry_point(lambda: broken)]) + + runner = CliRunner() + result = runner.invoke( + app, + [ + "scan", + "--config", + str(CLEAN_FIXTURE), + "--out", + str(tmp_path), + "--ci-mode", + "advisory", + "--strict-plugins", + "--format", + "json", + ], + ) + assert result.exit_code == 4, result.output + + +def test_strict_plugins_passes_when_all_plugins_valid(monkeypatch, tmp_path): + from typer.testing import CliRunner + + from agents_shipgate.cli.main import app + + def plugin(context): + return [] + + plugin.AGENTS_SHIPGATE_METADATA = _good_metadata("ACME-STRICT-OK") + _patch_entries(monkeypatch, [_entry_point(lambda: plugin)]) + + runner = CliRunner() + result = runner.invoke( + app, + [ + "scan", + "--config", + str(CLEAN_FIXTURE), + "--out", + str(tmp_path), + "--ci-mode", + "advisory", + "--strict-plugins", + "--format", + "json", + ], + ) + assert result.exit_code == 0, result.output + + +# --- Helpers ---------------------------------------------------------------- + + +def _good_metadata(check_id: str = "ACME-VALID") -> dict: + return { + "id": check_id, + "category": "custom", + "default_severity": "medium", + "description": "Valid plugin check.", + } + + +# --- CheckMetadata floor invariant ----------------------------------------- + + +def test_check_metadata_rejects_floor_above_default(): + with pytest.raises(ValueError, match="floor_severity"): + CheckMetadata( + id="ACME-BAD-FLOOR", + category="custom", + default_severity="medium", + floor_severity="critical", + description="Bad floor.", + ) + + +def test_check_metadata_accepts_check_id_alias(): + metadata = CheckMetadata.model_validate( + { + "check_id": "ACME-ALIAS", + "category": "custom", + "default_severity": "high", + "description": "Alias.", + } + ) + assert metadata.id == "ACME-ALIAS" diff --git 
a/tests/test_plugins.py b/tests/test_plugins.py index da62d00..e6a5509 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -130,6 +130,9 @@ def load(self): ci_mode="advisory", ) + # v0.17 (M5): every loaded_plugins entry now carries validation + # provenance fields. A clean plugin shows ``valid`` status with + # empty ``validation_errors`` and ``runtime_errors`` arrays. assert report.loaded_plugins == [ { "name": "acme", @@ -137,6 +140,9 @@ def load(self): "distribution": "acme-shipgate-checks", "version": "1.2.3", "check_id": "ACME-CHECK", + "validation_status": "valid", + "validation_errors": [], + "runtime_errors": [], } ] diff --git a/tests/test_reports.py b/tests/test_reports.py index 422d319..72bd430 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -730,6 +730,31 @@ def test_v07_schema_preserves_nested_required_lists(): assert "dynamic_toolset_count" in google_adk_required +def test_v17_loaded_plugins_required_includes_validation_fields(): + """v0.17 (M5): plugin validation provenance fields are required at + the JSON-schema level on every emitted ``loaded_plugins[]`` entry. + + Paired with ``test_v07_schema_preserves_nested_required_lists`` + above, which locks the original 5-field contract for the frozen + v0.7 schema. Generator wiring lives in + ``scripts/generate_schemas.py::_postprocess_report`` near the + ``loaded_plugins`` block. + """ + + schema = json.loads(REPORT_SCHEMA_V17.read_text(encoding="utf-8")) + required = set(schema["properties"]["loaded_plugins"]["items"]["required"]) + assert required == { + "name", + "value", + "distribution", + "version", + "check_id", + "validation_status", + "validation_errors", + "runtime_errors", + } + + def test_v08_schema_requires_release_decision(): """Top-level required must include `release_decision` and the ReleaseDecision $def must require all leaf blocks. Catches drift