From 1854d5da1e592b68de59134eb2bfb13654e7804c Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 12 Jun 2026 01:14:53 +0000 Subject: [PATCH 1/4] feat(authoring): ADVISORY-D7 extract-method cyclomatic false-FAIL detector (D7 closure) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dogfooding_findings_tracker D7 (2026-05-28 real-PR pass FINDING-F2) を candidate path (a)+(b) 両方で解決し、D-class を 8/8 全解決にする (ROADMAP v0.1.0 exit criterion 'D 全解決/waive' 充足)。 complexity_delta.cyclomatic は extractor 可視関数の総和 delta で、各関数は base 1 から始まるため、faithful な extract-method refactor は分岐を完全 保存しても総和が必ず +1/関数 微増する。primary_kind=refactor が cyclomatic ≤ 0 を宣言すると、まさに承認したい refactor で構造的に false-FAIL する。cognitive は抽出で下がる側の metric。 - nested_defs.py に count_visible_defs 追加: extract_python_complexity に 委譲して emission 規則との parity drift を構造的に排除 + VisibleDefGrowth - hazards.py detect_d7: refactor kind × cyclomatic 増加禁止 shape (≤0 / <1 / equals ≤0 / within_range [·, ≤0]) × 可視 def 増加で発火。 ≤3 等の許容幅つき lock は対象外 (false-FAIL trap でないため)。 canonical order は D6 の直後 - target-doctor: _DiffContext に visible_def_growth 追加、blob 1 回取得で nested/visible 両カウントを計算 (_def_counts_at)。out-of-scope 側 0 / syntax error skip の D6 契約をそのまま共有 - docs: guide Hazard 5 新設 (refactor パターン別 metric 選択表、D6/D7 は 相補ペア)、cli_usage / json_schema / exit_codes / schema enum に D7 追記、 tracker D7 行を解決 + 8/8 完了宣言 - tests: 可視 def counter 6 件 + detect_d7 9 件 + CLI git 統合 4 件 (extract-method 発火 / cognitive 沈黙 / 抽出なし沈黙 / D6+D7 同時発火)。 全 1810 件 pass / ruff check + format clean https://claude.ai/code/session_01BtUNrsnRygJY52xAy5bFRw --- CLAUDE.md | 2 +- docs/cli_usage.md | 3 +- docs/dogfooding_findings_tracker.md | 6 +- docs/exit_codes.md | 4 +- docs/json_schema.md | 4 +- docs/target_yaml_guide.md | 35 +++++ src/semantic_ci_code/authoring/advisory.py | 1 + src/semantic_ci_code/authoring/hazards.py | 126 +++++++++++++++- src/semantic_ci_code/authoring/nested_defs.py | 70 +++++++-- .../cli/commands/target_doctor.py | 96 ++++++++---- src/semantic_ci_code/cli/main.py | 4 +- .../schemas/doctor_advisory.schema.json | 1 + tests/authoring/test_hazards.py | 139 +++++++++++++++++- tests/authoring/test_nested_defs.py | 42 +++++- tests/cli/test_target_doctor.py | 98 +++++++++++- 15 files changed, 566 insertions(+), 65 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 84e3994..cbbb792 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -175,7 +175,7 @@ Status legend: **ACTIVE** (current spec/contract, AI agents should read first) / | `docs/exit_codes.md` | ACTIVE | Stable CLI exit code policy (0 / 1 / 2 / 3 / 4) for CI integration, including `--strict-repair`, `severity: info` Advisor channel, and per-subcommand notes | | `docs/json_schema.md` | ACTIVE | CLI JSON envelopes — verdict / compile at `schema_version="6"`, compile-repair at independent `schema_version="1"`, validate-plan at independent `schema_version="2"`. Includes compatibility policy and v2→v3 through v5→v6 diffs (plus validate-plan v1→v2) | | `docs/cli_test_inventory.md` | ACTIVE | CLI test coverage inventory, runtime notes, and conservative reduction candidates | -| `docs/target_yaml_guide.md` | ACTIVE | `target.yaml` authoring guide. Practical companion to `design.md §4` / `cli_usage.md`. Centralises authoring hazards D1 (`--package-root` scope vs `tests/` visibility), D3 (template / user constraint duplication), D4 (config-only PR の vacuous PASS), D6 (nested-function complexity 盲点) — 2026-05-07 Session 4 / 2026-05-28 real-PR dogfooding 由来 | +| `docs/target_yaml_guide.md` | ACTIVE | `target.yaml` authoring guide. Practical companion to `design.md §4` / `cli_usage.md`. Centralises authoring hazards D1 (`--package-root` scope vs `tests/` visibility), D3 (template / user constraint duplication), D4 (config-only PR の vacuous PASS), D6 (nested-function complexity 盲点), D7 (extract-method × cyclomatic 微増) — 2026-05-07 Session 4 / 2026-05-28 real-PR dogfooding 由来 | | `docs/target_authoring_surface.md` | ACTIVE | Authoring surface 設計契約 (Brief 8 / CSCI-41)。target.yaml は hand-written 必須でない / 生成経路 3 通り (recipe + sources / catalog 参照 / hand-written) / LLM 経路は Brief 8b 分離 / 全経路は verdict 前に declared intent として固定 / Authoring・Advisor・Provenance surface は evaluator 不可参照 / `candidate_code_used: false` 固定。§23.3.1 の実装側 catch-up | | `docs/ssp_protocol.md` | ACTIVE | SSP v0.1 normative spec: SensorOutput / Finding / SSPDelta / SSPVerdict definitions, 5-element SAST + 3-element SCA fingerprint, Python profile AST normalization, delta computation, verdict precedence (`unknown > fail > pass`), JSON Schema artifact, Sensor Provenance Invariant (§23.1 mirror), determinism requirements, core isolation contract | | `docs/ssp_usage_guide.md` | ACTIVE | SSP practical usage guide: quick start (Semgrep SAST / pip-audit SCA / fixture mode), output formats (JSON / human / SARIF), CI integration (GitHub Actions workflow / exit code routing / fixture-based CI), hand-built SensorOutput examples, delta computation overview, relationship to core Semantic CI | diff --git a/docs/cli_usage.md b/docs/cli_usage.md index 51a1f31..03491a7 100644 --- a/docs/cli_usage.md +++ b/docs/cli_usage.md @@ -738,7 +738,7 @@ semantic-ci target-doctor [--target ] [--package-root ] [--format {human,json}] [--output ] ``` -Audits a `target.yaml` for eight authoring hazards and renders them as +Audits a `target.yaml` for nine authoring hazards and renders them as advisories. Advisor surface — advisory presence does not change the verdict and does not change the exit code (`docs/exit_codes.md`). @@ -748,6 +748,7 @@ verdict and does not change the exit code (`docs/exit_codes.md`). | `ADVISORY-D3` | A user constraint duplicates a template-expanded constraint (same kind/target/operator/expected). | | `ADVISORY-D4` | The target is lock-only and the candidate diff (`--baseline-rev` ↔ `--candidate-rev`) touches no Python files; the verdict would be a vacuous PASS. Skipped silently when neither rev is given and git is unavailable. | | `ADVISORY-D6` | The target declares a `complexity_delta` constraint and the candidate diff (`--baseline-rev` ↔ `--candidate-rev`) grows the nested-function count in an in-scope Python file. The complexity extractor does not descend into nested defs, so a reported cyclomatic/cognitive decrease may be displacement, not simplification. Skipped silently when neither rev is given and git is unavailable. | +| `ADVISORY-D7` | A `primary_kind: refactor` target locks `complexity_delta.cyclomatic` against any increase, and the candidate diff adds extractor-visible function definitions (the extract-method shape). The summed cyclomatic is mathematically guaranteed to micro-increase (+1 base per extracted helper), so the lock can FAIL on a faithful refactor — prefer `complexity_delta.cognitive`. Skipped silently when neither rev is given and git is unavailable. | | `ADVISORY-I1` | `intent` is the empty string. Repair adapters and `validate-plan` produce better guidance when intent describes the change purpose; use `init --intent` or edit `target.yaml`. | | `ADVISORY-P1` | `primary_kind: feature` has no positive addition constraint. | | `ADVISORY-P2` | `primary_kind: bugfix` has no `test_surface_delta.new_cases` expectation. | diff --git a/docs/dogfooding_findings_tracker.md b/docs/dogfooding_findings_tracker.md index 24c3a40..ce7bed9 100644 --- a/docs/dogfooding_findings_tracker.md +++ b/docs/dogfooding_findings_tracker.md @@ -23,7 +23,7 @@ Status taxonomy: | D4 | 2026-05-07 Session 4 dogfood | vacuous PASS (out-of-scope diff) | config-only PR produces empty Python `CodeStateDelta`; lock-only template constraints pass without exercising the actual change | **解決** | `docs/target_yaml_guide.md` Hazard 3 + `ADVISORY-D4` detector | | D5 | 2026-05-07 Session 5 dogfood (FINDING-1) | set operator partial-match semantics | partial-dict `expected` items canonicalised as different elements from full extractor records — false positive on `includes_*` / `subset_of` / `superset_of`, **false negative (CI bypass) on `excludes_all`** | **解決** | PR #65 (CSCI-35c) — Match Schema partial-record matching + flat-projection aliases + `evidence.matched`; schema_version v4→v5 | | D6 | 2026-05-28 real-PR complexity dogfood (FINDING-F1) | vacuous PASS (extractor coverage gap) — **重複・関連 = sibling of D4** | nested function bodies are excluded from `ComplexityEntry` by `python_complexity_extractor` spec (`api_surface` parity); refactor that nests outer-function body into nested helpers reports large CC drop while real complexity is unchanged | **解決** | `docs/target_yaml_guide.md` Hazard 4 + `ADVISORY-D6` detector (`authoring/hazards.py::detect_d6`, candidate path (a), mirrors D4's diff-aware contract) — fires when a verdict-participating `complexity_delta` constraint meets nested-def growth in the in-scope candidate diff (`--baseline-rev` ↔ `--candidate-rev`); growth signal computed per file via `authoring/nested_defs.py`, syntax-error sides skipped fail-silent. Path (b) (extractor emits nested-function entries) remains a long-term, schema-impacting option, not pursued. Reproduction: langgraph PR #3700 (8/1 vacuous PASS in real-PR pass) | -| D7 | 2026-05-28 real-PR complexity dogfood (FINDING-F2) | authoring mismatch (operator / constraint pairing) | `extract-method` refactor is mathematically guaranteed to **micro-increase cyclomatic** (each extracted function adds base 1), even with `_` prefix discipline and api_surface preserved. Cognitive is the metric that drops. Authors declaring `complexity_delta.cyclomatic ≤ 0` for extract-method refactors hit a structural false-FAIL | **未解決** | Candidate paths: (a) authoring guide section "Choosing complexity metric per refactor pattern" recommending `cognitive_delta` for extract-method; (b) future `ADVISORY-D7` detector emitted when a `change.primary_kind=refactor` target uses `cyclomatic_delta ≤ 0` and the diff matches extract-method shape. Low priority: this is authoring advice, not a CI integrity hazard | +| D7 | 2026-05-28 real-PR complexity dogfood (FINDING-F2) | authoring mismatch (operator / constraint pairing) | `extract-method` refactor is mathematically guaranteed to **micro-increase cyclomatic** (each extracted function adds base 1), even with `_` prefix discipline and api_surface preserved. Cognitive is the metric that drops. Authors declaring `complexity_delta.cyclomatic ≤ 0` for extract-method refactors hit a structural false-FAIL | **解決** | `docs/target_yaml_guide.md` Hazard 5 (incl. "Choosing a complexity metric per refactor pattern" table, candidate path (a)) + `ADVISORY-D7` detector (`authoring/hazards.py::detect_d7`, candidate path (b)) — fires when a `primary_kind=refactor` target locks `complexity_delta.cyclomatic` against any increase (`≤ 0` / `< 1` / `equals ≤ 0` / `within_range [·, ≤ 0]`) and the in-scope candidate diff adds extractor-visible defs (extract-method shape; counted via `authoring/nested_defs.py::count_visible_defs`, which delegates to the real extractor so parity cannot drift). Same diff-aware contract as D4 / D6 (silent skip without revs) | | D8 | 2026-06-07 scale + security dogfood (SCA gap) | SCA sensor dependency-source discovery gap | SSP SCA auto-discovery (`_requirements_file` in `src/semantic_ci_code/cli/commands/ssp.py`) only found `requirements.txt` at repo root; the `--locked` fallback only accepted `pylock.toml` / requirements lockfiles. PEP 621 pyproject-only projects (litellm) and `pdm.lock` projects (pdm) declared deps in unrecognised formats → `pip-audit --locked .` errors "no lockfiles found" → empty JSON → adapter degraded to `unknown` (exit 3). Correct graceful degradation (no silent false PASS, honours `unknown > fail > pass`) but a real usability gap that blocked SCA on most modern Python projects | **解決** | CSCI-55 / PR #151 — dependency source discovery now recognises `requirements.txt`, `pylock.toml` / `pylock.*.toml`, `uv.lock`, `pdm.lock`, `poetry.lock`, and static PEP 621 `[project].dependencies`; lock sources are converted deterministically to pinned temp requirements, optional/non-default-group/marker-inactive packages are filtered, and malformed recognized sources fail closed to SSP `unknown` | ## Reading order @@ -37,8 +37,8 @@ Status taxonomy: ## Classification at a glance - **重複・関連 pairs**: D4 ↔ D6 (both are "vacuous PASS" via extractor coverage gap, distinct mechanism — D4 is "diff outside Python scope", D6 is "diff inside scope but inside nested function") -- **解決 (7 of 8)**: D1, D2, D3, D4, D5, D6, D8 -- **未解決 (1 of 8)**: D7 (authoring advice, low priority) +- **解決 (8 of 8)**: D1, D2, D3, D4, D5, D6, D7, D8 — **D-class closure complete** (ROADMAP v0.1.0 exit criterion "D 全解決/waive" satisfied) +- **未解決 (0 of 8)**: none - **observation-only (not a D#)**: F6 (pattern-SAST logic-vuln blindspot) — **UNTESTED HYPOTHESIS, not a demonstrated observation in the 2026-06-07 pass**: the Semgrep registry rulesets returned HTTP 403, so Semgrep ran with 0 loaded rules over 0 paths and produced no valid SAST measurement. F6 records the *a-priori* expectation that deterministic SAST misses semantic / business-logic vulns, cross-linked to Phase H (`docs/llm_sensor_adapter_planning.md`) as **motivation** — it is **not** empirically validated by this pass. Recorded in `docs/dogfooding_scale_and_security.md` (which now carries a validity warning + repro note for redoing the SAST sub-pass under a network policy allowing `semgrep.dev`). Distinct from the demonstrated observations of the same pass: real vulns merged-then-fixed (git evidence) and SCA clean-on-litellm (pip-audit positive-controlled with `jinja2==2.11.2` → 5 CVEs) ## Source pass index diff --git a/docs/exit_codes.md b/docs/exit_codes.md index a022182..0ce0d90 100644 --- a/docs/exit_codes.md +++ b/docs/exit_codes.md @@ -78,8 +78,8 @@ git errors (`CompileError` on `target.yaml`, git revision resolution failure when `--baseline-rev` / `--candidate-rev` is given, git unavailable when explicitly required) exit 3. Internal bugs exit 4. When neither `--baseline-rev` nor `--candidate-rev` is given and git is -unavailable or no baseline can be resolved, ADVISORY-D4 and ADVISORY-D6 -are silently skipped rather than failing. There is no `--strict-advice` flag; CI that +unavailable or no baseline can be resolved, the diff-aware advisories +(ADVISORY-D4 / D6 / D7) are silently skipped rather than failing. There is no `--strict-advice` flag; CI that wants to gate on advisory presence should consume `--format json` and apply a workflow-level policy. Silent success on bad input is forbidden — the advisor surface only suppresses the verdict step, not the input diff --git a/docs/json_schema.md b/docs/json_schema.md index 7200518..2a82855 100644 --- a/docs/json_schema.md +++ b/docs/json_schema.md @@ -318,12 +318,12 @@ compile-repair, or validate-plan envelopes. The shape is pinned by |---|---| | `schema_version` | Always `"advisory-1"`. | | `subcommand` | Always `"target-doctor"`. | -| `advisories[].code` | One of `ADVISORY-D1`, `ADVISORY-D3`, `ADVISORY-D4`, `ADVISORY-D6`, `ADVISORY-I1`, `ADVISORY-P1`, `ADVISORY-P2`, `ADVISORY-S1`. | +| `advisories[].code` | One of `ADVISORY-D1`, `ADVISORY-D3`, `ADVISORY-D4`, `ADVISORY-D6`, `ADVISORY-D7`, `ADVISORY-I1`, `ADVISORY-P1`, `ADVISORY-P2`, `ADVISORY-S1`. | | `advisories[].severity` | Always `"info"` — the Advisor surface never participates in the verdict (`docs/code_semantic_ci_design.md §23.3.1`). | | `advisories[].message` | Human-readable explanation of the hazard. | | `advisories[].evidence` | Per-advisory diagnostic fields (e.g. `constraint_id`, `target`, `package_root`, `files_touched_count`). | -Advisories are emitted in canonical order (D1 → D3 → D4 → D6 → I1 → P1 → P2 → S1) +Advisories are emitted in canonical order (D1 → D3 → D4 → D6 → D7 → I1 → P1 → P2 → S1) with `constraint_id` as the within-code tiebreak so output is byte-identical across runs. Advisory presence does not change the exit code — see `docs/exit_codes.md`. diff --git a/docs/target_yaml_guide.md b/docs/target_yaml_guide.md index 8b695d7..0723335 100644 --- a/docs/target_yaml_guide.md +++ b/docs/target_yaml_guide.md @@ -316,6 +316,41 @@ candidate diff grows the nested-def count in an in-scope Python file. Growth is a heuristic displacement signal, not proof — review the diff rather than treating the advisory as a verdict. +## Hazard 5 — Extract-method micro-increases cyclomatic (D7) + +The mirror image of Hazard 4 (observed on a real PR in +`docs/dogfooding_real_pr_complexity.md` FINDING-F2). Where D6 hides +complexity by nesting helpers *inside* a function, D7 bites when you do +the **right** thing — extracting helpers to module level, where they are +extracted and counted. + +`complexity_delta.cyclomatic` is the **sum** of per-function cyclomatic +over extractor-visible functions, and every function starts at base 1. +An extract-method refactor that preserves every branch therefore raises +the sum by exactly +1 per extracted helper — it is mathematically +impossible for a faithful extraction to keep `cyclomatic ≤ 0`. A +`primary_kind: refactor` target declaring that lock will FAIL on exactly +the refactor it means to endorse (a structural false-FAIL, not an engine +bug). Cognitive complexity is the metric that *drops* under extraction: +the nesting penalty disappears and `BoolOp` / branch costs redistribute. + +**Choosing a complexity metric per refactor pattern:** + +| Refactor pattern | Recommended constraint | +|---|---| +| extract-method / extract-function | `complexity_delta.cognitive less_than_or_equal 0` | +| inline-method (merging helpers back) | `complexity_delta.cyclomatic less_than_or_equal 0` (cognitive may rise from re-nesting) | +| pure simplification, no function count change | either metric; `cyclomatic` is stricter | +| large decomposition (N extracted helpers) | `cyclomatic less_than_or_equal N` if you must use cyclomatic | + +`semantic-ci target-doctor` detects this hazard as `ADVISORY-D7` when +`--baseline-rev` / `--candidate-rev` are given: it fires when a refactor +target locks `complexity_delta.cyclomatic` against any increase and the +candidate diff adds extractor-visible function definitions. Hazards 4 +and 5 are complementary: D6 warns that a green cyclomatic verdict may be +hollow, D7 warns that a red one may be noise — both resolve by pairing +the right metric with the refactor shape. + ## Constraint Authoring Tips ### Pick `kind` deliberately diff --git a/src/semantic_ci_code/authoring/advisory.py b/src/semantic_ci_code/authoring/advisory.py index 871dacf..3d2449a 100644 --- a/src/semantic_ci_code/authoring/advisory.py +++ b/src/semantic_ci_code/authoring/advisory.py @@ -8,6 +8,7 @@ "ADVISORY-D3", "ADVISORY-D4", "ADVISORY-D6", + "ADVISORY-D7", "ADVISORY-I1", "ADVISORY-P1", "ADVISORY-P2", diff --git a/src/semantic_ci_code/authoring/hazards.py b/src/semantic_ci_code/authoring/hazards.py index ee480ef..56fdfe8 100644 --- a/src/semantic_ci_code/authoring/hazards.py +++ b/src/semantic_ci_code/authoring/hazards.py @@ -18,7 +18,7 @@ from pathlib import Path from semantic_ci_code.authoring.advisory import Advisory -from semantic_ci_code.authoring.nested_defs import NestedDefGrowth +from semantic_ci_code.authoring.nested_defs import NestedDefGrowth, VisibleDefGrowth from semantic_ci_code.compiler.target_compiler import ( CompiledConstraint, CompiledTarget, @@ -38,6 +38,7 @@ "ADVISORY-D3", "ADVISORY-D4", "ADVISORY-D6", + "ADVISORY-D7", "ADVISORY-I1", "ADVISORY-P1", "ADVISORY-P2", @@ -99,13 +100,15 @@ def detect_advisories( package_root: Path | None = None, files_touched: tuple[Path, ...] | None = None, nested_def_growth: tuple[NestedDefGrowth, ...] | None = None, + visible_def_growth: tuple[VisibleDefGrowth, ...] | None = None, ) -> tuple[Advisory, ...]: """Run every detector and return the advisories in canonical order. `package_root` is required for D1; when omitted the detector is - skipped silently. `files_touched` is required for D4 and - `nested_def_growth` for D6; when omitted (e.g. git not available) - the corresponding detector is skipped. + skipped silently. `files_touched` is required for D4, + `nested_def_growth` for D6, and `visible_def_growth` for D7; when + omitted (e.g. git not available) the corresponding detector is + skipped. """ advisories: list[Advisory] = [] if package_root is not None: @@ -115,6 +118,8 @@ def detect_advisories( advisories.extend(detect_d4(target, files_touched=files_touched)) if nested_def_growth is not None: advisories.extend(detect_d6(target, nested_def_growth=nested_def_growth)) + if visible_def_growth is not None: + advisories.extend(detect_d7(target, visible_def_growth=visible_def_growth)) advisories.extend(detect_i1(target)) advisories.extend(detect_p1(target)) advisories.extend(detect_p2(target)) @@ -293,6 +298,80 @@ def detect_d6( ) +def detect_d7( + target: CompiledTarget, + *, + visible_def_growth: tuple[VisibleDefGrowth, ...], +) -> tuple[Advisory, ...]: + """Refactor + cyclomatic no-increase lock + extract-method shape. + + `complexity_delta.cyclomatic` is the **summed** cyclomatic delta over + extractor-visible functions, and every function starts at base 1. An + extract-method refactor therefore micro-increases the sum by +1 per + extracted helper even when every branch is preserved — a + `primary_kind: refactor` target locking `cyclomatic <= 0` is + structurally guaranteed to FAIL on exactly the refactor it means to + endorse (D7, `docs/dogfooding_findings_tracker.md`). Cognitive is the + metric that drops under extraction. + + The detector fires when the target is a refactor, declares a + verdict-participating `complexity_delta.cyclomatic` constraint whose + shape forbids any increase, AND the candidate diff grows the + extractor-visible def count in an in-scope Python file (the + extract-method shape). Growth is a heuristic shape signal, not + proof — the advisory recommends a metric, it never seats the + verdict. `None` (inapplicable, git unavailable) is filtered out by + the caller, mirroring D4 / D6. + """ + if target.primary_kind is not ChangeKind.REFACTOR: + return () + cyclomatic_locks = tuple( + c + for c in target.constraints + if _is_cyclomatic_leaf_target(c.target) + and _participates_in_verdict(c) + and _forbids_cyclomatic_increase(c) + ) + if not cyclomatic_locks: + return () + grown = tuple(g for g in visible_def_growth if g.candidate_count > g.baseline_count) + if not grown: + return () + total_added = sum(g.candidate_count - g.baseline_count for g in grown) + constraint_ids = ", ".join(repr(c.id) for c in cyclomatic_locks) + return ( + Advisory( + code="ADVISORY-D7", + message=( + f"primary_kind=refactor locks complexity_delta.cyclomatic against " + f"any increase ({constraint_ids}), and the candidate diff adds " + f"{total_added} extractor-visible function definition(s) across " + f"{len(grown)} file(s) — the extract-method shape. The cyclomatic " + f"delta is summed over functions and each function starts at base " + f"1, so a faithful extract-method refactor is mathematically " + f"guaranteed to micro-increase it; this lock can FAIL on exactly " + f"the refactor it means to endorse. If the intent is 'no " + f"complexity growth', constrain complexity_delta.cognitive " + f"instead (it drops under extraction), or widen the cyclomatic " + f"allowance by the number of extracted helpers." + ), + evidence={ + "constraint_ids": [c.id for c in cyclomatic_locks], + "visible_defs_added": total_added, + "grown_files_count": len(grown), + "files": [ + { + "path": g.path, + "baseline_visible_defs": g.baseline_count, + "candidate_visible_defs": g.candidate_count, + } + for g in grown[:5] + ], + }, + ), + ) + + def detect_i1(target: CompiledTarget) -> tuple[Advisory, ...]: """Empty intent degrades repair adapter and validate-plan guidance.""" if target.intent: @@ -765,6 +844,45 @@ def _is_complexity_delta_target(path: str) -> bool: return head == "complexity_delta" +def _is_cyclomatic_leaf_target(path: str) -> bool: + """Match the `complexity_delta.cyclomatic` scalar leaf only. + + D7 is cyclomatic-specific: cognitive is the recommended alternative + (it drops under extraction), so cognitive locks are out of scope, and + whole-mapping `complexity_delta` locks are D6 territory. + """ + return path == "complexity_delta.cyclomatic" + + +def _forbids_cyclomatic_increase(constraint: CompiledConstraint) -> bool: + """True if the constraint shape rejects every positive observed delta. + + Only shapes that forbid *any* increase are flagged — `<= 3` tolerates + a few extracted helpers and is not the D7 false-FAIL trap: + + - `less_than_or_equal N` with N <= 0 + - `less_than N` with N <= 1 (observed must be <= 0) + - `equals N` with N <= 0 + - `within_range [low, high]` with high <= 0 + """ + operator = constraint.operator + expected = constraint.expected + if operator is Operator.LESS_THAN_OR_EQUAL: + return _is_scalar_number(expected) and expected <= 0 + if operator is Operator.LESS_THAN: + return _is_scalar_number(expected) and expected <= 1 + if operator is Operator.EQUALS: + return _is_scalar_number(expected) and expected <= 0 + if ( + operator is Operator.WITHIN_RANGE + and isinstance(expected, list | tuple) + and len(expected) == 2 + and all(_is_scalar_number(item) for item in expected) + ): + return expected[1] <= 0 + return False + + def _targets_new_test_cases(constraint: CompiledConstraint) -> bool: if "test_surface_delta" not in constraint.target: return False diff --git a/src/semantic_ci_code/authoring/nested_defs.py b/src/semantic_ci_code/authoring/nested_defs.py index ba1e12d..2349364 100644 --- a/src/semantic_ci_code/authoring/nested_defs.py +++ b/src/semantic_ci_code/authoring/nested_defs.py @@ -1,18 +1,29 @@ -"""Nested-function counting for ADVISORY-D6 (nested-function blind spot). - -`python_complexity_extractor` stops descent at nested ``FunctionDef`` / -``AsyncFunctionDef`` / ``ClassDef`` boundaries and only emits entries for -the ``api_surface`` parity subset (module-level defs + direct methods of -module-level classes). Complexity moved into function-nested helpers -therefore disappears from ``complexity_delta`` numbers (D6, -`docs/dogfooding_findings_tracker.md`). This module provides the -deterministic per-file signal — the count of function-nested defs — that -the CLI layer feeds to `detect_d6` as `NestedDefGrowth` records. - -Scope note: only defs nested (at any depth) inside another function are -counted. Lambdas are not counted — they cannot contain statements, so they -cannot absorb branch complexity the way a nested def can. Methods of -module-level classes are extractor-visible and not counted. +"""Def counting for the complexity authoring advisories (D6 / D7). + +`python_complexity_extractor` only emits entries for the ``api_surface`` +parity subset (module-level defs + direct methods of module-level +classes) and stops descent at nested ``def`` boundaries. Both complexity +hazards are shapes of that emission rule +(`docs/dogfooding_findings_tracker.md`): + +- **D6**: complexity displaced into function-nested helpers vanishes from + ``complexity_delta`` numbers (vacuous PASS). Signal: per-file + function-nested def count growth (`count_nested_defs` / + `NestedDefGrowth`). +- **D7**: extract-method refactors add extractor-visible functions, each + starting at cyclomatic base 1, so a summed ``cyclomatic <= 0`` lock is + mathematically guaranteed to false-FAIL on a faithful extraction. + Signal: per-file extractor-visible def count growth + (`count_visible_defs` / `VisibleDefGrowth`). + +The CLI layer computes the per-file growth records from the candidate +diff and feeds them to `detect_d6` / `detect_d7`. + +Scope note for `count_nested_defs`: only defs nested (at any depth) +inside another function are counted. Lambdas are not counted — they +cannot contain statements, so they cannot absorb branch complexity the +way a nested def can. Methods of module-level classes are +extractor-visible and not counted. """ from __future__ import annotations @@ -20,6 +31,8 @@ import ast from dataclasses import dataclass +from semantic_ci_code.complexity.python_complexity_extractor import extract_python_complexity + @dataclass(frozen=True) class NestedDefGrowth: @@ -34,6 +47,18 @@ class NestedDefGrowth: candidate_count: int +@dataclass(frozen=True) +class VisibleDefGrowth: + """Per-file extractor-visible def counts on both diff sides. + + Same path / absent-side conventions as `NestedDefGrowth`. + """ + + path: str + baseline_count: int + candidate_count: int + + def count_nested_defs(source: str) -> int | None: """Count function defs nested inside another function. @@ -58,3 +83,18 @@ def count_nested_defs(source: str) -> int | None: count += 1 stack.append((child, inside_function or is_def)) return count + + +def count_visible_defs(source: str) -> int | None: + """Count extractor-visible function defs (``api_surface`` parity). + + Delegates to `extract_python_complexity` so the count can never + drift from the real emission rule (module-level defs incl. + module-scope compound statements + direct methods of module-level + classes). Returns `None` when `source` does not parse, mirroring + `count_nested_defs`. + """ + try: + return len(extract_python_complexity(source, module_fqn="_advisory_probe")) + except (SyntaxError, ValueError): + return None diff --git a/src/semantic_ci_code/cli/commands/target_doctor.py b/src/semantic_ci_code/cli/commands/target_doctor.py index 2aadee2..ce8068d 100644 --- a/src/semantic_ci_code/cli/commands/target_doctor.py +++ b/src/semantic_ci_code/cli/commands/target_doctor.py @@ -1,7 +1,7 @@ """`semantic-ci target-doctor` — Advisor surface command. -Renders authoring hazards (D1 / D3 / D4 / D6 / I1 / P1 / P2 / S1) detected -on a `target.yaml`. Advisor surface (`docs/code_semantic_ci_design.md +Renders authoring hazards (D1 / D3 / D4 / D6 / D7 / I1 / P1 / P2 / S1) +detected on a `target.yaml`. Advisor surface (`docs/code_semantic_ci_design.md §23.3.1`): the verdict is not computed and advisory presence does not change the exit code. Usage / configuration errors return 2; engine / git failures return 3; unhandled exceptions return 4 (`docs/exit_codes.md`). @@ -15,7 +15,12 @@ from pathlib import Path from semantic_ci_code.authoring import detect_advisories -from semantic_ci_code.authoring.nested_defs import NestedDefGrowth, count_nested_defs +from semantic_ci_code.authoring.nested_defs import ( + NestedDefGrowth, + VisibleDefGrowth, + count_nested_defs, + count_visible_defs, +) from semantic_ci_code.cli.command_support import ( engine_error, internal_bug, @@ -54,6 +59,7 @@ def run_target_doctor(args: Namespace) -> int: package_root=package_root, files_touched=diff_context.files_touched if diff_context else None, nested_def_growth=diff_context.nested_def_growth if diff_context else None, + visible_def_growth=diff_context.visible_def_growth if diff_context else None, ) payload = build_doctor_payload(advisories) if args.format == "json": @@ -81,12 +87,15 @@ class _DiffContext: `files_touched` feeds D4 (vacuous PASS on non-Python diffs); `nested_def_growth` feeds D6 (complexity displaced into nested - functions). Both are derived from the same numstat pass so the two - detectors always describe the same baseline ↔ candidate range. + functions); `visible_def_growth` feeds D7 (extract-method shape vs a + cyclomatic no-increase lock). All are derived from the same numstat + pass so the detectors always describe the same baseline ↔ candidate + range. """ files_touched: tuple[Path, ...] nested_def_growth: tuple[NestedDefGrowth, ...] + visible_def_growth: tuple[VisibleDefGrowth, ...] def _resolve_diff_context( @@ -94,15 +103,15 @@ def _resolve_diff_context( *, package_root: Path, ) -> _DiffContext | None: - """Resolve the candidate diff for D4 / D6, filtered to the + """Resolve the candidate diff for D4 / D6 / D7, filtered to the `--package-root` slice. `semantic-ci check` extracts only inside `--package-root`, so D4's "vacuous PASS" hazard applies whenever the in-scope slice has no Python diff — even if other parts of the repo do. We filter the repo-wide numstat to paths under `package_root` before - classification. D6 reads the in-scope Python entries' content at - both revs to compute the nested-def growth signal. + classification. D6 / D7 read the in-scope Python entries' content at + both revs to compute the nested-def / visible-def growth signals. When the user passes `--baseline-rev` or `--candidate-rev`, git failures surface as exit 3. When neither is passed and git is @@ -165,6 +174,7 @@ def in_scope(path: Path) -> bool: # (api_surface_delta.removed_public on the old name). files_touched: list[Path] = [] growth: list[NestedDefGrowth] = [] + visible_growth: list[VisibleDefGrowth] = [] for entry in entries: candidate_path = entry.path baseline_path = entry.old_path if entry.old_path is not None else entry.path @@ -173,43 +183,65 @@ def in_scope(path: Path) -> bool: if entry.old_path is not None and in_scope(entry.old_path): files_touched.append(entry.old_path) - # D6 signal: nested-def counts on both sides of every in-scope - # Python entry. A file absent at one rev (added / deleted) - # contributes 0 on that side, and so does an out-of-scope side: - # `semantic-ci check --package-root` never observed it, so a - # rename across the package-root boundary must count as 0 → N - # (newly in-scope nested defs) rather than N → N (Codex review - # P2 — matching counts would mask the displacement). A side that - # fails to parse skips the entry entirely so a syntax error - # cannot fabricate or suppress a growth signal. + # D6 / D7 signal: nested-def and extractor-visible-def counts on + # both sides of every in-scope Python entry. A file absent at one + # rev (added / deleted) contributes 0 on that side, and so does + # an out-of-scope side: `semantic-ci check --package-root` never + # observed it, so a rename across the package-root boundary must + # count as 0 → N (newly in-scope defs) rather than N → N (Codex + # review P2 — matching counts would mask the displacement). A + # side that fails to parse skips the entry entirely so a syntax + # error cannot fabricate or suppress a growth signal. if candidate_path.suffix.lower() != ".py" and baseline_path.suffix.lower() != ".py": continue if not (in_scope(candidate_path) or in_scope(baseline_path)): continue - baseline_count = ( - _nested_defs_at(root, baseline_ref, baseline_path) if in_scope(baseline_path) else 0 + baseline_counts = ( + _def_counts_at(root, baseline_ref, baseline_path) if in_scope(baseline_path) else (0, 0) ) - candidate_count = ( - _nested_defs_at(root, candidate_ref, candidate_path) if in_scope(candidate_path) else 0 + candidate_counts = ( + _def_counts_at(root, candidate_ref, candidate_path) + if in_scope(candidate_path) + else (0, 0) ) - if baseline_count is None or candidate_count is None: + if baseline_counts is None or candidate_counts is None: continue + path_posix = candidate_path.as_posix() growth.append( NestedDefGrowth( - path=candidate_path.as_posix(), - baseline_count=baseline_count, - candidate_count=candidate_count, + path=path_posix, + baseline_count=baseline_counts[0], + candidate_count=candidate_counts[0], + ) + ) + visible_growth.append( + VisibleDefGrowth( + path=path_posix, + baseline_count=baseline_counts[1], + candidate_count=candidate_counts[1], ) ) - return _DiffContext(files_touched=tuple(files_touched), nested_def_growth=tuple(growth)) + return _DiffContext( + files_touched=tuple(files_touched), + nested_def_growth=tuple(growth), + visible_def_growth=tuple(visible_growth), + ) -def _nested_defs_at(root: Path, ref: str, path: Path) -> int | None: - """Nested-def count for `path` at `ref`; 0 when absent, None on - syntax error (caller skips the entry).""" +def _def_counts_at(root: Path, ref: str, path: Path) -> tuple[int, int] | None: + """`(nested, visible)` def counts for `path` at `ref`. + + `(0, 0)` when absent or not a Python file, None on syntax error + (caller skips the entry for both signals). The blob is fetched once + and fed to both counters. + """ if path.suffix.lower() != ".py": - return 0 + return (0, 0) source = blob_text(ref, path.as_posix(), cwd=root) if source is None: - return 0 - return count_nested_defs(source) + return (0, 0) + nested = count_nested_defs(source) + visible = count_visible_defs(source) + if nested is None or visible is None: + return None + return (nested, visible) diff --git a/src/semantic_ci_code/cli/main.py b/src/semantic_ci_code/cli/main.py index 9dc3d81..9cadb35 100644 --- a/src/semantic_ci_code/cli/main.py +++ b/src/semantic_ci_code/cli/main.py @@ -338,12 +338,12 @@ def build_parser() -> argparse.ArgumentParser: target_doctor.add_argument( "--baseline-rev", default=None, - help="baseline git ref for ADVISORY-D4 / D6 (diff-aware hazards)", + help="baseline git ref for ADVISORY-D4 / D6 / D7 (diff-aware hazards)", ) target_doctor.add_argument( "--candidate-rev", default=None, - help="candidate git ref for ADVISORY-D4 / D6; defaults to HEAD", + help="candidate git ref for ADVISORY-D4 / D6 / D7; defaults to HEAD", ) target_doctor.add_argument( "--format", diff --git a/src/semantic_ci_code/schemas/doctor_advisory.schema.json b/src/semantic_ci_code/schemas/doctor_advisory.schema.json index 09adcda..1d3f7f1 100644 --- a/src/semantic_ci_code/schemas/doctor_advisory.schema.json +++ b/src/semantic_ci_code/schemas/doctor_advisory.schema.json @@ -21,6 +21,7 @@ "ADVISORY-D3", "ADVISORY-D4", "ADVISORY-D6", + "ADVISORY-D7", "ADVISORY-I1", "ADVISORY-P1", "ADVISORY-P2", diff --git a/tests/authoring/test_hazards.py b/tests/authoring/test_hazards.py index 591f831..e00b7a0 100644 --- a/tests/authoring/test_hazards.py +++ b/tests/authoring/test_hazards.py @@ -3,7 +3,7 @@ from pathlib import Path from semantic_ci_code.authoring.hazards import detect_advisories -from semantic_ci_code.authoring.nested_defs import NestedDefGrowth +from semantic_ci_code.authoring.nested_defs import NestedDefGrowth, VisibleDefGrowth from semantic_ci_code.compiler.target_compiler import compile_target_svp @@ -209,3 +209,140 @@ def test_advisory_order_d6_between_d4_and_i1(): assert "ADVISORY-D6" in codes assert "ADVISORY-I1" in codes assert codes.index("ADVISORY-D4") < codes.index("ADVISORY-D6") < codes.index("ADVISORY-I1") + + +# --------------------------------------------------------------------------- +# ADVISORY-D7 — refactor + cyclomatic no-increase lock + extract-method shape +# --------------------------------------------------------------------------- + + +def _cyclomatic_refactor_target(operator: str = "less_than_or_equal", expected: str = "0") -> str: + return ( + "intent: extract helpers\n" + "change:\n" + " primary_kind: refactor\n" + "constraints:\n" + " - id: cc_no_increase\n" + " kind: delta\n" + " target: complexity_delta.cyclomatic\n" + f" operator: {operator}\n" + f" expected: {expected}\n" + ) + + +def _visible_growth( + path: str = "src/mod.py", baseline: int = 1, candidate: int = 3 +) -> VisibleDefGrowth: + return VisibleDefGrowth(path=path, baseline_count=baseline, candidate_count=candidate) + + +def test_detect_d7_fires_on_refactor_cyclomatic_lock_with_extraction(): + target = _compile_target(_cyclomatic_refactor_target()) + + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + d7 = [advisory for advisory in advisories if advisory.code == "ADVISORY-D7"] + + assert len(d7) == 1 + assert "cognitive" in d7[0].message + assert d7[0].evidence["constraint_ids"] == ["cc_no_increase"] + assert d7[0].evidence["visible_defs_added"] == 2 + assert d7[0].evidence["files"] == [ + {"path": "src/mod.py", "baseline_visible_defs": 1, "candidate_visible_defs": 3} + ] + + +def test_detect_d7_fires_on_equals_zero_and_within_range_shapes(): + for operator, expected in (("equals", "0"), ("within_range", "[-5, 0]"), ("less_than", "1")): + target = _compile_target(_cyclomatic_refactor_target(operator, expected)) + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + codes = [advisory.code for advisory in advisories] + assert "ADVISORY-D7" in codes, f"{operator} {expected} should fire" + + +def test_detect_d7_silent_when_allowance_tolerates_increase(): + for operator, expected in (("less_than_or_equal", "3"), ("within_range", "[-5, 2]")): + target = _compile_target(_cyclomatic_refactor_target(operator, expected)) + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories], ( + f"{operator} {expected} should stay silent" + ) + + +def test_detect_d7_silent_for_non_refactor_kind(): + yaml_text = _cyclomatic_refactor_target().replace( + "primary_kind: refactor", "primary_kind: feature" + ) + target = _compile_target(yaml_text) + + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + +def test_detect_d7_silent_for_cognitive_target(): + yaml_text = _cyclomatic_refactor_target().replace( + "complexity_delta.cyclomatic", "complexity_delta.cognitive" + ) + target = _compile_target(yaml_text) + + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + +def test_detect_d7_silent_without_visible_growth(): + target = _compile_target(_cyclomatic_refactor_target()) + + advisories = detect_advisories( + target, + visible_def_growth=( + _visible_growth(baseline=3, candidate=3), + _visible_growth(path="src/other.py", baseline=4, candidate=2), + ), + ) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + +def test_detect_d7_skipped_when_context_is_none(): + target = _compile_target(_cyclomatic_refactor_target()) + + advisories = detect_advisories(target, visible_def_growth=None) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + +def test_detect_d7_ignores_non_verdict_participating_constraint(): + yaml_text = _cyclomatic_refactor_target() + " severity: info\n unknown_policy: ignore\n" + target = _compile_target(yaml_text) + + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + +def test_advisory_order_d7_between_d6_and_i1(): + yaml_text = ( + 'intent: ""\n' + "change:\n" + " primary_kind: refactor\n" + "constraints:\n" + " - id: cc_no_increase\n" + " kind: delta\n" + " target: complexity_delta.cyclomatic\n" + " operator: less_than_or_equal\n" + " expected: 0\n" + ) + target = _compile_target(yaml_text) + + advisories = detect_advisories( + target, + nested_def_growth=(_growth(),), + visible_def_growth=(_visible_growth(),), + ) + codes = [advisory.code for advisory in advisories] + + assert "ADVISORY-D6" in codes + assert "ADVISORY-D7" in codes + assert "ADVISORY-I1" in codes + assert codes.index("ADVISORY-D6") < codes.index("ADVISORY-D7") < codes.index("ADVISORY-I1") diff --git a/tests/authoring/test_nested_defs.py b/tests/authoring/test_nested_defs.py index 6e5970c..99fd2ac 100644 --- a/tests/authoring/test_nested_defs.py +++ b/tests/authoring/test_nested_defs.py @@ -1,6 +1,10 @@ from __future__ import annotations -from semantic_ci_code.authoring.nested_defs import NestedDefGrowth, count_nested_defs +from semantic_ci_code.authoring.nested_defs import ( + NestedDefGrowth, + count_nested_defs, + count_visible_defs, +) def test_module_level_defs_are_not_nested(): @@ -73,3 +77,39 @@ def test_empty_source_counts_zero(): def test_growth_record_is_frozen_and_comparable(): growth = NestedDefGrowth(path="src/mod.py", baseline_count=0, candidate_count=2) assert growth.candidate_count > growth.baseline_count + + +# --------------------------------------------------------------------------- +# count_visible_defs — extractor-parity visible def counting (D7 signal) +# --------------------------------------------------------------------------- + + +def test_visible_counts_module_level_defs(): + source = "def foo(): return 1\n\nasync def bar(): return 2\n" + assert count_visible_defs(source) == 2 + + +def test_visible_counts_methods_of_module_level_class(): + source = "class A:\n def method(self): return 1\n" + assert count_visible_defs(source) == 1 + + +def test_visible_excludes_nested_defs(): + source = "def outer():\n def helper(): return 1\n return helper()\n" + assert count_visible_defs(source) == 1 + + +def test_visible_counts_defs_in_module_scope_compound_statements(): + # Extractor parity: definitions inside module-scope compound + # statements are emitted. + source = "if True:\n def foo(): return 1\n" + assert count_visible_defs(source) == 1 + + +def test_visible_excludes_nested_class_methods(): + source = "class A:\n class B:\n def method(self): return 1\n" + assert count_visible_defs(source) == 0 + + +def test_visible_syntax_error_returns_none(): + assert count_visible_defs("def broken(:\n") is None diff --git a/tests/cli/test_target_doctor.py b/tests/cli/test_target_doctor.py index 7da0531..5599be3 100644 --- a/tests/cli/test_target_doctor.py +++ b/tests/cli/test_target_doctor.py @@ -1,6 +1,6 @@ """CSCI-43: `semantic-ci target-doctor` (Advisor surface). -Covers the advisories (D1 / D3 / D4 / D6 / I1 / P1 / P2 / S1) with both a +Covers the advisories (D1 / D3 / D4 / D6 / D7 / I1 / P1 / P2 / S1) with both a detection fixture and a false-positive prevention fixture, the JSON envelope schema, the exit-code contract from `docs/exit_codes.md` and `docs/brief_8_planning.md §6.3.3`, and the determinism + argparse spec @@ -1784,3 +1784,99 @@ def test_d6_cli_silent_on_rename_out_of_package_root_scope(tmp_path: Path): payload = _run_doctor_json(repo, baseline_sha) assert "ADVISORY-D6" not in [a["code"] for a in payload["advisories"]] + + +# --------------------------------------------------------------------------- +# CLI integration with git for D7 (covers visible_def_growth resolution path) +# --------------------------------------------------------------------------- + +_TARGET_CYCLOMATIC_NO_INCREASE = ( + "intent: extract helpers\n" + "change:\n" + " primary_kind: refactor\n" + "constraints:\n" + " - id: cc_no_increase\n" + " kind: delta\n" + " target: complexity_delta.cyclomatic\n" + " operator: less_than_or_equal\n" + " expected: 0\n" +) + +# Faithful extract-method refactor of _FLAT_FUNCTION: every branch is +# preserved, but the extracted module-level helper adds cyclomatic base 1 +# (summed delta +1) — the D7 structural false-FAIL shape. +_EXTRACT_METHOD_REFACTOR = ( + "def _keep(item):\n" + " return bool(item)\n" + "\n" + "def process(items):\n" + " out = []\n" + " for item in items:\n" + " if _keep(item):\n" + " out.append(item)\n" + " return out\n" +) + + +def test_d7_cli_fires_on_extract_method_with_cyclomatic_lock(tmp_path: Path): + repo, baseline_sha = _d6_repo( + tmp_path, + candidate_source=_EXTRACT_METHOD_REFACTOR, + target_yaml=_TARGET_CYCLOMATIC_NO_INCREASE, + ) + + payload = _run_doctor_json(repo, baseline_sha) + jsonschema.validate(payload, DOCTOR_SCHEMA) + d7 = [a for a in payload["advisories"] if a["code"] == "ADVISORY-D7"] + + assert len(d7) == 1 + assert d7[0]["evidence"]["constraint_ids"] == ["cc_no_increase"] + assert d7[0]["evidence"]["visible_defs_added"] == 1 + assert d7[0]["evidence"]["files"] == [ + {"path": "src/mod.py", "baseline_visible_defs": 1, "candidate_visible_defs": 2} + ] + # The extraction added no nested defs, so D6 must stay silent. + assert "ADVISORY-D6" not in [a["code"] for a in payload["advisories"]] + + +def test_d7_cli_silent_with_cognitive_constraint(tmp_path: Path): + repo, baseline_sha = _d6_repo( + tmp_path, + candidate_source=_EXTRACT_METHOD_REFACTOR, + target_yaml=_TARGET_CYCLOMATIC_NO_INCREASE.replace( + "complexity_delta.cyclomatic", "complexity_delta.cognitive" + ), + ) + + payload = _run_doctor_json(repo, baseline_sha) + + assert "ADVISORY-D7" not in [a["code"] for a in payload["advisories"]] + + +def test_d7_cli_silent_without_extraction(tmp_path: Path): + repo, baseline_sha = _d6_repo( + tmp_path, + candidate_source=_FLAT_FUNCTION.replace("if item:", "if item is not None:"), + target_yaml=_TARGET_CYCLOMATIC_NO_INCREASE, + ) + + payload = _run_doctor_json(repo, baseline_sha) + + assert "ADVISORY-D7" not in [a["code"] for a in payload["advisories"]] + + +def test_d6_and_d7_can_fire_together_on_nested_extraction(tmp_path: Path): + """A refactor that both extracts a module-level helper AND pushes + logic into a nested def trips both complementary advisories.""" + candidate = _NESTED_REFACTOR + "\ndef _extra_helper(x):\n return x\n" + repo, baseline_sha = _d6_repo( + tmp_path, + candidate_source=candidate, + target_yaml=_TARGET_CYCLOMATIC_NO_INCREASE, + ) + + payload = _run_doctor_json(repo, baseline_sha) + codes = [a["code"] for a in payload["advisories"]] + + assert "ADVISORY-D6" in codes + assert "ADVISORY-D7" in codes From e3a3c2cd7e0ab635fdcb29fa45f8a1712a0d227d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 12 Jun 2026 01:26:53 +0000 Subject: [PATCH 2/4] fix(authoring): D7 uses net visible-def growth and honours tolerance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review P2 ×2 対応: - net growth: complexity_delta.cyclomatic は extracted state 全体の総和 delta のため、ファイル間の関数移動 (+1/-1) は相殺されて lock は通る。 per-file 正のみで発火すると誤 advisory → aggregate (net) 比較に変更。 pure relocation で沈黙 / 部分 shrinkage 込み net 正で発火をテスト encode - tolerance: evaluator は lt/le/within_range に tolerance を適用する (le: observed <= expected + tol)。'<= 0, tolerance: 1' は +1 を既に 予算化しているので増加禁止 shape ではない → +1 が violate する条件 (le: expected+tol < 1 / lt: <= 1 / within_range: high+tol < 1) に変更。 equals は evaluator が tolerance 非適用のため従来通り expected <= 0。 予算化 3 形 / 不足 tolerance / equals+tolerance をテスト encode https://claude.ai/code/session_01BtUNrsnRygJY52xAy5bFRw --- docs/cli_usage.md | 2 +- docs/target_yaml_guide.md | 7 ++- src/semantic_ci_code/authoring/hazards.py | 58 ++++++++++++------ tests/authoring/test_hazards.py | 75 +++++++++++++++++++++++ 4 files changed, 119 insertions(+), 23 deletions(-) diff --git a/docs/cli_usage.md b/docs/cli_usage.md index 03491a7..4381da0 100644 --- a/docs/cli_usage.md +++ b/docs/cli_usage.md @@ -748,7 +748,7 @@ verdict and does not change the exit code (`docs/exit_codes.md`). | `ADVISORY-D3` | A user constraint duplicates a template-expanded constraint (same kind/target/operator/expected). | | `ADVISORY-D4` | The target is lock-only and the candidate diff (`--baseline-rev` ↔ `--candidate-rev`) touches no Python files; the verdict would be a vacuous PASS. Skipped silently when neither rev is given and git is unavailable. | | `ADVISORY-D6` | The target declares a `complexity_delta` constraint and the candidate diff (`--baseline-rev` ↔ `--candidate-rev`) grows the nested-function count in an in-scope Python file. The complexity extractor does not descend into nested defs, so a reported cyclomatic/cognitive decrease may be displacement, not simplification. Skipped silently when neither rev is given and git is unavailable. | -| `ADVISORY-D7` | A `primary_kind: refactor` target locks `complexity_delta.cyclomatic` against any increase, and the candidate diff adds extractor-visible function definitions (the extract-method shape). The summed cyclomatic is mathematically guaranteed to micro-increase (+1 base per extracted helper), so the lock can FAIL on a faithful refactor — prefer `complexity_delta.cognitive`. Skipped silently when neither rev is given and git is unavailable. | +| `ADVISORY-D7` | A `primary_kind: refactor` target locks `complexity_delta.cyclomatic` against any increase (declared `tolerance` honoured), and the candidate diff adds extractor-visible function definitions net across the in-scope diff (the extract-method shape). The summed cyclomatic is mathematically guaranteed to micro-increase (+1 base per extracted helper), so the lock can FAIL on a faithful refactor — prefer `complexity_delta.cognitive`. Skipped silently when neither rev is given and git is unavailable. | | `ADVISORY-I1` | `intent` is the empty string. Repair adapters and `validate-plan` produce better guidance when intent describes the change purpose; use `init --intent` or edit `target.yaml`. | | `ADVISORY-P1` | `primary_kind: feature` has no positive addition constraint. | | `ADVISORY-P2` | `primary_kind: bugfix` has no `test_surface_delta.new_cases` expectation. | diff --git a/docs/target_yaml_guide.md b/docs/target_yaml_guide.md index 0723335..7fea23c 100644 --- a/docs/target_yaml_guide.md +++ b/docs/target_yaml_guide.md @@ -345,8 +345,11 @@ the nesting penalty disappears and `BoolOp` / branch costs redistribute. `semantic-ci target-doctor` detects this hazard as `ADVISORY-D7` when `--baseline-rev` / `--candidate-rev` are given: it fires when a refactor -target locks `complexity_delta.cyclomatic` against any increase and the -candidate diff adds extractor-visible function definitions. Hazards 4 +target locks `complexity_delta.cyclomatic` against any increase (a +declared `tolerance` that already budgets the helpers is honoured) and +the candidate diff adds extractor-visible function definitions **net** +across the in-scope diff (pure relocation between files cancels out and +does not warn). Hazards 4 and 5 are complementary: D6 warns that a green cyclomatic verdict may be hollow, D7 warns that a red one may be noise — both resolve by pairing the right metric with the refactor shape. diff --git a/src/semantic_ci_code/authoring/hazards.py b/src/semantic_ci_code/authoring/hazards.py index 56fdfe8..f4c6ca5 100644 --- a/src/semantic_ci_code/authoring/hazards.py +++ b/src/semantic_ci_code/authoring/hazards.py @@ -316,12 +316,17 @@ def detect_d7( The detector fires when the target is a refactor, declares a verdict-participating `complexity_delta.cyclomatic` constraint whose - shape forbids any increase, AND the candidate diff grows the - extractor-visible def count in an in-scope Python file (the - extract-method shape). Growth is a heuristic shape signal, not - proof — the advisory recommends a metric, it never seats the - verdict. `None` (inapplicable, git unavailable) is filtered out by - the caller, mirroring D4 / D6. + shape rejects every positive observed delta (tolerance included — + see `_forbids_cyclomatic_increase`), AND the candidate diff grows + the extractor-visible def count **net across the in-scope diff** + (the extract-method shape). The net comparison matters because the + cyclomatic delta is summed over the whole extracted state: a + refactor that merely relocates a function between files (+1 in one + file, -1 in another) cancels out and cannot trip the lock, so + per-file growth alone must not warn (Codex review P2). Growth is a + heuristic shape signal, not proof — the advisory recommends a + metric, it never seats the verdict. `None` (inapplicable, git + unavailable) is filtered out by the caller, mirroring D4 / D6. """ if target.primary_kind is not ChangeKind.REFACTOR: return () @@ -334,10 +339,12 @@ def detect_d7( ) if not cyclomatic_locks: return () + net_added = sum(g.candidate_count - g.baseline_count for g in visible_def_growth) + if net_added <= 0: + return () grown = tuple(g for g in visible_def_growth if g.candidate_count > g.baseline_count) if not grown: return () - total_added = sum(g.candidate_count - g.baseline_count for g in grown) constraint_ids = ", ".join(repr(c.id) for c in cyclomatic_locks) return ( Advisory( @@ -345,8 +352,8 @@ def detect_d7( message=( f"primary_kind=refactor locks complexity_delta.cyclomatic against " f"any increase ({constraint_ids}), and the candidate diff adds " - f"{total_added} extractor-visible function definition(s) across " - f"{len(grown)} file(s) — the extract-method shape. The cyclomatic " + f"{net_added} extractor-visible function definition(s) net across " + f"the in-scope diff — the extract-method shape. The cyclomatic " f"delta is summed over functions and each function starts at base " f"1, so a faithful extract-method refactor is mathematically " f"guaranteed to micro-increase it; this lock can FAIL on exactly " @@ -357,7 +364,7 @@ def detect_d7( ), evidence={ "constraint_ids": [c.id for c in cyclomatic_locks], - "visible_defs_added": total_added, + "visible_defs_added": net_added, "grown_files_count": len(grown), "files": [ { @@ -857,20 +864,31 @@ def _is_cyclomatic_leaf_target(path: str) -> bool: def _forbids_cyclomatic_increase(constraint: CompiledConstraint) -> bool: """True if the constraint shape rejects every positive observed delta. - Only shapes that forbid *any* increase are flagged — `<= 3` tolerates - a few extracted helpers and is not the D7 false-FAIL trap: - - - `less_than_or_equal N` with N <= 0 - - `less_than N` with N <= 1 (observed must be <= 0) - - `equals N` with N <= 0 - - `within_range [low, high]` with high <= 0 + Mirrors the evaluator's tolerance semantics + (`evaluator.operators._numeric_compare` / `_within_range`): `lt` / + `le` satisfy when observed `<` / `<=` `expected + tolerance`, + `within_range` widens to `high + tolerance`, and `equals` matches + exactly with tolerance NOT applied. The observed + `complexity_delta.cyclomatic` is an integer, so "rejects every + increase" reduces to "+1 violates" (Codex review P2 — a declared + `tolerance` that already budgets the extracted helper must not + warn): + + - `less_than_or_equal N`: N + tolerance < 1 + - `less_than N`: N + tolerance <= 1 + - `equals N`: N <= 0 (tolerance unused by the evaluator) + - `within_range [low, high]`: high + tolerance < 1 + + Shapes that tolerate at least one extracted helper (`<= 3`, + `<= 0, tolerance: 1`, ...) are not the D7 false-FAIL trap. """ operator = constraint.operator expected = constraint.expected + tolerance = constraint.tolerance or 0.0 if operator is Operator.LESS_THAN_OR_EQUAL: - return _is_scalar_number(expected) and expected <= 0 + return _is_scalar_number(expected) and expected + tolerance < 1 if operator is Operator.LESS_THAN: - return _is_scalar_number(expected) and expected <= 1 + return _is_scalar_number(expected) and expected + tolerance <= 1 if operator is Operator.EQUALS: return _is_scalar_number(expected) and expected <= 0 if ( @@ -879,7 +897,7 @@ def _forbids_cyclomatic_increase(constraint: CompiledConstraint) -> bool: and len(expected) == 2 and all(_is_scalar_number(item) for item in expected) ): - return expected[1] <= 0 + return expected[1] + tolerance < 1 return False diff --git a/tests/authoring/test_hazards.py b/tests/authoring/test_hazards.py index e00b7a0..101596e 100644 --- a/tests/authoring/test_hazards.py +++ b/tests/authoring/test_hazards.py @@ -346,3 +346,78 @@ def test_advisory_order_d7_between_d6_and_i1(): assert "ADVISORY-D7" in codes assert "ADVISORY-I1" in codes assert codes.index("ADVISORY-D6") < codes.index("ADVISORY-D7") < codes.index("ADVISORY-I1") + + +def test_detect_d7_silent_on_net_zero_relocation(): + """Codex review P2: relocating a function between files (+1 / -1) + cancels in the summed cyclomatic delta — the lock can still pass, so + per-file growth alone must not warn.""" + target = _compile_target(_cyclomatic_refactor_target()) + + advisories = detect_advisories( + target, + visible_def_growth=( + _visible_growth(path="src/dst.py", baseline=1, candidate=2), + _visible_growth(path="src/src.py", baseline=2, candidate=1), + ), + ) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + +def test_detect_d7_reports_net_growth_with_partial_shrinkage(): + target = _compile_target(_cyclomatic_refactor_target()) + + advisories = detect_advisories( + target, + visible_def_growth=( + _visible_growth(path="src/dst.py", baseline=1, candidate=3), + _visible_growth(path="src/src.py", baseline=2, candidate=1), + ), + ) + d7 = [advisory for advisory in advisories if advisory.code == "ADVISORY-D7"] + + assert len(d7) == 1 + assert d7[0].evidence["visible_defs_added"] == 1 + assert d7[0].evidence["files"] == [ + {"path": "src/dst.py", "baseline_visible_defs": 1, "candidate_visible_defs": 3} + ] + + +def test_detect_d7_silent_when_tolerance_budgets_the_increase(): + """Codex review P2: the evaluator applies `tolerance` to numeric + comparisons (`<= 0, tolerance: 1` allows +1), so a target that + already budgets the extracted helper must not warn.""" + for operator, expected, tolerance in ( + ("less_than_or_equal", "0", "1"), + ("within_range", "[-5, 0]", "1"), + ("less_than", "1", "0.5"), + ): + yaml_text = ( + _cyclomatic_refactor_target(operator, expected) + f" tolerance: {tolerance}\n" + ) + target = _compile_target(yaml_text) + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories], ( + f"{operator} {expected} tolerance={tolerance} should stay silent" + ) + + +def test_detect_d7_fires_when_tolerance_does_not_reach_plus_one(): + yaml_text = _cyclomatic_refactor_target() + " tolerance: 0.5\n" + target = _compile_target(yaml_text) + + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + + assert "ADVISORY-D7" in [advisory.code for advisory in advisories] + + +def test_detect_d7_equals_ignores_tolerance(): + # The evaluator's EQUALS dispatch never applies tolerance, so + # `equals 0, tolerance: 1` still rejects every increase. + yaml_text = _cyclomatic_refactor_target("equals", "0") + " tolerance: 1\n" + target = _compile_target(yaml_text) + + advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + + assert "ADVISORY-D7" in [advisory.code for advisory in advisories] From 85183e27aa8e10e8d6722282bc171661386312a0 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 12 Jun 2026 01:32:35 +0000 Subject: [PATCH 3/4] fix(cli): bump advisory envelope to advisory-2 for D6/D7 enum extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review P2 対応: compatibility policy (docs/json_schema.md) は使用中 envelope の enum 値追加に schema version bump を要求する。advisory-1 の advisories[].code enum へ in-place 追加していた ADVISORY-D6 (PR #152、 未リリース main 上) / ADVISORY-D7 を advisory-2 として発行し直す。 producer 定数 / schema const / cli help / docs (json_schema 本文 + version 履歴表に v1→v2 diff 行) / test assert を同時更新 (tests/discipline/test_json_schema_version_sync.py が producer↔docs 同期を enforce)。 https://claude.ai/code/session_01BtUNrsnRygJY52xAy5bFRw --- docs/cli_usage.md | 2 +- docs/json_schema.md | 5 +++-- docs/target_authoring_surface.md | 2 +- src/semantic_ci_code/cli/main.py | 2 +- src/semantic_ci_code/cli/output/doctor_json.py | 2 +- src/semantic_ci_code/schemas/doctor_advisory.schema.json | 2 +- tests/cli/test_target_doctor.py | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/cli_usage.md b/docs/cli_usage.md index 4381da0..a313f7c 100644 --- a/docs/cli_usage.md +++ b/docs/cli_usage.md @@ -754,7 +754,7 @@ verdict and does not change the exit code (`docs/exit_codes.md`). | `ADVISORY-P2` | `primary_kind: bugfix` has no `test_surface_delta.new_cases` expectation. | | `ADVISORY-S1` | A user constraint has `severity: info` paired with `unknown_policy in {fail, repair}`. After Brief D1-4 the warning scope narrows to extraction-cause / open_runtime UNKNOWN. | -`--format json` emits the `advisory-1` envelope +`--format json` emits the `advisory-2` envelope ([`docs/json_schema.md`](./json_schema.md)). There is no `--strict-advice` flag — CI that wants to gate on advisory presence should consume the JSON output and apply a workflow-level policy. diff --git a/docs/json_schema.md b/docs/json_schema.md index 2a82855..7fb4955 100644 --- a/docs/json_schema.md +++ b/docs/json_schema.md @@ -301,7 +301,7 @@ compile-repair, or validate-plan envelopes. The shape is pinned by ```jsonc { - "schema_version": "advisory-1", + "schema_version": "advisory-2", "subcommand": "target-doctor", "advisories": [ { @@ -316,7 +316,7 @@ compile-repair, or validate-plan envelopes. The shape is pinned by | Field | Meaning | |---|---| -| `schema_version` | Always `"advisory-1"`. | +| `schema_version` | Always `"advisory-2"`. | | `subcommand` | Always `"target-doctor"`. | | `advisories[].code` | One of `ADVISORY-D1`, `ADVISORY-D3`, `ADVISORY-D4`, `ADVISORY-D6`, `ADVISORY-D7`, `ADVISORY-I1`, `ADVISORY-P1`, `ADVISORY-P2`, `ADVISORY-S1`. | | `advisories[].severity` | Always `"info"` — the Advisor surface never participates in the verdict (`docs/code_semantic_ci_design.md §23.3.1`). | @@ -399,6 +399,7 @@ bump the envelope version. | `1` | validate-plan | Initial Brief 5 pre-generation validation envelope with `risk_summary`. | | `2` | validate-plan | Brief D3: added `risk_summary.authoring_errors` as a sibling list (positioned first). Adapter rendering surfaces a two-step "fix authoring first, then implement" instruction. | | `advisory-1` | target-doctor | Brief 8 / CSCI-43: initial advisory envelope. Independent schema; not tied to verdict / compile / compile-repair / validate-plan versions. | +| `advisory-2` | target-doctor | D-class closure: added `ADVISORY-D6` (nested-function complexity displacement) and `ADVISORY-D7` (extract-method cyclomatic false-FAIL) to the `advisories[].code` enum. Enum extension on an in-use envelope requires a bump per the compatibility policy (D6 briefly shipped inside `advisory-1` on unreleased main; folded into this bump). | | `catalog-1` | target-catalog | Brief 8 / CSCI-44: initial catalog envelope. Independent schema; mirrors runtime registries via INV-5 parity. | ## v2 to v3 Diff diff --git a/docs/target_authoring_surface.md b/docs/target_authoring_surface.md index 4aad5f7..dd5d7d6 100644 --- a/docs/target_authoring_surface.md +++ b/docs/target_authoring_surface.md @@ -161,4 +161,4 @@ sentinel, not a switch. - `docs/exit_codes.md` — `target-doctor` exit code policy (advisory presence does not change the verdict; usage / engine errors still use the global 2 / 3 / 4 policy) -- `docs/json_schema.md` — `advisory-1` and `catalog-1` envelopes +- `docs/json_schema.md` — `advisory-2` and `catalog-1` envelopes diff --git a/src/semantic_ci_code/cli/main.py b/src/semantic_ci_code/cli/main.py index 9cadb35..6adce71 100644 --- a/src/semantic_ci_code/cli/main.py +++ b/src/semantic_ci_code/cli/main.py @@ -349,7 +349,7 @@ def build_parser() -> argparse.ArgumentParser: "--format", choices=("human", "json"), default="human", - help="output format; advisor envelope schema_version='advisory-1' (json)", + help="output format; advisor envelope schema_version='advisory-2' (json)", ) target_doctor.add_argument( "--output", diff --git a/src/semantic_ci_code/cli/output/doctor_json.py b/src/semantic_ci_code/cli/output/doctor_json.py index 3337fa4..12d70d5 100644 --- a/src/semantic_ci_code/cli/output/doctor_json.py +++ b/src/semantic_ci_code/cli/output/doctor_json.py @@ -4,7 +4,7 @@ from semantic_ci_code.authoring.advisory import Advisory -DOCTOR_ADVISORY_SCHEMA_VERSION = "advisory-1" +DOCTOR_ADVISORY_SCHEMA_VERSION = "advisory-2" def build_doctor_payload(advisories: tuple[Advisory, ...]) -> dict[str, Any]: diff --git a/src/semantic_ci_code/schemas/doctor_advisory.schema.json b/src/semantic_ci_code/schemas/doctor_advisory.schema.json index 1d3f7f1..88a80cc 100644 --- a/src/semantic_ci_code/schemas/doctor_advisory.schema.json +++ b/src/semantic_ci_code/schemas/doctor_advisory.schema.json @@ -6,7 +6,7 @@ "additionalProperties": false, "required": ["schema_version", "subcommand", "advisories"], "properties": { - "schema_version": {"const": "advisory-1"}, + "schema_version": {"const": "advisory-2"}, "subcommand": {"const": "target-doctor"}, "advisories": { "type": "array", diff --git a/tests/cli/test_target_doctor.py b/tests/cli/test_target_doctor.py index 5599be3..37f5dda 100644 --- a/tests/cli/test_target_doctor.py +++ b/tests/cli/test_target_doctor.py @@ -965,7 +965,7 @@ def test_json_format_returns_advisory1_envelope(tmp_path: Path): ) assert result.returncode == 0 payload = parse_json(result.stdout) - assert payload["schema_version"] == "advisory-1" + assert payload["schema_version"] == "advisory-2" assert payload["subcommand"] == "target-doctor" assert any(a["code"] == "ADVISORY-P1" for a in payload["advisories"]) jsonschema.validate(payload, DOCTOR_SCHEMA) From 3341d62c7c45f0298c37c2a1cadc16fdeac4294a Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 12 Jun 2026 01:39:20 +0000 Subject: [PATCH 4/4] fix(authoring): D7 compares the allowance against actual net helper growth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review P2 対応: '+1 が violate するか' の判定では '<= 0, tolerance: 1' (1 helper 分の予算) が 2 helper 抽出 (+2) を suppress して しまうが、評価は observed=+2 で依然 structural false-FAIL する。 _forbids_cyclomatic_increase を _rejects_cyclomatic_delta(constraint, net) に置き換え、net_added を先に計算してから「実際の +net を reject する lock」だけを flag する。equals は net と一致すれば予算化扱いで沈黙。 budget < helper count で発火 / budget が helper count を被覆で沈黙 / equals=net で沈黙 をテスト encode、message と docs も +N 比較の表現に同期。 https://claude.ai/code/session_01BtUNrsnRygJY52xAy5bFRw --- docs/cli_usage.md | 2 +- docs/target_yaml_guide.md | 11 +-- src/semantic_ci_code/authoring/hazards.py | 96 +++++++++++------------ tests/authoring/test_hazards.py | 40 +++++++++- 4 files changed, 91 insertions(+), 58 deletions(-) diff --git a/docs/cli_usage.md b/docs/cli_usage.md index a313f7c..2efba4e 100644 --- a/docs/cli_usage.md +++ b/docs/cli_usage.md @@ -748,7 +748,7 @@ verdict and does not change the exit code (`docs/exit_codes.md`). | `ADVISORY-D3` | A user constraint duplicates a template-expanded constraint (same kind/target/operator/expected). | | `ADVISORY-D4` | The target is lock-only and the candidate diff (`--baseline-rev` ↔ `--candidate-rev`) touches no Python files; the verdict would be a vacuous PASS. Skipped silently when neither rev is given and git is unavailable. | | `ADVISORY-D6` | The target declares a `complexity_delta` constraint and the candidate diff (`--baseline-rev` ↔ `--candidate-rev`) grows the nested-function count in an in-scope Python file. The complexity extractor does not descend into nested defs, so a reported cyclomatic/cognitive decrease may be displacement, not simplification. Skipped silently when neither rev is given and git is unavailable. | -| `ADVISORY-D7` | A `primary_kind: refactor` target locks `complexity_delta.cyclomatic` against any increase (declared `tolerance` honoured), and the candidate diff adds extractor-visible function definitions net across the in-scope diff (the extract-method shape). The summed cyclomatic is mathematically guaranteed to micro-increase (+1 base per extracted helper), so the lock can FAIL on a faithful refactor — prefer `complexity_delta.cognitive`. Skipped silently when neither rev is given and git is unavailable. | +| `ADVISORY-D7` | The candidate diff adds N extractor-visible function definitions net across the in-scope diff (the extract-method shape), and a `primary_kind: refactor` target declares a `complexity_delta.cyclomatic` constraint that rejects a `+N` delta (declared `tolerance` honoured — a budget covering the helper count stays silent). The summed cyclomatic is mathematically guaranteed to micro-increase (+1 base per extracted helper), so the lock can FAIL on a faithful refactor — prefer `complexity_delta.cognitive`. Skipped silently when neither rev is given and git is unavailable. | | `ADVISORY-I1` | `intent` is the empty string. Repair adapters and `validate-plan` produce better guidance when intent describes the change purpose; use `init --intent` or edit `target.yaml`. | | `ADVISORY-P1` | `primary_kind: feature` has no positive addition constraint. | | `ADVISORY-P2` | `primary_kind: bugfix` has no `test_surface_delta.new_cases` expectation. | diff --git a/docs/target_yaml_guide.md b/docs/target_yaml_guide.md index 7fea23c..fd0278f 100644 --- a/docs/target_yaml_guide.md +++ b/docs/target_yaml_guide.md @@ -344,12 +344,13 @@ the nesting penalty disappears and `BoolOp` / branch costs redistribute. | large decomposition (N extracted helpers) | `cyclomatic less_than_or_equal N` if you must use cyclomatic | `semantic-ci target-doctor` detects this hazard as `ADVISORY-D7` when -`--baseline-rev` / `--candidate-rev` are given: it fires when a refactor -target locks `complexity_delta.cyclomatic` against any increase (a -declared `tolerance` that already budgets the helpers is honoured) and -the candidate diff adds extractor-visible function definitions **net** +`--baseline-rev` / `--candidate-rev` are given: it fires when the +candidate diff adds N extractor-visible function definitions **net** across the in-scope diff (pure relocation between files cancels out and -does not warn). Hazards 4 +does not warn) and a refactor target declares a +`complexity_delta.cyclomatic` constraint that rejects that `+N` delta — +a declared `tolerance` or allowance that covers the helper count is +honoured and stays silent. Hazards 4 and 5 are complementary: D6 warns that a green cyclomatic verdict may be hollow, D7 warns that a red one may be noise — both resolve by pairing the right metric with the refactor shape. diff --git a/src/semantic_ci_code/authoring/hazards.py b/src/semantic_ci_code/authoring/hazards.py index f4c6ca5..bb0245d 100644 --- a/src/semantic_ci_code/authoring/hazards.py +++ b/src/semantic_ci_code/authoring/hazards.py @@ -314,34 +314,37 @@ def detect_d7( endorse (D7, `docs/dogfooding_findings_tracker.md`). Cognitive is the metric that drops under extraction. - The detector fires when the target is a refactor, declares a - verdict-participating `complexity_delta.cyclomatic` constraint whose - shape rejects every positive observed delta (tolerance included — - see `_forbids_cyclomatic_increase`), AND the candidate diff grows - the extractor-visible def count **net across the in-scope diff** - (the extract-method shape). The net comparison matters because the - cyclomatic delta is summed over the whole extracted state: a - refactor that merely relocates a function between files (+1 in one - file, -1 in another) cancels out and cannot trip the lock, so - per-file growth alone must not warn (Codex review P2). Growth is a - heuristic shape signal, not proof — the advisory recommends a - metric, it never seats the verdict. `None` (inapplicable, git - unavailable) is filtered out by the caller, mirroring D4 / D6. + The detector fires when the target is a refactor, the candidate diff + grows the extractor-visible def count **net across the in-scope + diff** (the extract-method shape), AND a verdict-participating + `complexity_delta.cyclomatic` constraint rejects that guaranteed + `+net` observed delta (tolerance included — see + `_rejects_cyclomatic_delta`). Both refinements come from Codex + review P2s: the net comparison matters because the cyclomatic delta + is summed over the whole extracted state — a refactor that merely + relocates a function between files (+1 in one file, -1 in another) + cancels out and cannot trip the lock — and the allowance must be + compared against the actual `+net` (a `<= 0, tolerance: 1` lock + budgets one helper but still structurally FAILs a two-helper + extraction). Growth is a heuristic shape signal, not proof — the + advisory recommends a metric, it never seats the verdict. `None` + (inapplicable, git unavailable) is filtered out by the caller, + mirroring D4 / D6. """ if target.primary_kind is not ChangeKind.REFACTOR: return () + net_added = sum(g.candidate_count - g.baseline_count for g in visible_def_growth) + if net_added <= 0: + return () cyclomatic_locks = tuple( c for c in target.constraints if _is_cyclomatic_leaf_target(c.target) and _participates_in_verdict(c) - and _forbids_cyclomatic_increase(c) + and _rejects_cyclomatic_delta(c, net_added) ) if not cyclomatic_locks: return () - net_added = sum(g.candidate_count - g.baseline_count for g in visible_def_growth) - if net_added <= 0: - return () grown = tuple(g for g in visible_def_growth if g.candidate_count > g.baseline_count) if not grown: return () @@ -350,17 +353,19 @@ def detect_d7( Advisory( code="ADVISORY-D7", message=( - f"primary_kind=refactor locks complexity_delta.cyclomatic against " - f"any increase ({constraint_ids}), and the candidate diff adds " - f"{net_added} extractor-visible function definition(s) net across " - f"the in-scope diff — the extract-method shape. The cyclomatic " - f"delta is summed over functions and each function starts at base " - f"1, so a faithful extract-method refactor is mathematically " - f"guaranteed to micro-increase it; this lock can FAIL on exactly " - f"the refactor it means to endorse. If the intent is 'no " - f"complexity growth', constrain complexity_delta.cognitive " - f"instead (it drops under extraction), or widen the cyclomatic " - f"allowance by the number of extracted helpers." + f"primary_kind=refactor declares cyclomatic constraint(s) " + f"({constraint_ids}) that reject a +{net_added} delta, and the " + f"candidate diff adds {net_added} extractor-visible function " + f"definition(s) net across the in-scope diff — the " + f"extract-method shape. The cyclomatic delta is summed over " + f"functions and each function starts at base 1, so a faithful " + f"extract-method refactor is mathematically guaranteed to " + f"micro-increase it by +1 per extracted helper; this lock can " + f"FAIL on exactly the refactor it means to endorse. If the " + f"intent is 'no complexity growth', constrain " + f"complexity_delta.cognitive instead (it drops under " + f"extraction), or widen the cyclomatic allowance to cover the " + f"number of extracted helpers." ), evidence={ "constraint_ids": [c.id for c in cyclomatic_locks], @@ -861,43 +866,38 @@ def _is_cyclomatic_leaf_target(path: str) -> bool: return path == "complexity_delta.cyclomatic" -def _forbids_cyclomatic_increase(constraint: CompiledConstraint) -> bool: - """True if the constraint shape rejects every positive observed delta. +def _rejects_cyclomatic_delta(constraint: CompiledConstraint, delta: int) -> bool: + """True if the constraint rejects an observed cyclomatic delta of + `+delta` (the micro-increase a faithful extraction of `delta` + helpers mathematically guarantees). Mirrors the evaluator's tolerance semantics (`evaluator.operators._numeric_compare` / `_within_range`): `lt` / `le` satisfy when observed `<` / `<=` `expected + tolerance`, - `within_range` widens to `high + tolerance`, and `equals` matches - exactly with tolerance NOT applied. The observed - `complexity_delta.cyclomatic` is an integer, so "rejects every - increase" reduces to "+1 violates" (Codex review P2 — a declared - `tolerance` that already budgets the extracted helper must not - warn): - - - `less_than_or_equal N`: N + tolerance < 1 - - `less_than N`: N + tolerance <= 1 - - `equals N`: N <= 0 (tolerance unused by the evaluator) - - `within_range [low, high]`: high + tolerance < 1 - - Shapes that tolerate at least one extracted helper (`<= 3`, - `<= 0, tolerance: 1`, ...) are not the D7 false-FAIL trap. + `within_range` widens both bounds by `tolerance`, and `equals` + matches exactly with tolerance NOT applied. Comparing against the + actual `+delta` (not just `+1`) covers both Codex review P2s: a + `tolerance` that budgets the extracted helpers must not warn, and a + budget smaller than the helper count (`<= 0, tolerance: 1` vs a + two-helper extraction) must still warn. """ operator = constraint.operator expected = constraint.expected tolerance = constraint.tolerance or 0.0 if operator is Operator.LESS_THAN_OR_EQUAL: - return _is_scalar_number(expected) and expected + tolerance < 1 + return _is_scalar_number(expected) and not delta <= expected + tolerance if operator is Operator.LESS_THAN: - return _is_scalar_number(expected) and expected + tolerance <= 1 + return _is_scalar_number(expected) and not delta < expected + tolerance if operator is Operator.EQUALS: - return _is_scalar_number(expected) and expected <= 0 + return _is_scalar_number(expected) and expected != delta if ( operator is Operator.WITHIN_RANGE and isinstance(expected, list | tuple) and len(expected) == 2 and all(_is_scalar_number(item) for item in expected) ): - return expected[1] + tolerance < 1 + low, high = expected + return not (low - tolerance <= delta <= high + tolerance) return False diff --git a/tests/authoring/test_hazards.py b/tests/authoring/test_hazards.py index 101596e..ae6a280 100644 --- a/tests/authoring/test_hazards.py +++ b/tests/authoring/test_hazards.py @@ -386,8 +386,9 @@ def test_detect_d7_reports_net_growth_with_partial_shrinkage(): def test_detect_d7_silent_when_tolerance_budgets_the_increase(): """Codex review P2: the evaluator applies `tolerance` to numeric - comparisons (`<= 0, tolerance: 1` allows +1), so a target that - already budgets the extracted helper must not warn.""" + comparisons (`<= 0, tolerance: 1` allows +1), so a target whose + budget covers the actual net helper count must not warn.""" + one_helper = (_visible_growth(baseline=1, candidate=2),) for operator, expected, tolerance in ( ("less_than_or_equal", "0", "1"), ("within_range", "[-5, 0]", "1"), @@ -397,12 +398,43 @@ def test_detect_d7_silent_when_tolerance_budgets_the_increase(): _cyclomatic_refactor_target(operator, expected) + f" tolerance: {tolerance}\n" ) target = _compile_target(yaml_text) - advisories = detect_advisories(target, visible_def_growth=(_visible_growth(),)) + advisories = detect_advisories(target, visible_def_growth=one_helper) assert "ADVISORY-D7" not in [advisory.code for advisory in advisories], ( - f"{operator} {expected} tolerance={tolerance} should stay silent" + f"{operator} {expected} tolerance={tolerance} should stay silent for +1" ) +def test_detect_d7_fires_when_budget_is_smaller_than_helper_count(): + """Codex review P2: `<= 0, tolerance: 1` budgets one helper, but a + faithful two-helper extraction adds +2 — the lock still structurally + FAILs, so the advisory must compare the allowance against the actual + net growth, not just +1.""" + yaml_text = _cyclomatic_refactor_target() + " tolerance: 1\n" + target = _compile_target(yaml_text) + + advisories = detect_advisories( + target, + visible_def_growth=(_visible_growth(baseline=1, candidate=3),), + ) + d7 = [advisory for advisory in advisories if advisory.code == "ADVISORY-D7"] + + assert len(d7) == 1 + assert d7[0].evidence["visible_defs_added"] == 2 + + +def test_detect_d7_silent_when_equals_matches_net_growth(): + # `equals 2` demands exactly the +2 a two-helper extraction + # produces — the lock budgets the extraction, no false-FAIL trap. + target = _compile_target(_cyclomatic_refactor_target("equals", "2")) + + advisories = detect_advisories( + target, + visible_def_growth=(_visible_growth(baseline=1, candidate=3),), + ) + + assert "ADVISORY-D7" not in [advisory.code for advisory in advisories] + + def test_detect_d7_fires_when_tolerance_does_not_reach_plus_one(): yaml_text = _cyclomatic_refactor_target() + " tolerance: 0.5\n" target = _compile_target(yaml_text)