diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c49a425 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,20 @@ +.git +.cache +.venv +.pytest_cache +.mypy_cache +.ruff_cache +.idea +__pycache__/ +*.pyc +*.pyo +*.pyd +.coverage +build/ +dist/ +*.egg-info/ +.uv-cache +docs +codeclone.egg-info +.pre-commit-config.yaml +uv.lock diff --git a/.github/actions/codeclone/action.yml b/.github/actions/codeclone/action.yml index 2016ea6..efb63f2 100644 --- a/.github/actions/codeclone/action.yml +++ b/.github/actions/codeclone/action.yml @@ -1,7 +1,7 @@ name: CodeClone description: > - AST-based Python code clone detector focused on architectural duplication - and CI-friendly baseline enforcement. + Structural code quality analysis for Python with + CI-friendly baseline enforcement. author: OrenLab diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..52610e5 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,227 @@ +name: benchmark +run-name: benchmark • ${{ github.event_name }} • ${{ github.ref_name }} + +on: + push: + branches: [ "feat/2.0.0" ] + pull_request: + branches: [ "feat/2.0.0" ] + workflow_dispatch: + inputs: + profile: + description: Benchmark profile + required: true + default: smoke + type: choice + options: + - smoke + - extended + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + benchmark: + name: >- + bench • ${{ matrix.label }} + runs-on: ${{ matrix.os }} + timeout-minutes: ${{ matrix.timeout_minutes }} + + strategy: + fail-fast: false + matrix: + include: + # default profile for push / PR + - profile: smoke + label: linux-smoke + os: ubuntu-latest + runs: 12 + warmups: 3 + cpus: "1.0" + memory: "2g" + timeout_minutes: 45 + + # extended profile for manual runs + - profile: extended + label: linux-extended + os: ubuntu-latest + runs: 16 + 
warmups: 4 + cpus: "1.0" + memory: "2g" + timeout_minutes: 50 + + - profile: extended + label: macos-extended + os: macos-latest + runs: 12 + warmups: 3 + cpus: "" + memory: "" + timeout_minutes: 60 + + steps: + - name: Resolve run profile gate + shell: bash + run: | + enabled=0 + if [ "${{ github.event_name }}" != "workflow_dispatch" ]; then + if [ "${{ matrix.profile }}" = "smoke" ]; then + enabled=1 + fi + else + if [ "${{ matrix.profile }}" = "${{ inputs.profile }}" ]; then + enabled=1 + fi + fi + echo "BENCH_ENABLED=$enabled" >> "$GITHUB_ENV" + + - name: Checkout + if: env.BENCH_ENABLED == '1' + uses: actions/checkout@v6.0.2 + + - name: Set up Python (macOS local benchmark) + if: env.BENCH_ENABLED == '1' && runner.os == 'macOS' + uses: actions/setup-python@v6.2.0 + with: + python-version: "3.13" + allow-prereleases: true + + - name: Set up uv (macOS local benchmark) + if: env.BENCH_ENABLED == '1' && runner.os == 'macOS' + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install dependencies (macOS local benchmark) + if: env.BENCH_ENABLED == '1' && runner.os == 'macOS' + run: uv sync --all-extras --dev + + - name: Set benchmark output path + if: env.BENCH_ENABLED == '1' + shell: bash + run: | + mkdir -p .cache/benchmarks + echo "BENCH_JSON=.cache/benchmarks/codeclone-benchmark-${{ matrix.label }}.json" >> "$GITHUB_ENV" + + - name: Build and run Docker benchmark (Linux) + if: env.BENCH_ENABLED == '1' && runner.os == 'Linux' + env: + RUNS: ${{ matrix.runs }} + WARMUPS: ${{ matrix.warmups }} + CPUS: ${{ matrix.cpus }} + MEMORY: ${{ matrix.memory }} + run: | + ./benchmarks/run_docker_benchmark.sh + cp .cache/benchmarks/codeclone-benchmark.json "$BENCH_JSON" + + - name: Run local benchmark (macOS) + if: env.BENCH_ENABLED == '1' && runner.os == 'macOS' + run: | + uv run python benchmarks/run_benchmark.py \ + --target . 
\ + --runs "${{ matrix.runs }}" \ + --warmups "${{ matrix.warmups }}" \ + --tmp-dir "/tmp/codeclone-bench-${{ matrix.label }}" \ + --output "$BENCH_JSON" + + - name: Print benchmark summary + if: env.BENCH_ENABLED == '1' + shell: bash + run: | + python - <<'PY' + import json + import os + from pathlib import Path + + report_path = Path(os.environ["BENCH_JSON"]) + if not report_path.exists(): + print(f"benchmark report not found: {report_path}") + raise SystemExit(1) + + payload = json.loads(report_path.read_text(encoding="utf-8")) + scenarios = payload.get("scenarios", []) + comparisons = payload.get("comparisons", {}) + + print("CodeClone benchmark summary") + print(f"label={os.environ.get('RUNNER_OS','unknown').lower()} / {os.environ.get('GITHUB_JOB','benchmark')}") + for scenario in scenarios: + name = str(scenario.get("name", "unknown")) + stats = scenario.get("stats_seconds", {}) + median = float(stats.get("median", 0.0)) + p95 = float(stats.get("p95", 0.0)) + stdev = float(stats.get("stdev", 0.0)) + digest = str(scenario.get("digest", "")) + print( + f"- {name:16s} median={median:.4f}s " + f"p95={p95:.4f}s stdev={stdev:.4f}s digest={digest}" + ) + + if comparisons: + print("ratios:") + for key, value in sorted(comparisons.items()): + print(f"- {key}={float(value):.3f}x") + + summary_file = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_file: + raise SystemExit(0) + + lines = [ + f"## CodeClone benchmark — {os.environ.get('RUNNER_OS', 'unknown')} / ${{ matrix.label }}", + "", + f"- Tool: `{payload['tool']['name']} {payload['tool']['version']}`", + f"- Target: `{payload['config']['target']}`", + f"- Runs: `{payload['config']['runs']}`", + f"- Warmups: `{payload['config']['warmups']}`", + f"- Generated: `{payload['generated_at_utc']}`", + "", + "### Scenarios", + "", + "| Scenario | Median (s) | p95 (s) | Stdev (s) | Deterministic | Digest |", + "|---|---:|---:|---:|:---:|---|", + ] + + for scenario in scenarios: + stats = scenario.get("stats_seconds", 
{}) + lines.append( + "| " + f"{scenario.get('name', '')} | " + f"{float(stats.get('median', 0.0)):.4f} | " + f"{float(stats.get('p95', 0.0)):.4f} | " + f"{float(stats.get('stdev', 0.0)):.4f} | " + f"{'yes' if bool(scenario.get('deterministic')) else 'no'} | " + f"{scenario.get('digest', '')} |" + ) + + if comparisons: + lines.extend( + [ + "", + "### Ratios", + "", + "| Metric | Value |", + "|---|---:|", + ] + ) + for key, value in sorted(comparisons.items()): + lines.append(f"| {key} | {float(value):.3f}x |") + + with Path(summary_file).open("a", encoding="utf-8") as fh: + fh.write("\n".join(lines) + "\n") + PY + + - name: Skip non-selected profile + if: env.BENCH_ENABLED != '1' + run: echo "Skipping matrix profile '${{ matrix.profile }}' for event '${{ github.event_name }}'" + + - name: Upload benchmark artifact + if: env.BENCH_ENABLED == '1' + uses: actions/upload-artifact@v4 + with: + name: codeclone-benchmark-${{ matrix.label }} + path: ${{ env.BENCH_JSON }} + if-no-files-found: error diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..bcec725 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,75 @@ +name: docs +run-name: docs • ${{ github.event_name }} • ${{ github.ref_name }} + +on: + push: + branches: [ "main" ] + pull_request: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: docs-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6.0.2 + + - name: Set up Python + uses: actions/setup-python@v6.2.0 + with: + python-version: "3.13" + allow-prereleases: true + + - name: Set up uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install project dependencies + run: uv sync --dev + + - name: Configure GitHub Pages + uses: actions/configure-pages@v5 + + - name: Build docs site + run: uv run --with mkdocs --with mkdocs-material mkdocs build --strict + + - name: 
Generate sample report artifacts + run: uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live + + - name: Upload docs artifact + if: ${{ github.event_name != 'push' || github.ref != 'refs/heads/main' }} + uses: actions/upload-artifact@v4 + with: + name: codeclone-docs-site + path: site + if-no-files-found: error + + - name: Upload GitHub Pages artifact + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + uses: actions/upload-pages-artifact@v4 + with: + path: site + + deploy: + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + runs-on: ubuntu-latest + needs: build + permissions: + contents: read + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 3e551ca..24a517f 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ __pycache__/ .coverage .coverage.* htmlcov/ +site/ # Tool caches .cache/ @@ -33,5 +34,7 @@ htmlcov/ # Logs *.log - -.claude +/.claude/ +/docs/SPEC-2.0.0.md +/.uv-cache/ +/package-lock.json diff --git a/AGENTS.md b/AGENTS.md index 614346f..91c606c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,42 +1,66 @@ # AGENTS.md — CodeClone (AI Agent Playbook) -This document is the **source of truth** for how AI agents should work in this repository. +This document is the **source of truth** for agent operating rules in this repository. It is optimized for **determinism**, **CI stability**, and **reproducible changes**. -> Repository goal: maximize **honesty**, **reproducibility**, **determinism**, and **precision** for real‑world CI usage. +For architecture, module ownership, and runtime behavior, the **current repository code is the source of truth**. +If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. 
+ +> Repository goal: maximize **honesty**, **reproducibility**, **determinism**, and **precision** for real‑world CI +> usage. --- ## 1) Operating principles (non‑negotiable) 1. **Do not break CI contracts.** - - Treat baseline, cache, and report formats as **public APIs**. - - Any contract change must be **versioned**, documented, and accompanied by tests. + - Treat baseline, cache, and report formats as **public APIs**. + - Any contract change must be **versioned**, documented, and accompanied by tests. 2. **Determinism > cleverness.** - - Outputs must be stable across runs given identical inputs (same repo, tool version, python tag). + - Outputs must be stable across runs given identical inputs (same repo, tool version, python tag). 3. **Evidence-based explainability.** - - The core engine produces **facts/metrics**. - - HTML/UI **renders facts**, it must not invent interpretations. + - The core engine produces **facts/metrics**. + - HTML/UI **renders facts**, it must not invent interpretations. 4. **Safety first.** - - Never delete or overwrite user files outside repo. - - Any write must be atomic where relevant (e.g., baseline `.tmp` + `os.replace`). + - Never delete or overwrite user files outside repo. + - Any write must be atomic where relevant (e.g., baseline `.tmp` + `os.replace`). + +5. **Golden tests are contract sentinels.** + - Do not update golden snapshots to “fix” failing tests unless the contract change is intentional, versioned where + required, documented, and explicitly approved. +6. **Fingerprint-adjacent optimization policy** + + - Performance work must not change AST normalization, fingerprint inputs, or clone identity semantics while + `FINGERPRINT_VERSION` remains unchanged. + + - If a change in AST/core analysis can affect fingerprint bytes, clone identity, NEW vs KNOWN classification, or + baseline compatibility semantics, it is not a routine optimization. 
It must be treated as an explicit fingerprint + contract change and requires: + - `FINGERPRINT_VERSION` review or bump + - documentation updates + - migration/release notes + - explicit maintainer approval + - Performance alone is never a sufficient reason to change fingerprint semantics. --- ## 2) Quick orientation -CodeClone is an AST/CFG-informed clone detector for Python. It supports: +CodeClone provides structural code quality analysis for Python. It supports: + - **function clones** (strongest signal) - **block clones** (sliding window of statements, may be noisy on boilerplate) - **segment clones** (report-only unless explicitly gated) Key artifacts: + - `codeclone.baseline.json` — trusted baseline snapshot (for CI comparisons) - `.cache/codeclone/cache.json` — analysis cache (integrity-checked) -- `.cache/codeclone/report.html|report.json|report.txt` — reports +- `.cache/codeclone/report.html|report.json|report.md|report.sarif|report.txt` — reports +- `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- @@ -45,24 +69,30 @@ Key artifacts: Run these locally before proposing changes: ```bash -uv run ruff check . -uv run mypy . -uv run pytest -q +uv run pre-commit run --all-files ``` If you touched baseline/cache/report contracts, also run the repo’s audit runner (or the scenario script if present). 
+If you touched `docs/`, `mkdocs.yml`, docs publishing workflow, or sample-report generation, also run: + +```bash +uv run --with mkdocs --with mkdocs-material mkdocs build --strict +``` --- -## 4) Baseline contract (v1, stable) +## 4) Baseline contract (v2, stable) ### Baseline file structure (canonical) ```json { "meta": { - "generator": { "name": "codeclone", "version": "X.Y.Z" }, - "schema_version": "1.0", + "generator": { + "name": "codeclone", + "version": "X.Y.Z" + }, + "schema_version": "2.0", "fingerprint_version": "1", "python_tag": "cp313", "created_at": "2026-02-08T14:20:15Z", @@ -71,6 +101,9 @@ If you touched baseline/cache/report contracts, also run the repo’s audit runn "clones": { "functions": [], "blocks": [] + }, + "metrics": { + "...": "optional embedded snapshot" } } ``` @@ -78,25 +111,27 @@ If you touched baseline/cache/report contracts, also run the repo’s audit runn ### Rules - `schema_version` is **baseline schema**, not package version. +- Runtime writes baseline schema `2.0`. +- Runtime accepts baseline schema `1.x` and `2.x` for compatibility checks. - Compatibility is tied to: - - `fingerprint_version` - - `python_tag` - - `generator.name == "codeclone"` + - `fingerprint_version` + - `python_tag` + - `generator.name == "codeclone"` - `payload_sha256` is computed from a **canonical payload**: - - stable key order - - clone id lists are **sorted and unique** - - integrity check uses constant‑time compare (e.g., `hmac.compare_digest`) + - stable key order + - clone id lists are **sorted and unique** + - integrity check uses constant‑time compare (e.g., `hmac.compare_digest`) ### Trust model - A baseline is either **trusted** (`baseline_status = ok`) or **untrusted**. 
- **Normal mode**: - - warn - - ignore untrusted baseline - - compare vs empty baseline + - warn + - ignore untrusted baseline + - compare vs empty baseline - **CI gating mode** (`--ci` / `--fail-on-new`): - - fail‑fast if baseline untrusted - - exit code **2** for untrusted baseline + - fail‑fast if baseline untrusted + - exit code **2** for untrusted baseline ### Legacy behavior @@ -108,9 +143,9 @@ If you touched baseline/cache/report contracts, also run the repo’s audit runn - Cache is an **optimization**, never a source of truth. - If cache is invalid or too large: - - warn - - proceed without cache - - ensure report meta reflects `cache_used=false` + - warn + - proceed without cache + - ensure report meta reflects `cache_used=false` Never “fix” cache by silently mutating it; prefer regenerate. @@ -119,18 +154,21 @@ Never “fix” cache by silently mutating it; prefer regenerate. ## 6) Reports and explainability Reports come in: + - HTML (`--html`) - JSON (`--json`) +- Markdown (`--md`) +- SARIF (`--sarif`) - Text (`--text`) ### Report invariants - Ordering must be deterministic (stable sort keys). - All provenance fields must be consistent across formats: - - baseline loaded / status - - baseline fingerprint + schema versions - - baseline generator version - - cache path / cache used + - baseline loaded / status + - baseline fingerprint + schema versions + - baseline generator version + - cache path / cache used ### Explainability contract (core owns facts) @@ -147,6 +185,7 @@ For each clone group (especially block clones), the **core** should be able to p - `max_consecutive_` (e.g., consecutive asserts) UI can show **hints** only when the predicate is **formal & exact** (100% confidence), e.g.: + - `assert_only_block` (assert_ratio == 1.0 and consecutive_asserts == block_len) - `repeated_stmt_hash` (single stmt hash repeated across window) @@ -157,19 +196,22 @@ No UI-only heuristics that affect gating. 
## 7) Noise policy (what is and isn’t a “fix”) ### Acceptable fixes + - Merge/report-layer improvements (e.g., merge sliding windows into maximal regions) **without changing gating**. - Better evidence surfaced in HTML to explain matches. ### Not acceptable as a “quick fix” + - Weakening detection rules to hide noisy test patterns, unless: - - it is configurable - - default remains honest - - the change is justified by real-world repos - - it includes tests for false-negative risk + - it is configurable + - default remains honest + - the change is justified by real-world repos + - it includes tests for false-negative risk ### Preferred remediation for test-only FPs + - Refactor tests to avoid long repetitive statement sequences: - - replace chains of `assert "... in html"` with loops or aggregated checks. + - replace chains of `assert "... in html"` with loops or aggregated checks. --- @@ -180,15 +222,15 @@ When you implement something: 1. **State the intent** (what user-visible issue does it solve?) 2. **List files touched** and why. 3. **Call out contracts affected**: - - baseline / cache / report schema - - CLI exit codes / messages + - baseline / cache / report schema + - CLI exit codes / messages 4. **Add/adjust tests** for: - - normal-mode behavior - - CI gating behavior - - determinism (identical output on rerun) - - legacy/untrusted scenarios where applicable + - normal-mode behavior + - CI gating behavior + - determinism (identical output on rerun) + - legacy/untrusted scenarios where applicable 5. Run: - - `ruff`, `mypy`, `pytest` + - `ruff`, `mypy`, `pytest` Avoid changing unrelated files (locks, roadmap) unless required. @@ -199,8 +241,10 @@ Avoid changing unrelated files (locks, roadmap) unless required. Agents must preserve these semantics: - **0** — success (including “new clones detected” in non-gating mode) -- **2** — baseline gating failure (untrusted/missing baseline when CI requires trusted baseline; invalid output extension, etc.) 
+- **2** — baseline gating failure (untrusted/missing baseline when CI requires trusted baseline; invalid output + extension, etc.) - **3** — analysis gating failure (e.g., `--fail-threshold` exceeded or new clones in `--ci` as designed) +- **5** — internal error (unexpected exception escaped top-level CLI handling) If you introduce a new exit reason, document it and add tests. @@ -212,13 +256,13 @@ Before cutting a release: - Confirm baseline schema compatibility is unchanged, or properly versioned. - Ensure changelog has: - - user-facing changes - - migration notes if any + - user-facing changes + - migration notes if any - Validate `twine check dist/*` for built artifacts. - Smoke test install in a clean venv: - - `pip install dist/*.whl` - - `codeclone --version` - - `codeclone . --ci` in a sample repo with baseline. + - `pip install dist/*.whl` + - `codeclone --version` + - `codeclone . --ci` in a sample repo with baseline. --- @@ -232,19 +276,187 @@ Before cutting a release: --- -## 12) Where to put new code +## 12) Repository architecture + +Architecture is layered, but grounded in current code (not aspirational diagrams): + +- **CLI / orchestration surface** (`codeclone/cli.py`, `codeclone/_cli_*.py`) parses args, resolves runtime mode, + coordinates pipeline calls, and prints UX. +- **Pipeline orchestrator** (`codeclone/pipeline.py`) owns end-to-end flow: bootstrap → discovery → processing → + analysis → report artifacts → gating. +- **Core analysis** (`codeclone/extractor.py`, `codeclone/cfg.py`, `codeclone/normalize.py`, `codeclone/blocks.py`, + `codeclone/grouping.py`, `codeclone/scanner.py`) produces normalized structural facts and clone candidates. +- **Domain/contracts layer** (`codeclone/models.py`, `codeclone/contracts.py`, `codeclone/errors.py`, + `codeclone/domain/*.py`) defines typed entities and stable enums/constants used across layers. 
+- **Persistence contracts** (`codeclone/baseline.py`, `codeclone/cache.py`, `codeclone/metrics_baseline.py`) store + trusted comparison state and optimization state. +- **Canonical report + projections** (`codeclone/report/json_contract.py`, `codeclone/report/*.py`) converts analysis + facts to deterministic, contract-shaped outputs. +- **HTML/UI rendering** (`codeclone/html_report.py`, `codeclone/_html_report/*`, `codeclone/_html_*.py`, + `codeclone/templates.py`) renders views from report/meta facts. +- **Documentation/publishing surface** (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, + `scripts/build_docs_example_report.py`) publishes contract docs and the live sample report. +- **Tests-as-spec** (`tests/`) lock behavior, contracts, determinism, and architecture boundaries. + +Non-negotiable interpretation: + +- Core produces facts; renderers present facts. +- Baseline/cache are persistence contracts, not analysis truth. +- UI/report must not invent gating semantics. + +## 13) Module map + +Use this map to route changes to the right owner module. + +- `codeclone/cli.py` — public CLI entry and control-flow coordinator; add orchestration and top-level UX here; do not + move core analysis logic here. +- `codeclone/_cli_*.py` — CLI support slices (args, config, runtime, summary, reports, baselines, gating); keep them + thin and reusable; do not encode domain semantics that belong to pipeline/core/contracts. +- `codeclone/pipeline.py` — canonical orchestration and data plumbing between scanner/extractor/metrics/report/gating; + change integration flow here; do not move HTML-only presentation logic here. +- `codeclone/extractor.py` — AST extraction, CFG fingerprint input preparation, symbol/declaration collection, and + per-file metrics inputs; change parsing/extraction semantics here; do not couple this module to CLI/report + rendering/baseline logic. 
+- `codeclone/grouping.py` / `codeclone/blocks.py` / `codeclone/blockhash.py` — clone grouping and block/segment + mechanics; change grouping behavior here; do not mix in CLI/report UX concerns. +- `codeclone/metrics/` — metric computations and dead-code/dependency/health logic; change metric math and thresholds + here; do not make metrics depend on renderer/UI concerns. +- `codeclone/structural_findings.py` — structural finding extraction/normalization policy; keep it report-layer factual + and deterministic. +- `codeclone/suppressions.py` — inline `# codeclone: ignore[...]` parse/bind/index logic; keep it declaration-scoped and + deterministic. +- `codeclone/baseline.py` — baseline schema/trust/integrity/compatibility contract; all baseline format changes go here + with explicit contract process. +- `codeclone/cache.py` — cache schema/integrity/profile compatibility and serialization; cache remains + optimization-only. +- `codeclone/report/json_contract.py` — canonical report schema builder/integrity payload; any JSON contract shape + change belongs here. +- `codeclone/report/*.py` (other modules) — deterministic projections/format transforms ( + text/markdown/sarif/derived/findings/suggestions); avoid injecting new analysis heuristics here. +- `codeclone/html_report.py` — public HTML facade/re-export surface; preserve backward-compatible imports here; do not + grow section/layout logic in this module. +- `codeclone/_html_report/*` — actual HTML assembly, context shaping, tabs, sections, and overview/navigation behavior; + change report layout and interactive HTML UX here, not in the facade. +- `codeclone/_html_*.py` — shared HTML badges, CSS, JS, escaping, snippets, and data-attrs; keep these as render-only + helpers. +- `codeclone/models.py` — shared typed models crossing modules; keep model changes contract-aware. 
+- `codeclone/domain/*.py` — centralized domain taxonomies/IDs (families, categories, source scopes, risk/severity + levels); use these constants in pipeline/report/UI instead of scattering raw literals. +- `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py` — docs-site source, + publication workflow, and live sample-report generation; keep published docs aligned with code contracts. +- `tests/` — executable specification: architecture rules, contracts, goldens, invariants, regressions. + +## 14) Dependency direction + +Dependency direction is enforceable and partially test-guarded (`tests/test_architecture.py`): + +- `codeclone.report.*` must not import `codeclone.cli`, `codeclone.html_report`, or `codeclone.ui_messages`. +- `codeclone.extractor` must not import `codeclone.report`, `codeclone.cli`, or `codeclone.baseline`. +- `codeclone.grouping` must not import `codeclone.cli`, `codeclone.baseline`, or `codeclone.html_report`. +- `codeclone.baseline` and `codeclone.cache` must not import `codeclone.cli`, `codeclone.ui_messages`, or + `codeclone.html_report`. +- `codeclone.models` may import only `codeclone.contracts` and `codeclone.errors` from local modules. + +Operational rules: + +- Core/domain code must not depend on HTML/UI. +- Renderers depend on canonical report payload/model; canonical report code must not depend on renderer/UI. +- Metrics/report layers must not recompute or invent core facts in UI. +- CLI helper modules (`_cli_*`) must orchestrate/format, not own domain semantics. +- Persistence semantics (baseline/cache trust/integrity) must stay in persistence/domain modules, not in render/UI + layers. + +## 15) Suppression policy + +Inline suppressions are explicit local policy, not analysis truth. + +- Supported syntax is `# codeclone: ignore[rule-id,...]` via `codeclone/suppressions.py`. 
+- Binding scope is declaration-only (`def`, `async def`, `class`) using: + - leading comment on the line immediately before declaration + - inline comment on the declaration header start line + - inline comment on the declaration header closing line for multiline signatures +- Binding is target-specific (`filepath`, `qualname`, declaration span, kind). No file-wide/global implicit scope. +- Unknown/malformed directives are ignored safely; analysis must not fail because of suppression syntax issues. +- Current active semantic effect is dead-code suppression (`dead-code`) through `extractor.py` → + `DeadCandidate.suppressed_rules` → `metrics/dead_code.py`. +- Suppressed dead-code findings are excluded from active dead-code findings and health impact, but remain observable in + report surfaces where implemented (JSON summary/details, text/markdown/html, CLI counters). +- Suppressions must not silently alter unrelated finding families. + +Prefer explicit inline suppressions for runtime/dynamic false positives instead of broad framework heuristics. + +## 16) Change routing + +If you change a contract-sensitive zone, route docs/tests/approval deliberately. 
+ +| Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | +|-------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------| +| Baseline schema/trust/integrity (`codeclone/baseline.py`) | `docs/book/06-baseline.md`, `docs/book/14-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | +| Cache schema/profile/integrity (`codeclone/cache.py`) | `docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | +| Canonical report JSON shape (`codeclone/report/json_contract.py`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | +| CLI flags/help/exit 
behavior (`codeclone/cli.py`, `_cli_*`, `contracts.py`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | +| Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | +| Suppression semantics/reporting (`suppressions`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | +| Docs site / sample report publication (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/README.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior changes | `mkdocs build --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | + +Golden rule: do not “fix” failures 
by snapshot refresh unless the underlying contract change is intentional, documented, +and approved. + +## 17) Testing taxonomy + +Treat tests as specification with explicit intent: + +- **Unit tests** — module-level behavior and edge conditions (e.g., `tests/test_cfg.py`, `tests/test_normalize.py`, + `tests/test_metrics_modules.py`, `tests/test_suppressions.py`). +- **Contract tests** — baseline/cache/report/CLI public semantics (e.g., `tests/test_baseline.py`, + `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`). +- **Golden tests** — snapshot sentinels for stable outputs (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`). +- **Determinism/invariant tests** — ordering, branch-path invariants, and canonical stability (e.g., + `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`). +- **Scenario/regression tests** — multi-step integration and process-level behavior (e.g., + `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, `tests/test_cli_smoke.py`). + +Policy: + +- Expand the closest taxonomy bucket when changing behavior. +- If a change touches a public surface, include/adjust contract tests, not only unit tests. +- Goldens validate intended contract shifts; they are not a substitute for reasoning or routing. + +## 18) Public vs internal surfaces + +### Public / contract-sensitive surfaces + +- CLI flags, defaults, exit codes, and stable script-facing messages. +- Baseline schema/trust semantics/integrity compatibility (`2.0` baseline contract family). +- Cache schema/status/profile compatibility/integrity (`CACHE_VERSION` contract family). +- Canonical report JSON schema/payload semantics (`REPORT_SCHEMA_VERSION` contract family). +- Documented report projections and their machine/user-facing semantics (HTML/Markdown/SARIF/Text). +- Documented finding families/kinds/ids and suppression-facing report fields. +- Metrics baseline schema/compatibility where used by CI/gating. 
+- Benchmark schema/outputs if consumed as a reproducible contract surface. + +### Internal implementation surfaces + +- Local helpers and formatting utilities (`_html_*`, many private `_as_*` normalizers, local transformers). +- Internal orchestration decomposition inside `_cli_*` modules. +- Private utility refactors that do not change public payloads, exit semantics, ordering, or trust rules. -## 13) Python language + typing rules (3.10 → 3.14) +If classification is ambiguous, treat it as contract-sensitive and add tests/docs before merging. + +## 19) Python language + typing rules (3.10 → 3.14) These rules are **repo policy**. If you need to violate one, you must explain why in the PR. ### Supported Python versions + - **Must run on Python 3.10, 3.11, 3.12, 3.13, 3.14**. - Do not rely on behavior that is new to only the latest version unless you provide a fallback. - Prefer **standard library** features that exist in 3.10+. ### Modern syntax (allowed / preferred) + Use modern syntax when it stays compatible with 3.10+: + - `X | Y` unions, `list[str]` / `dict[str, int]` generics (PEP 604 / PEP 585) - `from __future__ import annotations` is allowed, but keep behavior consistent across 3.10–3.14. - `match/case` (PEP 634) is allowed, but only if it keeps determinism/readability. @@ -252,57 +464,58 @@ Use modern syntax when it stays compatible with 3.10+: - Prefer `pathlib.Path` over `os.path` for new code (but keep hot paths pragmatic). ### Typing standards + - **Type hints are required** for all public functions, core pipeline surfaces, and any code that touches: baseline, cache, fingerprints, report models, serialization, CLI exit behavior. - Keep **`Any` to an absolute minimum**: - - `Any` is allowed only at IO boundaries (JSON parsing, `argparse`, `subprocess`) and must be - *narrowed immediately* into typed structures (dataclasses / TypedDict / Protocol / enums). - - If `Any` appears in “core/domain” code, add a comment: `# Any: ` and a TODO to remove. 
+ - `Any` is allowed only at IO boundaries (JSON parsing, `argparse`, `subprocess`) and must be + *narrowed immediately* into typed structures (dataclasses / TypedDict / Protocol / enums). + - If `Any` appears in “core/domain” code, add a comment: `# Any: ` and a TODO to remove. - Prefer **`Literal` / enums** for finite sets (e.g., status codes, kinds). - Prefer **`dataclasses`** (frozen where reasonable) for data models; keep models JSON‑serializable. - Use `collections.abc` types (`Iterable`, `Sequence`, `Mapping`) for inputs where appropriate. - Avoid `cast()` unless you also add an invariant check nearby. ### Dataclasses / models + - Models that cross module boundaries should be: - - explicitly typed - - immutable when possible (`frozen=True`) - - validated at construction (or via a dedicated `validate_*` function) if they are user‑provided. + - explicitly typed + - immutable when possible (`frozen=True`) + - validated at construction (or via a dedicated `validate_*` function) if they are user‑provided. ### Error handling + - Prefer explicit, typed error types over stringly‑typed errors. - Exit codes are part of the public contract; do not change them without updating tests + docs. ### Determinism requirements (language-level) + - Never iterate over unordered containers (`set`, `dict`) without sorting first when it affects: hashes, IDs, report ordering, baseline payloads, or UI output. - Use stable formatting (sorted keys, stable ordering) in JSON output. ### Key PEPs to keep in mind + - PEP 8, PEP 484 (typing), PEP 526 (variable annotations) - PEP 563 / PEP 649 (annotation evaluation changes across versions) — avoid relying on evaluation timing - PEP 585 (built-in generics), PEP 604 (X | Y unions) - PEP 634 (structural pattern matching) - PEP 612 (ParamSpec) / PEP 646 (TypeVarTuple) — only if it clearly helps, don’t overcomplicate - - Prefer these rules: - **Domain / contracts / enums** live near the domain owner (baseline statuses in baseline domain). 
-- **Core logic** should not depend on HTML. -- **Render** depends on report model, never the other way around. - If a module becomes a “god module”, split by: - - model (types) - - io/serialization - - rules/validation - - ui rendering + - model (types) + - io/serialization + - rules/validation + - ui rendering Avoid deep package hierarchies unless they clearly reduce coupling. --- -## 14) Minimal checklist for PRs (agents) +## 20) Minimal checklist for PRs (agents) - [ ] Change is deterministic. - [ ] Contracts preserved or versioned. @@ -310,7 +523,10 @@ Avoid deep package hierarchies unless they clearly reduce coupling. - [ ] `ruff`, `mypy`, `pytest` green. - [ ] CLI messages remain helpful and stable (don’t break scripts). - [ ] Reports contain provenance fields and reflect trust model correctly. +- [ ] Golden snapshots were **not** updated just to satisfy failing tests. +- [ ] If any golden snapshot changed, the corresponding contract change is intentional, documented, and approved. --- -If you are an AI agent and something here conflicts with an instruction from a maintainer in the PR/issue thread, **ask for clarification in the thread** and default to this document until resolved. +If you are an AI agent and something here conflicts with an instruction from a maintainer in the PR/issue thread, **ask +for clarification in the thread** and default to this document until resolved. diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cbf54d..c6b924e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,95 @@ # Changelog +## [2.0.0b1] + +Major upgrade: CodeClone evolves from a structural clone detector into a +**baseline-aware code-health and CI governance tool** for Python. + +### Architecture + +- Stage-based pipeline (`pipeline.py`): discovery → processing → analysis → reporting → gating. +- Domain layers: `models.py`, `metrics/`, `report/`, `grouping.py`. +- Baseline schema `2.0`, report schema `2.1`, cache schema `2.2`; `fingerprint_version` remains `1`. 
+ +### Code-Health Analysis + +- Seven health dimensions: clones, complexity, coupling, cohesion, dead code, dependencies, coverage. +- Piecewise clone scoring curve: mild penalty below 5% density, steep 5–20%, aggressive above 20%. +- Dimension weights: clones 25%, complexity 20%, cohesion 15%, coupling 10%, dead code 10%, dependencies 10%, coverage + 10%. +- Grade bands: A ≥90, B ≥75, C ≥60, D ≥40, F <40. + +### Detection Thresholds + +- Lowered function-level `--min-loc` from 15 to 10 (configurable via CLI/pyproject.toml). +- Lowered block fragment gate from loc≥40/stmt≥10 to loc≥20/stmt≥8. +- Lowered segment fragment gate from loc≥30/stmt≥12 to loc≥20/stmt≥10. +- All six thresholds configurable via `[tool.codeclone]` in `pyproject.toml`. + +### Detection Quality + +- Conservative dead-code detector: skips tests, dunders, visitors, protocol stubs. +- Module-level PEP 562 hooks (`__getattr__`, `__dir__`) are treated as non-actionable dead-code candidates. +- Exact qualname-based liveness with import-alias resolution. +- Canonical inline suppression syntax: `# codeclone: ignore[dead-code]` on declarations. +- Structural finding families: `duplicated_branches`, `clone_guard_exit_divergence`, `clone_cohort_drift`. + +### Configuration and CLI + +- Config from `pyproject.toml` under `[tool.codeclone]`; precedence: CLI > pyproject.toml > defaults. +- Optional-value report flags: `--html`, `--json`, `--md`, `--sarif`, `--text` with deterministic default paths. +- `--open-html-report`, `--timestamped-report-paths`, `--ci` preset. +- Explicit `--no-progress`/`--progress`, `--no-color`/`--color` flag pairs. + +### HTML Report + +- Overview: KPI grid with health gauge (baseline delta arc), Executive Summary (issue breakdown + source breakdown), + Health Profile radar chart. +- KPI cards show baseline-aware tone: `✓ baselined` pill when all items are accepted debt, `+N` red badge for + regressions. 
+- Get Badge modal: grade-only and score+grade variants, shields.io preview, Markdown/HTML embeds, copy feedback. +- Report Provenance modal with section cards, SVG icons, boolean badges. +- Responsive layout with dark/light theme toggle and system theme detection. + +### Baseline and Contracts + +- Unified baseline flow: clone keys + optional metrics in one file. +- Metrics snapshot integrity via `meta.metrics_payload_sha256`. +- Report contract: canonical `meta`/`inventory`/`findings`/`metrics` + derived `suggestions`/`overview` + `integrity`. +- SARIF: `%SRCROOT%` anchoring, `baselineState`, rich rule metadata. +- Cache compatibility now keys off the full six-threshold analysis profile + (function + block + segment thresholds), not only the top-level function gate. + +### Performance + +- Unified AST collection pass (merged 3 separate walks). +- Suppression fast-path: skip tokenization when `codeclone:` absent. +- Cache dirty flag: skip `save()` on warm path when nothing changed. +- Adaptive multiprocessing, batch statement hashing, deferred HTML import. + +### Docs and Publishing + +- MkDocs site with Material theme and GitHub Pages workflow. +- Live sample reports (HTML, JSON, SARIF). +- PyPI-facing README now uses published docs URLs instead of repo-relative doc links. + +### Packaging + +- Package metadata stays explicitly beta (`2.0.0b1`, `Development Status :: 4 - Beta`). +- `pyproject.toml` moved to SPDX-style `license = "MIT"` and `project.license-files` + for modern setuptools builds without release-time deprecation warnings. + +### Stability + +- Exit codes unchanged: `0`/`2`/`3`/`5`. +- Fingerprint contract unchanged: `BASELINE_FINGERPRINT_VERSION = "1"`. +- Coverage gate: `>=99%`. 
+ ## [1.4.4] - 2026-03-14 ### Performance -- Optimized HTML snippet rendering hot path: +- Backported report hot-path optimizations from `2.0.0b1` to the `1.4.x` line: - file snippets now reuse cached full-file lines and slice ranges without repeated full-file scans - Pygments modules are loaded once per importer identity instead of diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 54f7748..ff63f06 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,8 +2,8 @@ Thank you for your interest in contributing to **CodeClone**. -CodeClone is an **AST + CFG-based code clone detector** focused on architectural duplication, -not textual similarity. +CodeClone provides **structural code quality analysis** for Python, including clone detection, +quality metrics, and baseline-aware CI governance. Contributions are welcome — especially those that improve **signal quality**, **CFG semantics**, and **real-world CI usability**. diff --git a/LICENSE b/LICENSE index 994c5ef..fdcac7c 100644 --- a/LICENSE +++ b/LICENSE @@ -2,6 +2,8 @@ MIT License Copyright (c) 2024 Denis Rozhnovskiy +The name “CodeClone” refers to the official project distribution. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights diff --git a/README.md b/README.md index 9a0ff5d..536cda5 100644 --- a/README.md +++ b/README.md @@ -1,348 +1,344 @@ -# CodeClone - -[![PyPI](https://img.shields.io/pypi/v/codeclone.svg?style=flat-square)](https://pypi.org/project/codeclone/) -[![Downloads](https://img.shields.io/pypi/dm/codeclone.svg?style=flat-square)](https://pypi.org/project/codeclone/) -[![tests](https://github.com/orenlab/codeclone/actions/workflows/tests.yml/badge.svg?branch=main&style=flat-square)](https://github.com/orenlab/codeclone/actions/workflows/tests.yml) -[![Python](https://img.shields.io/pypi/pyversions/codeclone.svg?style=flat-square)](https://pypi.org/project/codeclone/) -![CI First](https://img.shields.io/badge/CI-first-green?style=flat-square) -![Baseline](https://img.shields.io/badge/baseline-versioned-green?style=flat-square) -[![License](https://img.shields.io/pypi/l/codeclone.svg?style=flat-square)](LICENSE) - -**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**. -It discovers architectural duplication and prevents new copy-paste from entering your codebase via CI. +

+ CodeClone +

+ +

+ Structural code quality analysis for Python +

+ +

+ PyPI + Downloads + Tests + Benchmark + Python + codeclone 81 (B) + License +

--- -## Why CodeClone - -CodeClone focuses on **architectural duplication**, not text similarity. It detects structural patterns through: - -- **Normalized AST analysis** — robust to renaming, formatting, and minor refactors -- **Control Flow Graphs** — captures execution logic, not just syntax -- **Strict, explainable matching** — clear signals, not fuzzy heuristics +CodeClone provides comprehensive structural code quality analysis for Python. It detects architectural +duplication via normalized AST and Control Flow Graphs, computes quality metrics, and enforces CI gates — +all with baseline-aware governance that separates **known** technical debt from **new** regressions. -Unlike token-based tools, CodeClone compares **structure and control flow**, making it ideal for finding: - -- Repeated service/orchestration patterns -- Duplicated guard/validation blocks -- Copy-pasted handler logic across modules -- Recurring internal segments in large functions - ---- +Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · +Live sample report: +[orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) -## Core Capabilities +## Features -**Three Detection Levels:** +- **Clone detection** — function (CFG fingerprint), block (statement windows), and segment (report-only) clones +- **Structural findings** — duplicated branch families, clone guard/exit divergence and clone-cohort drift (report-only) +- **Quality metrics** — cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, dead code, health + score +- **Baseline governance** — known debt stays accepted; CI blocks only new clones and metric regressions +- **Reports** — interactive HTML, deterministic JSON/TXT plus Markdown and SARIF projections from one canonical report +- **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support +- **Fast*** — incremental caching, parallel processing, warm-run 
optimization, and reproducible benchmark coverage -1. **Function clones (CFG fingerprint)** - Strong structural signal for cross-layer duplication - -2. **Block clones (statement windows)** - Detects repeated local logic patterns - -3. **Segment clones (report-only)** - Internal function repetition for explainability; not used for baseline gating - -**CI-Ready Features:** - -- Deterministic output with stable ordering -- Reproducible artifacts for audit trails -- Baseline-driven gating to prevent new duplication -- Fast incremental analysis with intelligent caching - ---- - -## Installation +## Quick Start ```bash -pip install codeclone +pip install codeclone # or: uv tool install codeclone + +codeclone . # analyze current directory +codeclone . --html # generate HTML report +codeclone . --html --open-html-report # generate and open HTML report +codeclone . --json --md --sarif --text # generate machine-readable reports +codeclone . --html --json --timestamped-report-paths # keep timestamped report snapshots +codeclone . --ci # CI mode (--fail-on-new --no-color --quiet) ``` -**Requirements:** Python 3.10+ +
+Run without install ---- +```bash +uvx codeclone@latest . +``` -## Quick Start +
-### Basic Analysis +## CI Integration ```bash -# Analyze current directory -codeclone . +# 1. Generate baseline (commit to repo) +codeclone . --update-baseline -# Check version -codeclone --version +# 2. Add to CI pipeline +codeclone . --ci ``` -### Generate Reports - -```bash -codeclone . \ - --html .cache/codeclone/report.html \ - --json .cache/codeclone/report.json \ - --text .cache/codeclone/report.txt -``` +The `--ci` preset equals `--fail-on-new --no-color --quiet`. +When a trusted metrics baseline is loaded, CI mode also enables +`--fail-on-new-metrics`. -### CI Integration +### Quality Gates ```bash -# 1. Generate baseline once (commit to repo) -codeclone . --update-baseline +# Metrics thresholds +codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 --fail-health 60 -# 2. Add to CI pipeline -codeclone . --ci +# Structural policies +codeclone . --fail-cycles --fail-dead-code + +# Regression detection vs baseline +codeclone . --fail-on-new-metrics ``` -The `--ci` preset is equivalent to `--fail-on-new --no-color --quiet`. +### Pre-commit ---- +```yaml +repos: + - repo: local + hooks: + - id: codeclone + name: CodeClone + entry: codeclone + language: system + pass_filenames: false + args: [ ".", "--ci" ] + types: [ python ] +``` -## Baseline Workflow +## Configuration + +CodeClone can load project-level configuration from `pyproject.toml`: + +```toml +[tool.codeclone] +min_loc = 10 +min_stmt = 6 +baseline = "codeclone.baseline.json" +skip_metrics = false +quiet = false +html_out = ".cache/codeclone/report.html" +json_out = ".cache/codeclone/report.json" +md_out = ".cache/codeclone/report.md" +sarif_out = ".cache/codeclone/report.sarif" +text_out = ".cache/codeclone/report.txt" +block_min_loc = 20 +block_min_stmt = 8 +segment_min_loc = 20 +segment_min_stmt = 10 +``` -Baselines capture the **current state of duplication** in your codebase. Once committed, they serve as the reference -point for CI checks. 
+Precedence: CLI flags > `pyproject.toml` > built-in defaults. -**Key points (contract-level):** +## Baseline Workflow -- Baseline file is versioned (`codeclone.baseline.json`) and used to classify clones as **NEW** vs **KNOWN**. -- Compatibility is gated by `schema_version`, `fingerprint_version`, and `python_tag`. -- Baseline trust is gated by `meta.generator.name` (`codeclone`) and integrity (`payload_sha256`). -- In CI preset (`--ci`), an untrusted baseline is a contract error (exit `2`). +Baselines capture the current duplication state. Once committed, they become the CI reference point. -Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md) +- Clones are classified as **NEW** (not in baseline) or **KNOWN** (accepted debt) +- `--update-baseline` writes both clone and metrics snapshots +- Trust is verified via `generator`, `fingerprint_version`, and `payload_sha256` +- In `--ci` mode, an untrusted baseline is a contract error (exit 2) ---- +Full contract: [Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) ## Exit Codes -CodeClone uses a deterministic exit code contract: +| Code | Meaning | +|------|-------------------------------------------------------------------------------| +| `0` | Success | +| `2` | Contract error — untrusted baseline, invalid config, unreadable sources in CI | +| `3` | Gating failure — new clones or metric threshold exceeded | +| `5` | Internal error | -| Code | Meaning | -|------|-------------------------------------------------------------------------------------------------------------------------------------| -| `0` | Success — run completed without gating failures | -| `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating | -| `3` | Gating failure — new clones detected or threshold exceeded | -| `5` | Internal error — unexpected exception | +Contract errors (`2`) take precedence over gating failures 
(`3`). -**Priority:** Contract errors (`2`) override gating failures (`3`) when both occur. +## Reports -Full contract details: [`docs/book/03-contracts-exit-codes.md`](docs/book/03-contracts-exit-codes.md) +| Format | Flag | Default path | +|----------|-----------|---------------------------------| +| HTML | `--html` | `.cache/codeclone/report.html` | +| JSON | `--json` | `.cache/codeclone/report.json` | +| Markdown | `--md` | `.cache/codeclone/report.md` | +| SARIF | `--sarif` | `.cache/codeclone/report.sarif` | +| Text | `--text` | `.cache/codeclone/report.txt` | -**Debug Support:** +All report formats are rendered from one canonical JSON report document. -```bash -# Show detailed error information -codeclone . --debug +- `--open-html-report` opens the generated HTML report in the default browser and requires `--html`. +- `--timestamped-report-paths` appends a UTC timestamp to default report filenames for bare report flags such as + `--html` or `--json`. Explicit report paths are not rewritten. -# Or via environment variable -CODECLONE_DEBUG=1 codeclone . -``` +The published docs site also includes a live example HTML/JSON/SARIF report +generated from the current `codeclone` repository during the docs build. ---- +Structural findings include: -## Reports +- `duplicated_branches` +- `clone_guard_exit_divergence` +- `clone_cohort_drift` -### Supported Formats +### Inline Suppressions -- **HTML** (`--html`) — Interactive web report with filtering -- **JSON** (`--json`) — Machine-readable structured data -- **Text** (`--text`) — Plain text summary +CodeClone keeps dead-code detection deterministic and static by default. When a symbol is intentionally +invoked through runtime dynamics (for example framework callbacks, plugin loading, or reflection), suppress +the known false positive explicitly at the declaration site: -### Report Schema (JSON v1.1) +```python +# codeclone: ignore[dead-code] +def handle_exception(exc: Exception) -> None: + ... 
-The JSON report uses a compact deterministic layout: -- Top-level: `meta`, `files`, `groups`, `groups_split`, `group_item_layout` -- Optional top-level: `facts` -- `groups_split` provides explicit **NEW / KNOWN** separation per section -- `meta.groups_counts` provides deterministic per-section aggregates -- `meta` follows a shared canonical contract across HTML/JSON/TXT +class Middleware: # codeclone: ignore[dead-code] + ... +``` -Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md) +Dynamic/runtime false positives are resolved via explicit inline suppressions, not via broad heuristics. -**Minimal shape (v1.1):** +
+JSON report shape (v2.1) ```json { + "report_schema_version": "2.1", "meta": { - "report_schema_version": "1.1", - "codeclone_version": "1.4.0", - "python_version": "3.13", - "python_tag": "cp313", - "baseline_path": "/path/to/codeclone.baseline.json", - "baseline_fingerprint_version": "1", - "baseline_schema_version": "1.0", - "baseline_python_tag": "cp313", - "baseline_generator_name": "codeclone", - "baseline_generator_version": "1.4.0", - "baseline_payload_sha256": "", - "baseline_payload_sha256_verified": true, - "baseline_loaded": true, - "baseline_status": "ok", - "cache_path": "/path/to/.cache/codeclone/cache.json", - "cache_used": true, - "cache_status": "ok", - "cache_schema_version": "1.3", - "files_skipped_source_io": 0, - "groups_counts": { - "functions": { - "total": 0, - "new": 0, - "known": 0 + "codeclone_version": "2.0.0b1", + "project_name": "...", + "scan_root": ".", + "report_mode": "full", + "baseline": { + "...": "..." + }, + "cache": { + "...": "..." + }, + "metrics_baseline": { + "...": "..." + }, + "runtime": { + "report_generated_at_utc": "..." + } + }, + "inventory": { + "files": { + "...": "..." + }, + "code": { + "...": "..." + }, + "file_registry": { + "encoding": "relative_path", + "items": [] + } + }, + "findings": { + "summary": { + "...": "..." 
+ }, + "groups": { + "clones": { + "functions": [], + "blocks": [], + "segments": [] + }, + "structural": { + "groups": [] }, - "blocks": { - "total": 0, - "new": 0, - "known": 0 + "dead_code": { + "groups": [] }, - "segments": { - "total": 0, - "new": 0, - "known": 0 + "design": { + "groups": [] } } }, - "files": [], - "groups": { - "functions": {}, - "blocks": {}, - "segments": {} + "metrics": { + "summary": {}, + "families": {} }, - "groups_split": { - "functions": { - "new": [], - "known": [] + "derived": { + "suggestions": [], + "overview": { + "families": {}, + "top_risks": [], + "source_scope_breakdown": {}, + "health_snapshot": {} }, - "blocks": { - "new": [], - "known": [] - }, - "segments": { - "new": [], - "known": [] + "hotlists": { + "most_actionable_ids": [], + "highest_spread_ids": [], + "production_hotspot_ids": [], + "test_fixture_hotspot_ids": [] } }, - "group_item_layout": { - "functions": [ - "file_i", - "qualname", - "start", - "end", - "loc", - "stmt_count", - "fingerprint", - "loc_bucket" - ], - "blocks": [ - "file_i", - "qualname", - "start", - "end", - "size" - ], - "segments": [ - "file_i", - "qualname", - "start", - "end", - "size", - "segment_hash", - "segment_sig" - ] - }, - "facts": { - "blocks": {} + "integrity": { + "canonicalization": { + "version": "1", + "scope": "canonical_only" + }, + "digest": { + "algorithm": "sha256", + "verified": true, + "value": "..." + } } } ``` ---- - -## Cache +Canonical contract: [Report contract](https://orenlab.github.io/codeclone/book/08-report/) and +[Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) -Cache is an optimization layer only and is never a source of truth. +
-- Default path: `/.cache/codeclone/cache.json` -- Schema version: **v1.3** -- Compatibility includes analysis profile (`min_loc`, `min_stmt`) -- Invalid or oversized cache is ignored with warning and rebuilt (fail-open) - -Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md) - ---- +## How It Works -## Pre-commit Integration +1. **Parse** — Python source to AST +2. **Normalize** — canonical structure (robust to renaming, formatting) +3. **CFG** — per-function control flow graph +4. **Fingerprint** — stable hash computation +5. **Group** — function, block, and segment clone groups +6. **Metrics** — complexity, coupling, cohesion, dependencies, dead code, health +7. **Gate** — baseline comparison, threshold checks + +Architecture: [Architecture narrative](https://orenlab.github.io/codeclone/architecture/) · +CFG semantics: [CFG semantics](https://orenlab.github.io/codeclone/cfg/) + +## Documentation + +| Topic | Link | +|----------------------------|----------------------------------------------------------------------------------------------------| +| Contract book (start here) | [Contracts and guarantees](https://orenlab.github.io/codeclone/book/00-intro/) | +| Exit codes | [Exit codes and failure policy](https://orenlab.github.io/codeclone/book/03-contracts-exit-codes/) | +| Configuration | [Config and defaults](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) | +| Baseline contract | [Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) | +| Cache contract | [Cache contract](https://orenlab.github.io/codeclone/book/07-cache/) | +| Report contract | [Report contract](https://orenlab.github.io/codeclone/book/08-report/) | +| Metrics & quality gates | [Metrics and quality gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) | +| Dead code | [Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) | +| Docker benchmark contract | [Benchmarking 
contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) | +| Determinism | [Determinism policy](https://orenlab.github.io/codeclone/book/12-determinism/) | + +## * Benchmarking + +
+Reproducible Docker Benchmark -```yaml -repos: - - repo: local - hooks: - - id: codeclone - name: CodeClone - entry: codeclone - language: system - pass_filenames: false - args: [ ".", "--ci" ] - types: [ python ] +```bash +./benchmarks/run_docker_benchmark.sh ``` ---- - -## What CodeClone Is (and Is Not) - -### CodeClone Is +The wrapper builds `benchmarks/Dockerfile`, runs isolated container benchmarks, and writes results to +`.cache/benchmarks/codeclone-benchmark.json`. -- A structural clone detector for Python -- A CI guard against new duplication -- A deterministic analysis tool with auditable outputs +Use environment overrides to pin the benchmark envelope: -### CodeClone Is Not - -- A linter or code formatter -- A semantic equivalence prover -- A runtime execution analyzer - ---- - -## How It Works - -**High-level Pipeline:** - -1. **Parse** — Python source → AST -2. **Normalize** — AST → canonical structure -3. **CFG Construction** — per-function control flow graph -4. **Fingerprinting** — stable hash computation -5. **Grouping** — function/block/segment clone groups -6. **Determinism** — stable ordering for reproducibility -7. 
**Baseline Comparison** — new vs known clones (when requested) - -Learn more: - -- Architecture: [`docs/architecture.md`](docs/architecture.md) -- CFG semantics: [`docs/cfg.md`](docs/cfg.md) +```bash +CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ + ./benchmarks/run_docker_benchmark.sh +``` ---- +Performance claims are backed by the reproducible benchmark workflow documented +in [Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) -## Documentation Map - -Use this map to pick the right level of detail: - -- **Contract book (canonical contracts/specs):** [`docs/book/`](docs/book/) - - Start here: [`docs/book/00-intro.md`](docs/book/00-intro.md) - - Exit codes and precedence: [`docs/book/03-contracts-exit-codes.md`](docs/book/03-contracts-exit-codes.md) - - Baseline contract (schema/trust/integrity): [`docs/book/06-baseline.md`](docs/book/06-baseline.md) - - Cache contract (schema/integrity/fail-open): [`docs/book/07-cache.md`](docs/book/07-cache.md) - - Report contract (schema v1.1 + NEW/KNOWN split): [`docs/book/08-report.md`](docs/book/08-report.md) - - CLI behavior: [`docs/book/09-cli.md`](docs/book/09-cli.md) - - HTML rendering: [`docs/book/10-html-render.md`](docs/book/10-html-render.md) - - Determinism policy: [`docs/book/12-determinism.md`](docs/book/12-determinism.md) - - Compatibility/versioning rules: [ - `docs/book/14-compatibility-and-versioning.md`](docs/book/14-compatibility-and-versioning.md) -- **Deep dives:** - - Architecture narrative: [`docs/architecture.md`](docs/architecture.md) - - CFG semantics: [`docs/cfg.md`](docs/cfg.md) +
## Links - **Issues:** - **PyPI:** +- **License:** MIT diff --git a/SECURITY.md b/SECURITY.md index 0c52920..aca157b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,6 +9,7 @@ The following versions currently receive security updates: | Version | Supported | |---------|-----------| +| 2.0.x | Yes | | 1.4.x | Yes | | 1.3.x | No | | 1.2.x | No | @@ -42,7 +43,7 @@ Additional safeguards: - Report explainability fields are generated in Python core; UI is rendering-only and does not infer semantics. - Scanner traversal is root-confined and prevents symlink-based path escape. - Baseline files are schema/type validated with size limits and tamper-evident integrity fields - (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in baseline v1). + (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in baseline schema `2.0`). - Baseline integrity is tamper-evident (audit signal), not tamper-proof cryptographic signing. An actor who can rewrite baseline content and recompute `payload_sha256` can still alter it. - Baseline hash covers canonical payload only (`clones.functions`, `clones.blocks`, diff --git a/benchmarks/Dockerfile b/benchmarks/Dockerfile new file mode 100644 index 0000000..8768aad --- /dev/null +++ b/benchmarks/Dockerfile @@ -0,0 +1,31 @@ +# syntax=docker/dockerfile:1.7 + +FROM python:3.13.2-slim-bookworm + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=0 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 \ + TZ=UTC \ + CODECLONE_BENCH_ROOT=/opt/codeclone \ + CODECLONE_BENCH_OUTPUT=/bench-out/codeclone-benchmark.json \ + CODECLONE_BENCH_RUNS=12 \ + CODECLONE_BENCH_WARMUPS=3 + +WORKDIR /opt/codeclone + +COPY . /opt/codeclone + +RUN python -m pip install --upgrade pip \ + && python -m pip install . 
+
+# Run the benchmark as a dedicated non-root user with a writable /bench-out
+# directory for the JSON results.
+RUN useradd --create-home --uid 10001 bench \
+    && mkdir -p /bench-out \
+    && chown -R bench:bench /bench-out /opt/codeclone
+
+USER bench
+
+ENTRYPOINT ["python", "/opt/codeclone/benchmarks/run_benchmark.py"]
diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py
new file mode 100755
index 0000000..c9b7135
--- /dev/null
+++ b/benchmarks/run_benchmark.py
@@ -0,0 +1,466 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import platform
+import shutil
+import subprocess
+import sys
+import time
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from statistics import fmean, median, pstdev
+from typing import Literal
+
+from codeclone import __version__ as codeclone_version
+from codeclone.baseline import current_python_tag
+
+# Schema version of the benchmark JSON document this script emits.
+BENCHMARK_SCHEMA_VERSION = "1.0"
+
+
+@dataclass(frozen=True)
+class Scenario:
+    """One benchmark scenario: a named codeclone CLI invocation profile.
+
+    ``mode`` labels the scenario as a "cold" or "warm" run; ``extra_args``
+    are appended verbatim to the benchmark CLI command line.
+    """
+
+    name: str
+    mode: Literal["cold", "warm"]
+    extra_args: tuple[str, ...]
+ + +@dataclass(frozen=True) +class RunMeasurement: + elapsed_seconds: float + digest: str + files_found: int + files_analyzed: int + files_cached: int + files_skipped: int + + +def _percentile(sorted_values: list[float], q: float) -> float: + if not sorted_values: + return 0.0 + if len(sorted_values) == 1: + return sorted_values[0] + rank = (len(sorted_values) - 1) * q + lower = int(rank) + upper = min(lower + 1, len(sorted_values) - 1) + weight = rank - lower + return sorted_values[lower] * (1.0 - weight) + sorted_values[upper] * weight + + +def _stats(values: list[float]) -> dict[str, float]: + if not values: + return { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "median": 0.0, + "p95": 0.0, + "stdev": 0.0, + } + ordered = sorted(values) + return { + "min": ordered[0], + "max": ordered[-1], + "mean": fmean(ordered), + "median": median(ordered), + "p95": _percentile(ordered, 0.95), + "stdev": pstdev(ordered) if len(ordered) > 1 else 0.0, + } + + +def _read_report(report_path: Path) -> tuple[str, dict[str, int]]: + payload_obj: object = json.loads(report_path.read_text(encoding="utf-8")) + if not isinstance(payload_obj, dict): + raise RuntimeError(f"report payload is not an object: {report_path}") + payload = payload_obj + + integrity_obj = payload.get("integrity") + if not isinstance(integrity_obj, dict): + raise RuntimeError(f"integrity block missing in {report_path}") + digest_obj = integrity_obj.get("digest") + if not isinstance(digest_obj, dict): + raise RuntimeError(f"digest block missing in {report_path}") + digest_value = str(digest_obj.get("value", "")).strip() + if not digest_value: + raise RuntimeError(f"digest value missing in {report_path}") + + inventory_obj = payload.get("inventory") + if not isinstance(inventory_obj, dict): + raise RuntimeError(f"inventory block missing in {report_path}") + files_obj = inventory_obj.get("files") + if not isinstance(files_obj, dict): + raise RuntimeError(f"inventory.files block missing in {report_path}") + + def 
_as_int(value: object) -> int: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, str): + try: + return int(value) + except ValueError: + return 0 + return 0 + + return digest_value, { + "found": _as_int(files_obj.get("total_found")), + "analyzed": _as_int(files_obj.get("analyzed")), + "cached": _as_int(files_obj.get("cached")), + "skipped": _as_int(files_obj.get("skipped")), + } + + +def _run_cli_once( + *, + target: Path, + python_executable: str, + cache_path: Path, + report_path: Path, + extra_args: tuple[str, ...], +) -> RunMeasurement: + env = dict(os.environ) + env["PYTHONHASHSEED"] = "0" + env["LC_ALL"] = "C.UTF-8" + env["LANG"] = "C.UTF-8" + env["TZ"] = "UTC" + + cmd = [ + python_executable, + "-m", + "codeclone.cli", + str(target), + "--json", + str(report_path), + "--cache-path", + str(cache_path), + "--no-progress", + "--quiet", + *extra_args, + ] + + start = time.perf_counter() + completed = subprocess.run( + cmd, + check=False, + capture_output=True, + text=True, + env=env, + ) + elapsed_seconds = time.perf_counter() - start + if completed.returncode != 0: + stderr_tail = "\n".join(completed.stderr.splitlines()[-20:]) + stdout_tail = "\n".join(completed.stdout.splitlines()[-20:]) + raise RuntimeError( + "benchmark command failed with exit " + f"{completed.returncode}\nSTDOUT:\n{stdout_tail}\nSTDERR:\n{stderr_tail}" + ) + + digest, files = _read_report(report_path) + return RunMeasurement( + elapsed_seconds=elapsed_seconds, + digest=digest, + files_found=files["found"], + files_analyzed=files["analyzed"], + files_cached=files["cached"], + files_skipped=files["skipped"], + ) + + +def _scenario_result( + *, + scenario: Scenario, + target: Path, + python_executable: str, + workspace: Path, + warmups: int, + runs: int, +) -> dict[str, object]: + scenario_dir = workspace / scenario.name + if scenario_dir.exists(): + shutil.rmtree(scenario_dir) + scenario_dir.mkdir(parents=True, exist_ok=True) 
+ + warm_cache_path = scenario_dir / "shared-cache.json" + cold_cache_path = scenario_dir / "cold-cache.json" + + if scenario.mode == "warm": + _run_cli_once( + target=target, + python_executable=python_executable, + cache_path=warm_cache_path, + report_path=scenario_dir / "seed-report.json", + extra_args=scenario.extra_args, + ) + + for idx in range(warmups): + if scenario.mode == "warm": + cache_path = warm_cache_path + else: + cache_path = cold_cache_path + cache_path.unlink(missing_ok=True) + _run_cli_once( + target=target, + python_executable=python_executable, + cache_path=cache_path, + report_path=scenario_dir / f"warmup-report-{idx}.json", + extra_args=scenario.extra_args, + ) + + measurements: list[RunMeasurement] = [] + for idx in range(runs): + if scenario.mode == "warm": + cache_path = warm_cache_path + else: + cache_path = cold_cache_path + cache_path.unlink(missing_ok=True) + measurement = _run_cli_once( + target=target, + python_executable=python_executable, + cache_path=cache_path, + report_path=scenario_dir / f"run-report-{idx}.json", + extra_args=scenario.extra_args, + ) + measurements.append(measurement) + + digests = sorted({m.digest for m in measurements}) + deterministic = len(digests) == 1 + if not deterministic: + raise RuntimeError( + "non-deterministic report digest detected " + f"in scenario {scenario.name}: {digests}" + ) + + timings = [m.elapsed_seconds for m in measurements] + sample = measurements[0] + return { + "name": scenario.name, + "mode": scenario.mode, + "extra_args": list(scenario.extra_args), + "warmups": warmups, + "runs": runs, + "deterministic": deterministic, + "digest": digests[0], + "timings_seconds": timings, + "stats_seconds": _stats(timings), + "inventory_sample": { + "found": sample.files_found, + "analyzed": sample.files_analyzed, + "cached": sample.files_cached, + "skipped": sample.files_skipped, + }, + } + + +def _cgroup_value(path: Path) -> str | None: + try: + content = path.read_text(encoding="utf-8").strip() 
+ except OSError: + return None + return content or None + + +def _environment() -> dict[str, object]: + affinity_count: int | None = None + if hasattr(os, "sched_getaffinity"): + try: + affinity_count = len(os.sched_getaffinity(0)) + except OSError: + affinity_count = None + + cgroup_cpu_max = _cgroup_value(Path("/sys/fs/cgroup/cpu.max")) + cgroup_memory_max = _cgroup_value(Path("/sys/fs/cgroup/memory.max")) + return { + "platform": platform.platform(), + "machine": platform.machine(), + "python_version": platform.python_version(), + "python_implementation": platform.python_implementation(), + "python_tag": current_python_tag(), + "cpu_count": os.cpu_count(), + "cpu_affinity_count": affinity_count, + "container_detected": Path("/.dockerenv").exists(), + "cgroup_cpu_max": cgroup_cpu_max, + "cgroup_memory_max": cgroup_memory_max, + "timestamp_utc": datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z"), + } + + +def _comparison_metrics(scenarios: list[dict[str, object]]) -> dict[str, float]: + by_name = { + str(item["name"]): item + for item in scenarios + if isinstance(item, dict) and "name" in item + } + + def _median_for(name: str) -> float | None: + scenario = by_name.get(name) + if not isinstance(scenario, dict): + return None + stats = scenario.get("stats_seconds") + if not isinstance(stats, dict): + return None + value = stats.get("median") + if isinstance(value, (int, float)): + return float(value) + return None + + cold_full = _median_for("cold_full") + warm_full = _median_for("warm_full") + warm_clones = _median_for("warm_clones_only") + + comparisons: dict[str, float] = {} + if cold_full and warm_full: + comparisons["warm_full_speedup_vs_cold_full"] = cold_full / warm_full + if warm_full and warm_clones: + comparisons["warm_clones_only_speedup_vs_warm_full"] = warm_full / warm_clones + return comparisons + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=( + "Deterministic 
Docker-oriented benchmark for CodeClone CLI " + "(cold/warm cache scenarios)." + ) + ) + parser.add_argument( + "--target", + type=Path, + default=Path(os.environ.get("CODECLONE_BENCH_ROOT", "/opt/codeclone")), + help="Analysis target directory inside container", + ) + parser.add_argument( + "--output", + type=Path, + default=Path( + os.environ.get( + "CODECLONE_BENCH_OUTPUT", + "/bench-out/codeclone-benchmark.json", + ) + ), + help="Output JSON path", + ) + parser.add_argument( + "--runs", + type=int, + default=int(os.environ.get("CODECLONE_BENCH_RUNS", "12")), + help="Measured runs per scenario", + ) + parser.add_argument( + "--warmups", + type=int, + default=int(os.environ.get("CODECLONE_BENCH_WARMUPS", "3")), + help="Warmup runs per scenario", + ) + parser.add_argument( + "--tmp-dir", + type=Path, + default=Path("/tmp/codeclone-benchmark"), + help="Temporary working directory", + ) + parser.add_argument( + "--python-executable", + default=sys.executable, + help="Python executable used to invoke codeclone CLI", + ) + return parser.parse_args() + + +def main() -> int: + args = _parse_args() + if args.runs <= 0: + raise SystemExit("--runs must be > 0") + if args.warmups < 0: + raise SystemExit("--warmups must be >= 0") + target = args.target.resolve() + if not target.exists(): + raise SystemExit(f"target does not exist: {target}") + if not target.is_dir(): + raise SystemExit(f"target is not a directory: {target}") + + workspace = args.tmp_dir.resolve() + if workspace.exists(): + shutil.rmtree(workspace) + workspace.mkdir(parents=True, exist_ok=True) + + scenarios = [ + Scenario(name="cold_full", mode="cold", extra_args=()), + Scenario(name="warm_full", mode="warm", extra_args=()), + Scenario(name="warm_clones_only", mode="warm", extra_args=("--skip-metrics",)), + ] + scenario_results = [ + _scenario_result( + scenario=scenario, + target=target, + python_executable=args.python_executable, + workspace=workspace, + warmups=args.warmups, + runs=args.runs, + ) + for 
scenario in scenarios + ] + + comparisons = _comparison_metrics(scenario_results) + + payload = { + "benchmark_schema_version": BENCHMARK_SCHEMA_VERSION, + "tool": { + "name": "codeclone", + "version": codeclone_version, + "python_tag": current_python_tag(), + }, + "config": { + "target": str(target), + "runs": args.runs, + "warmups": args.warmups, + "python_executable": args.python_executable, + }, + "environment": _environment(), + "scenarios": scenario_results, + "comparisons": comparisons, + "generated_at_utc": datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z"), + } + + args.output.parent.mkdir(parents=True, exist_ok=True) + tmp_output = args.output.with_suffix(args.output.suffix + ".tmp") + rendered = json.dumps(payload, ensure_ascii=False, indent=2) + tmp_output.write_text(rendered, encoding="utf-8") + tmp_output.replace(args.output) + + print("CodeClone Docker benchmark") + print(f"target={target}") + print(f"runs={args.runs} warmups={args.warmups}") + for scenario in scenario_results: + name = str(scenario["name"]) + stats = scenario["stats_seconds"] + assert isinstance(stats, dict) + median_s = float(stats["median"]) + p95_s = float(stats["p95"]) + stdev_s = float(stats["stdev"]) + print( + f"- {name:16s} median={median_s:.4f}s " + f"p95={p95_s:.4f}s stdev={stdev_s:.4f}s " + f"digest={scenario['digest']}" + ) + if comparisons: + print("ratios:") + for name, value in sorted(comparisons.items()): + print(f"- {name}={value:.3f}x") + print(f"output={args.output}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmarks/run_docker_benchmark.sh b/benchmarks/run_docker_benchmark.sh new file mode 100755 index 0000000..2ff42e5 --- /dev/null +++ b/benchmarks/run_docker_benchmark.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +IMAGE_TAG="${IMAGE_TAG:-codeclone-benchmark:2.0.0b1}" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/.cache/benchmarks}" +OUTPUT_BASENAME="${OUTPUT_BASENAME:-codeclone-benchmark.json}" +CPUSET="${CPUSET:-0}" +CPUS="${CPUS:-1.0}" +MEMORY="${MEMORY:-2g}" +RUNS="${RUNS:-12}" +WARMUPS="${WARMUPS:-3}" +HOST_UID="$(id -u)" +HOST_GID="$(id -g)" +CONTAINER_USER="${CONTAINER_USER:-${HOST_UID}:${HOST_GID}}" + +mkdir -p "$OUT_DIR" + +echo "[bench] building image: $IMAGE_TAG" +docker build \ + --pull \ + --file "$ROOT_DIR/benchmarks/Dockerfile" \ + --tag "$IMAGE_TAG" \ + "$ROOT_DIR" + +echo "[bench] running benchmark container" +docker run \ + --rm \ + --user "$CONTAINER_USER" \ + --cpuset-cpus="$CPUSET" \ + --cpus="$CPUS" \ + --memory="$MEMORY" \ + --pids-limit=256 \ + --network=none \ + --security-opt=no-new-privileges \ + --read-only \ + --tmpfs /tmp:rw,noexec,nosuid,size=2g \ + --tmpfs /home/bench:rw,noexec,nosuid,size=128m \ + --mount "type=bind,src=$OUT_DIR,dst=/bench-out" \ + "$IMAGE_TAG" \ + --output "/bench-out/$OUTPUT_BASENAME" \ + --runs "$RUNS" \ + --warmups "$WARMUPS" \ + "$@" + +echo "[bench] results: $OUT_DIR/$OUTPUT_BASENAME" diff --git a/codeclone.baseline.json b/codeclone.baseline.json index a50c904..931f31e 100644 --- a/codeclone.baseline.json +++ b/codeclone.baseline.json @@ -2,13 +2,14 @@ "meta": { "generator": { "name": "codeclone", - "version": "1.4.0" + "version": "2.0.0b1" }, - "schema_version": "1.0", + "schema_version": "2.0", "fingerprint_version": "1", "python_tag": "cp313", - "created_at": "2026-02-12T15:31:42Z", - "payload_sha256": "691c6cedd10e2a51d6038780f3ae9dffe763356dd2aba742b3980f131b79f217" + "created_at": "2026-03-24T15:14:34Z", + "payload_sha256": "691c6cedd10e2a51d6038780f3ae9dffe763356dd2aba742b3980f131b79f217", + "metrics_payload_sha256": "3310d3a0f64d5fa0373546c5c4c82675dc5a441344f876092005665e81234e94" }, "clones": { "functions": [ @@ -23,5 +24,24 @@ 
"cacc33d58f323481f65fed57873d1c840531859e|d60c0005a4c850c140378d1c82b81dde93a7ccab|d60c0005a4c850c140378d1c82b81dde93a7ccab|b4b5893be87edf98955f047cbf25ca755dc753b4", "ee69aff0b7ea38927e5082ceef14115c805f6734|fcd36b4275c94f1955fb55e1c1ca3c04c7c0bb26|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5" ] + }, + "metrics": { + "max_complexity": 20, + "high_risk_functions": [], + "max_coupling": 10, + "high_coupling_classes": [], + "max_cohesion": 5, + "low_cohesion_classes": [ + "codeclone._cli_reports:_OutputPaths", + "codeclone._cli_reports:_ReportArtifacts", + "codeclone.baseline:Baseline", + "codeclone.metrics_baseline:MetricsBaseline", + "tests.test_golden_v2:_DummyExecutor" + ], + "dependency_cycles": [], + "dependency_max_depth": 9, + "dead_code_items": [], + "health_score": 78, + "health_grade": "B" } } diff --git a/codeclone/__init__.py b/codeclone/__init__.py index 4cd6ae8..b52ea47 100644 --- a/codeclone/__init__.py +++ b/codeclone/__init__.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from importlib.metadata import PackageNotFoundError, version diff --git a/codeclone/_cli_args.py b/codeclone/_cli_args.py index 15cbdc5..d2796b9 100644 --- a/codeclone/_cli_args.py +++ b/codeclone/_cli_args.py @@ -1,19 +1,39 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import argparse import sys -from typing import NoReturn, cast +from typing import NoReturn from . 
import ui_messages as ui -from .contracts import ExitCode, cli_help_epilog +from .contracts import ( + DEFAULT_COHESION_THRESHOLD, + DEFAULT_COMPLEXITY_THRESHOLD, + DEFAULT_COUPLING_THRESHOLD, + DEFAULT_HEALTH_THRESHOLD, + ExitCode, + cli_help_epilog, +) + +DEFAULT_ROOT = "." +DEFAULT_MIN_LOC = 10 +DEFAULT_MIN_STMT = 6 +DEFAULT_BLOCK_MIN_LOC = 20 +DEFAULT_BLOCK_MIN_STMT = 8 +DEFAULT_SEGMENT_MIN_LOC = 20 +DEFAULT_SEGMENT_MIN_STMT = 10 +DEFAULT_PROCESSES = 4 +DEFAULT_MAX_CACHE_SIZE_MB = 50 +DEFAULT_MAX_BASELINE_SIZE_MB = 5 + +DEFAULT_BASELINE_PATH = "codeclone.baseline.json" +DEFAULT_HTML_REPORT_PATH = ".cache/codeclone/report.html" +DEFAULT_JSON_REPORT_PATH = ".cache/codeclone/report.json" +DEFAULT_MARKDOWN_REPORT_PATH = ".cache/codeclone/report.md" +DEFAULT_SARIF_REPORT_PATH = ".cache/codeclone/report.sarif" +DEFAULT_TEXT_REPORT_PATH = ".cache/codeclone/report.txt" class _ArgumentParser(argparse.ArgumentParser): @@ -25,156 +45,338 @@ def error(self, message: str) -> NoReturn: ) -class _HelpFormatter( - argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter -): - def _get_help_string(self, action: argparse.Action) -> str: - if action.dest == "cache_path": - return action.help or "" - return cast(str, super()._get_help_string(action)) +class _HelpFormatter(argparse.RawTextHelpFormatter): + """Product-oriented help formatter extension point.""" + + +def _add_optional_path_argument( + group: argparse._ArgumentGroup, + *, + flag: str, + dest: str, + help_text: str, + default: str | None = None, + const: str | None = None, + metavar: str = "FILE", +) -> None: + group.add_argument( + flag, + dest=dest, + nargs="?", + metavar=metavar, + default=default, + const=const, + help=help_text, + ) + + +def _add_bool_optional_argument( + group: argparse._ArgumentGroup, + *, + flag: str, + help_text: str, + default: bool = False, +) -> None: + group.add_argument( + flag, + action=argparse.BooleanOptionalAction, + default=default, + help=help_text, + ) -def 
build_parser(version: str) -> argparse.ArgumentParser: +def build_parser(version: str) -> _ArgumentParser: ap = _ArgumentParser( prog="codeclone", - description="AST and CFG-based code clone detector for Python.", + description="Structural code quality analysis for Python.", + add_help=False, formatter_class=_HelpFormatter, epilog=cli_help_epilog(), ) - ap.add_argument( - "--version", - action="version", - version=ui.version_output(version), - help=ui.HELP_VERSION, - ) - core_group = ap.add_argument_group("Target") - core_group.add_argument( + target_group = ap.add_argument_group("Target") + target_group.add_argument( "root", nargs="?", - default=".", + default=DEFAULT_ROOT, help=ui.HELP_ROOT, ) - tune_group = ap.add_argument_group("Analysis Tuning") - tune_group.add_argument( + analysis_group = ap.add_argument_group("Analysis") + analysis_group.add_argument( "--min-loc", type=int, - default=15, + default=DEFAULT_MIN_LOC, help=ui.HELP_MIN_LOC, ) - tune_group.add_argument( + analysis_group.add_argument( "--min-stmt", type=int, - default=6, + default=DEFAULT_MIN_STMT, help=ui.HELP_MIN_STMT, ) - tune_group.add_argument( + # Block/segment thresholds are advanced tuning: configurable via + # pyproject.toml only (no CLI flags). Defaults live on the namespace + # so apply_pyproject_config_overrides can override them. 
+ ap.set_defaults( + block_min_loc=DEFAULT_BLOCK_MIN_LOC, + block_min_stmt=DEFAULT_BLOCK_MIN_STMT, + segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, + segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, + ) + analysis_group.add_argument( "--processes", type=int, - default=4, + default=DEFAULT_PROCESSES, help=ui.HELP_PROCESSES, ) - tune_group.add_argument( - "--cache-path", + _add_optional_path_argument( + analysis_group, + flag="--cache-path", dest="cache_path", - metavar="FILE", default=None, - help=ui.HELP_CACHE_PATH, + const=None, + help_text=ui.HELP_CACHE_PATH, ) - tune_group.add_argument( - "--cache-dir", + _add_optional_path_argument( + analysis_group, + flag="--cache-dir", dest="cache_path", - metavar="FILE", default=None, - help=ui.HELP_CACHE_DIR_LEGACY, + const=None, + help_text=ui.HELP_CACHE_DIR_LEGACY, ) - tune_group.add_argument( + analysis_group.add_argument( "--max-cache-size-mb", type=int, - default=50, + default=DEFAULT_MAX_CACHE_SIZE_MB, metavar="MB", help=ui.HELP_MAX_CACHE_SIZE_MB, ) - ci_group = ap.add_argument_group("Baseline & CI/CD") - ci_group.add_argument( - "--baseline", - default="codeclone.baseline.json", - help=ui.HELP_BASELINE, + baselines_ci_group = ap.add_argument_group("Baselines and CI") + _add_optional_path_argument( + baselines_ci_group, + flag="--baseline", + dest="baseline", + default=DEFAULT_BASELINE_PATH, + const=DEFAULT_BASELINE_PATH, + help_text=ui.HELP_BASELINE, ) - ci_group.add_argument( + baselines_ci_group.add_argument( "--max-baseline-size-mb", type=int, - default=5, + default=DEFAULT_MAX_BASELINE_SIZE_MB, metavar="MB", help=ui.HELP_MAX_BASELINE_SIZE_MB, ) - ci_group.add_argument( - "--update-baseline", - action="store_true", - help=ui.HELP_UPDATE_BASELINE, + _add_bool_optional_argument( + baselines_ci_group, + flag="--update-baseline", + help_text=ui.HELP_UPDATE_BASELINE, ) - ci_group.add_argument( - "--fail-on-new", - action="store_true", - help=ui.HELP_FAIL_ON_NEW, + _add_optional_path_argument( + baselines_ci_group, + 
flag="--metrics-baseline", + dest="metrics_baseline", + default=DEFAULT_BASELINE_PATH, + const=DEFAULT_BASELINE_PATH, + help_text=ui.HELP_METRICS_BASELINE, + ) + _add_bool_optional_argument( + baselines_ci_group, + flag="--update-metrics-baseline", + help_text=ui.HELP_UPDATE_METRICS_BASELINE, + ) + _add_bool_optional_argument( + baselines_ci_group, + flag="--ci", + help_text=ui.HELP_CI, ) - ci_group.add_argument( + + quality_group = ap.add_argument_group("Quality gates") + _add_bool_optional_argument( + quality_group, + flag="--fail-on-new", + help_text=ui.HELP_FAIL_ON_NEW, + ) + _add_bool_optional_argument( + quality_group, + flag="--fail-on-new-metrics", + help_text=ui.HELP_FAIL_ON_NEW_METRICS, + ) + quality_group.add_argument( "--fail-threshold", type=int, default=-1, metavar="MAX_CLONES", help=ui.HELP_FAIL_THRESHOLD, ) - ci_group.add_argument( - "--ci", - action="store_true", - help=ui.HELP_CI, + quality_group.add_argument( + "--fail-complexity", + type=int, + nargs="?", + const=DEFAULT_COMPLEXITY_THRESHOLD, + default=-1, + metavar="CC_MAX", + help=ui.HELP_FAIL_COMPLEXITY, + ) + quality_group.add_argument( + "--fail-coupling", + type=int, + nargs="?", + const=DEFAULT_COUPLING_THRESHOLD, + default=-1, + metavar="CBO_MAX", + help=ui.HELP_FAIL_COUPLING, + ) + quality_group.add_argument( + "--fail-cohesion", + type=int, + nargs="?", + const=DEFAULT_COHESION_THRESHOLD, + default=-1, + metavar="LCOM4_MAX", + help=ui.HELP_FAIL_COHESION, + ) + _add_bool_optional_argument( + quality_group, + flag="--fail-cycles", + help_text=ui.HELP_FAIL_CYCLES, + ) + _add_bool_optional_argument( + quality_group, + flag="--fail-dead-code", + help_text=ui.HELP_FAIL_DEAD_CODE, + ) + quality_group.add_argument( + "--fail-health", + type=int, + nargs="?", + const=DEFAULT_HEALTH_THRESHOLD, + default=-1, + metavar="SCORE_MIN", + help=ui.HELP_FAIL_HEALTH, + ) + + stages_group = ap.add_argument_group("Analysis stages") + _add_bool_optional_argument( + stages_group, + flag="--skip-metrics", + 
help_text=ui.HELP_SKIP_METRICS, + ) + _add_bool_optional_argument( + stages_group, + flag="--skip-dead-code", + help_text=ui.HELP_SKIP_DEAD_CODE, + ) + _add_bool_optional_argument( + stages_group, + flag="--skip-dependencies", + help_text=ui.HELP_SKIP_DEPENDENCIES, ) - out_group = ap.add_argument_group("Reporting") - out_group.add_argument( - "--html", + reporting_group = ap.add_argument_group("Reporting") + _add_optional_path_argument( + reporting_group, + flag="--html", dest="html_out", - metavar="FILE", - help=ui.HELP_HTML, + const=DEFAULT_HTML_REPORT_PATH, + help_text=ui.HELP_HTML, ) - out_group.add_argument( - "--json", + _add_optional_path_argument( + reporting_group, + flag="--json", dest="json_out", - metavar="FILE", - help=ui.HELP_JSON, + const=DEFAULT_JSON_REPORT_PATH, + help_text=ui.HELP_JSON, + ) + _add_optional_path_argument( + reporting_group, + flag="--md", + dest="md_out", + const=DEFAULT_MARKDOWN_REPORT_PATH, + help_text=ui.HELP_MD, ) - out_group.add_argument( - "--text", + _add_optional_path_argument( + reporting_group, + flag="--sarif", + dest="sarif_out", + const=DEFAULT_SARIF_REPORT_PATH, + help_text=ui.HELP_SARIF, + ) + _add_optional_path_argument( + reporting_group, + flag="--text", dest="text_out", - metavar="FILE", - help=ui.HELP_TEXT, + const=DEFAULT_TEXT_REPORT_PATH, + help_text=ui.HELP_TEXT, + ) + _add_bool_optional_argument( + reporting_group, + flag="--timestamped-report-paths", + help_text=ui.HELP_TIMESTAMPED_REPORT_PATHS, ) - out_group.add_argument( + + ui_group = ap.add_argument_group("Output and UI") + _add_bool_optional_argument( + ui_group, + flag="--open-html-report", + help_text=ui.HELP_OPEN_HTML_REPORT, + ) + ui_group.add_argument( "--no-progress", + dest="no_progress", action="store_true", help=ui.HELP_NO_PROGRESS, ) - out_group.add_argument( + ui_group.add_argument( + "--progress", + dest="no_progress", + action="store_false", + help=ui.HELP_PROGRESS, + ) + ui_group.add_argument( "--no-color", + dest="no_color", 
action="store_true", help=ui.HELP_NO_COLOR, ) - out_group.add_argument( - "--quiet", - action="store_true", - help=ui.HELP_QUIET, + ui_group.add_argument( + "--color", + dest="no_color", + action="store_false", + help=ui.HELP_COLOR, ) - out_group.add_argument( - "--verbose", - action="store_true", - help=ui.HELP_VERBOSE, + ui_group.set_defaults(no_progress=False, no_color=False) + _add_bool_optional_argument( + ui_group, + flag="--quiet", + help_text=ui.HELP_QUIET, ) - out_group.add_argument( - "--debug", - action="store_true", - help=ui.HELP_DEBUG, + _add_bool_optional_argument( + ui_group, + flag="--verbose", + help_text=ui.HELP_VERBOSE, ) + _add_bool_optional_argument( + ui_group, + flag="--debug", + help_text=ui.HELP_DEBUG, + ) + + general_group = ap.add_argument_group("General") + general_group.add_argument( + "-h", + "--help", + action="help", + help="Show this help message and exit.", + ) + general_group.add_argument( + "--version", + action="version", + version=ui.version_output(version), + help=ui.HELP_VERSION, + ) + return ap diff --git a/codeclone/_cli_baselines.py b/codeclone/_cli_baselines.py new file mode 100644 index 0000000..64a187c --- /dev/null +++ b/codeclone/_cli_baselines.py @@ -0,0 +1,389 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Protocol + +from . 
import ui_messages as ui +from .baseline import ( + BASELINE_UNTRUSTED_STATUSES, + Baseline, + BaselineStatus, + coerce_baseline_status, + current_python_tag, +) +from .contracts import ( + BASELINE_FINGERPRINT_VERSION, + BASELINE_SCHEMA_VERSION, + ExitCode, +) +from .errors import BaselineValidationError +from .metrics_baseline import ( + METRICS_BASELINE_UNTRUSTED_STATUSES, + MetricsBaseline, + MetricsBaselineStatus, + coerce_metrics_baseline_status, +) + +if TYPE_CHECKING: + from .models import GroupMapLike, ProjectMetrics + +__all__ = [ + "CloneBaselineState", + "MetricsBaselineSectionProbe", + "MetricsBaselineState", + "probe_metrics_baseline_section", + "resolve_clone_baseline_state", + "resolve_metrics_baseline_state", +] + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... + + +class _BaselineArgs(Protocol): + max_baseline_size_mb: int + update_baseline: bool + fail_on_new: bool + skip_metrics: bool + update_metrics_baseline: bool + fail_on_new_metrics: bool + ci: bool + + +@dataclass(frozen=True, slots=True) +class CloneBaselineState: + baseline: Baseline + loaded: bool + status: BaselineStatus + failure_code: ExitCode | None + trusted_for_diff: bool + updated_path: Path | None + + +@dataclass(frozen=True, slots=True) +class MetricsBaselineState: + baseline: MetricsBaseline + loaded: bool + status: MetricsBaselineStatus + failure_code: ExitCode | None + trusted_for_diff: bool + + +@dataclass(slots=True) +class _MetricsBaselineRuntime: + baseline: MetricsBaseline + loaded: bool = False + status: MetricsBaselineStatus = MetricsBaselineStatus.MISSING + failure_code: ExitCode | None = None + trusted_for_diff: bool = False + + +@dataclass(frozen=True, slots=True) +class MetricsBaselineSectionProbe: + has_metrics_section: bool + payload: dict[str, object] | None + + +def probe_metrics_baseline_section(path: Path) -> MetricsBaselineSectionProbe: + if not path.exists(): + return MetricsBaselineSectionProbe( + 
has_metrics_section=False, + payload=None, + ) + try: + raw_payload = json.loads(path.read_text("utf-8")) + except (OSError, json.JSONDecodeError): + return MetricsBaselineSectionProbe( + has_metrics_section=True, + payload=None, + ) + if not isinstance(raw_payload, dict): + return MetricsBaselineSectionProbe( + has_metrics_section=True, + payload=None, + ) + payload = dict(raw_payload) + return MetricsBaselineSectionProbe( + has_metrics_section=("metrics" in payload), + payload=payload, + ) + + +def resolve_clone_baseline_state( + *, + args: _BaselineArgs, + baseline_path: Path, + baseline_exists: bool, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + codeclone_version: str, + console: _PrinterLike, + shared_baseline_payload: dict[str, object] | None = None, +) -> CloneBaselineState: + baseline = Baseline(baseline_path) + baseline_loaded = False + baseline_status = BaselineStatus.MISSING + baseline_failure_code: ExitCode | None = None + baseline_trusted_for_diff = False + baseline_updated_path: Path | None = None + + if baseline_exists: + try: + if shared_baseline_payload is None: + baseline.load(max_size_bytes=args.max_baseline_size_mb * 1024 * 1024) + else: + baseline.load( + max_size_bytes=args.max_baseline_size_mb * 1024 * 1024, + preloaded_payload=shared_baseline_payload, + ) + except BaselineValidationError as exc: + baseline_status = coerce_baseline_status(exc.status) + if not args.update_baseline: + console.print(ui.fmt_invalid_baseline(exc)) + if args.fail_on_new: + baseline_failure_code = ExitCode.CONTRACT_ERROR + else: + console.print(ui.WARN_BASELINE_IGNORED) + else: + if not args.update_baseline: + try: + baseline.verify_compatibility( + current_python_tag=current_python_tag() + ) + except BaselineValidationError as exc: + baseline_status = coerce_baseline_status(exc.status) + console.print(ui.fmt_invalid_baseline(exc)) + if args.fail_on_new: + baseline_failure_code = ExitCode.CONTRACT_ERROR + else: + 
console.print(ui.WARN_BASELINE_IGNORED) + else: + baseline_loaded = True + baseline_status = BaselineStatus.OK + baseline_trusted_for_diff = True + elif not args.update_baseline: + console.print(ui.fmt_path(ui.WARN_BASELINE_MISSING, baseline_path)) + + if baseline_status in BASELINE_UNTRUSTED_STATUSES: + baseline_loaded = False + baseline_trusted_for_diff = False + if args.fail_on_new and not args.update_baseline: + baseline_failure_code = ExitCode.CONTRACT_ERROR + + if args.update_baseline: + new_baseline = Baseline.from_groups( + func_groups, + block_groups, + path=baseline_path, + python_tag=current_python_tag(), + fingerprint_version=BASELINE_FINGERPRINT_VERSION, + schema_version=BASELINE_SCHEMA_VERSION, + generator_version=codeclone_version, + ) + try: + new_baseline.save() + except OSError as exc: + console.print( + ui.fmt_contract_error( + ui.fmt_baseline_write_failed(path=baseline_path, error=exc) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + console.print(ui.fmt_path(ui.SUCCESS_BASELINE_UPDATED, baseline_path)) + baseline = new_baseline + baseline_loaded = True + baseline_status = BaselineStatus.OK + baseline_trusted_for_diff = True + baseline_updated_path = baseline_path + + return CloneBaselineState( + baseline=baseline, + loaded=baseline_loaded, + status=baseline_status, + failure_code=baseline_failure_code, + trusted_for_diff=baseline_trusted_for_diff, + updated_path=baseline_updated_path, + ) + + +def resolve_metrics_baseline_state( + *, + args: _BaselineArgs, + metrics_baseline_path: Path, + metrics_baseline_exists: bool, + baseline_updated_path: Path | None, + project_metrics: ProjectMetrics | None, + console: _PrinterLike, + shared_baseline_payload: dict[str, object] | None = None, +) -> MetricsBaselineState: + state = _MetricsBaselineRuntime(baseline=MetricsBaseline(metrics_baseline_path)) + + if _metrics_mode_short_circuit(args=args, console=console): + return MetricsBaselineState( + baseline=state.baseline, + loaded=state.loaded, + 
status=state.status, + failure_code=state.failure_code, + trusted_for_diff=state.trusted_for_diff, + ) + + _load_metrics_baseline_for_diff( + args=args, + metrics_baseline_exists=metrics_baseline_exists, + state=state, + console=console, + shared_baseline_payload=shared_baseline_payload, + ) + _apply_metrics_baseline_untrusted_policy(args=args, state=state) + _update_metrics_baseline_if_requested( + args=args, + metrics_baseline_path=metrics_baseline_path, + baseline_updated_path=baseline_updated_path, + project_metrics=project_metrics, + state=state, + console=console, + ) + if args.ci and state.loaded: + args.fail_on_new_metrics = True + + return MetricsBaselineState( + baseline=state.baseline, + loaded=state.loaded, + status=state.status, + failure_code=state.failure_code, + trusted_for_diff=state.trusted_for_diff, + ) + + +def _metrics_mode_short_circuit( + *, + args: _BaselineArgs, + console: _PrinterLike, +) -> bool: + if not args.skip_metrics: + return False + if args.update_metrics_baseline or args.fail_on_new_metrics: + console.print( + ui.fmt_contract_error( + "Metrics baseline operations require metrics analysis. " + "Remove --skip-metrics." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + return True + + +def _load_metrics_baseline_for_diff( + *, + args: _BaselineArgs, + metrics_baseline_exists: bool, + state: _MetricsBaselineRuntime, + console: _PrinterLike, + shared_baseline_payload: dict[str, object] | None = None, +) -> None: + if not metrics_baseline_exists: + if args.fail_on_new_metrics and not args.update_metrics_baseline: + state.failure_code = ExitCode.CONTRACT_ERROR + console.print( + ui.fmt_contract_error( + "Metrics baseline file is required for --fail-on-new-metrics. " + "Run codeclone . --update-metrics-baseline first." 
+ ) + ) + return + + try: + if shared_baseline_payload is None: + state.baseline.load(max_size_bytes=args.max_baseline_size_mb * 1024 * 1024) + else: + state.baseline.load( + max_size_bytes=args.max_baseline_size_mb * 1024 * 1024, + preloaded_payload=shared_baseline_payload, + ) + except BaselineValidationError as exc: + state.status = coerce_metrics_baseline_status(exc.status) + if not args.update_metrics_baseline: + console.print(ui.fmt_invalid_baseline(exc)) + if args.fail_on_new_metrics: + state.failure_code = ExitCode.CONTRACT_ERROR + return + + if args.update_metrics_baseline: + return + + try: + state.baseline.verify_compatibility(runtime_python_tag=current_python_tag()) + except BaselineValidationError as exc: + state.status = coerce_metrics_baseline_status(exc.status) + console.print(ui.fmt_invalid_baseline(exc)) + if args.fail_on_new_metrics: + state.failure_code = ExitCode.CONTRACT_ERROR + else: + state.loaded = True + state.status = MetricsBaselineStatus.OK + state.trusted_for_diff = True + + +def _apply_metrics_baseline_untrusted_policy( + *, + args: _BaselineArgs, + state: _MetricsBaselineRuntime, +) -> None: + if state.status not in METRICS_BASELINE_UNTRUSTED_STATUSES: + return + state.loaded = False + state.trusted_for_diff = False + if args.fail_on_new_metrics and not args.update_metrics_baseline: + state.failure_code = ExitCode.CONTRACT_ERROR + + +def _update_metrics_baseline_if_requested( + *, + args: _BaselineArgs, + metrics_baseline_path: Path, + baseline_updated_path: Path | None, + project_metrics: ProjectMetrics | None, + state: _MetricsBaselineRuntime, + console: _PrinterLike, +) -> None: + if not args.update_metrics_baseline: + return + if project_metrics is None: + console.print( + ui.fmt_contract_error( + "Cannot update metrics baseline: metrics were not computed." 
+ ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + new_metrics_baseline = MetricsBaseline.from_project_metrics( + project_metrics=project_metrics, + path=metrics_baseline_path, + ) + try: + new_metrics_baseline.save() + except OSError as exc: + console.print( + ui.fmt_contract_error( + ui.fmt_baseline_write_failed( + path=metrics_baseline_path, + error=exc, + ) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + if baseline_updated_path != metrics_baseline_path: + console.print(ui.fmt_path(ui.SUCCESS_BASELINE_UPDATED, metrics_baseline_path)) + + state.baseline = new_metrics_baseline + state.loaded = True + state.status = MetricsBaselineStatus.OK + state.trusted_for_diff = True diff --git a/codeclone/_cli_config.py b/codeclone/_cli_config.py new file mode 100644 index 0000000..b31d9b1 --- /dev/null +++ b/codeclone/_cli_config.py @@ -0,0 +1,266 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Final + +if TYPE_CHECKING: + import argparse + from collections.abc import Mapping, Sequence + + +class ConfigValidationError(ValueError): + """Raised when pyproject.toml contains invalid CodeClone configuration.""" + + +@dataclass(frozen=True, slots=True) +class _ConfigKeySpec: + expected_type: type[object] + allow_none: bool = False + + +_CONFIG_KEY_SPECS: Final[dict[str, _ConfigKeySpec]] = { + "min_loc": _ConfigKeySpec(int), + "min_stmt": _ConfigKeySpec(int), + "block_min_loc": _ConfigKeySpec(int), + "block_min_stmt": _ConfigKeySpec(int), + "segment_min_loc": _ConfigKeySpec(int), + "segment_min_stmt": _ConfigKeySpec(int), + "processes": _ConfigKeySpec(int), + "cache_path": _ConfigKeySpec(str, allow_none=True), + "max_cache_size_mb": _ConfigKeySpec(int), + "baseline": _ConfigKeySpec(str), + "max_baseline_size_mb": _ConfigKeySpec(int), + "update_baseline": _ConfigKeySpec(bool), + "fail_on_new": 
_ConfigKeySpec(bool), + "fail_threshold": _ConfigKeySpec(int), + "ci": _ConfigKeySpec(bool), + "fail_complexity": _ConfigKeySpec(int), + "fail_coupling": _ConfigKeySpec(int), + "fail_cohesion": _ConfigKeySpec(int), + "fail_cycles": _ConfigKeySpec(bool), + "fail_dead_code": _ConfigKeySpec(bool), + "fail_health": _ConfigKeySpec(int), + "fail_on_new_metrics": _ConfigKeySpec(bool), + "update_metrics_baseline": _ConfigKeySpec(bool), + "metrics_baseline": _ConfigKeySpec(str), + "skip_metrics": _ConfigKeySpec(bool), + "skip_dead_code": _ConfigKeySpec(bool), + "skip_dependencies": _ConfigKeySpec(bool), + "html_out": _ConfigKeySpec(str, allow_none=True), + "json_out": _ConfigKeySpec(str, allow_none=True), + "md_out": _ConfigKeySpec(str, allow_none=True), + "sarif_out": _ConfigKeySpec(str, allow_none=True), + "text_out": _ConfigKeySpec(str, allow_none=True), + "no_progress": _ConfigKeySpec(bool), + "no_color": _ConfigKeySpec(bool), + "quiet": _ConfigKeySpec(bool), + "verbose": _ConfigKeySpec(bool), + "debug": _ConfigKeySpec(bool), +} +_PATH_CONFIG_KEYS: Final[frozenset[str]] = frozenset( + { + "cache_path", + "baseline", + "metrics_baseline", + "html_out", + "json_out", + "md_out", + "sarif_out", + "text_out", + } +) + + +def collect_explicit_cli_dests( + parser: argparse.ArgumentParser, + *, + argv: Sequence[str], +) -> set[str]: + option_to_dest: dict[str, str] = {} + for action in parser._actions: + for option in action.option_strings: + option_to_dest[option] = action.dest + + explicit: set[str] = set() + for token in argv: + if token == "--": + break + if not token.startswith("-"): + continue + option = token.split("=", maxsplit=1)[0] + dest = option_to_dest.get(option) + if dest is not None: + explicit.add(dest) + return explicit + + +def load_pyproject_config(root_path: Path) -> dict[str, object]: + config_path = root_path / "pyproject.toml" + if not config_path.exists(): + return {} + + payload: object + try: + payload = _load_toml(config_path) + except OSError as 
exc: + raise ConfigValidationError( + f"Cannot read pyproject.toml at {config_path}: {exc}" + ) from exc + except ValueError as exc: + raise ConfigValidationError(f"Invalid TOML in {config_path}: {exc}") from exc + + if not isinstance(payload, dict): + raise ConfigValidationError( + f"Invalid pyproject payload at {config_path}: root must be object" + ) + + tool_obj = payload.get("tool") + if tool_obj is None: + return {} + if not isinstance(tool_obj, dict): + raise ConfigValidationError( + f"Invalid pyproject payload at {config_path}: 'tool' must be object" + ) + + codeclone_obj = tool_obj.get("codeclone") + if codeclone_obj is None: + return {} + if not isinstance(codeclone_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + f"{config_path}: 'tool.codeclone' must be object" + ) + + unknown = sorted(set(codeclone_obj.keys()) - set(_CONFIG_KEY_SPECS)) + if unknown: + raise ConfigValidationError( + "Unknown key(s) in tool.codeclone: " + ", ".join(unknown) + ) + + validated: dict[str, object] = {} + for key in sorted(codeclone_obj.keys()): + value = _validate_config_value( + key=key, + value=codeclone_obj[key], + ) + validated[key] = _normalize_path_config_value( + key=key, + value=value, + root_path=root_path, + ) + return validated + + +def apply_pyproject_config_overrides( + *, + args: argparse.Namespace, + config_values: Mapping[str, object], + explicit_cli_dests: set[str], +) -> None: + for key, value in config_values.items(): + if key in explicit_cli_dests: + continue + setattr(args, key, value) + + +def _validate_config_value(*, key: str, value: object) -> object: + spec = _CONFIG_KEY_SPECS[key] + if value is None: + if spec.allow_none: + return None + raise ConfigValidationError( + "Invalid value type for tool.codeclone." 
+ f"{key}: expected {spec.expected_type.__name__}" + ) + + expected_type = spec.expected_type + if expected_type is bool: + return _validated_config_instance( + key=key, + value=value, + expected_type=bool, + expected_name="bool", + ) + + if expected_type is int: + return _validated_config_instance( + key=key, + value=value, + expected_type=int, + expected_name="int", + reject_bool=True, + ) + + if expected_type is str: + return _validated_config_instance( + key=key, + value=value, + expected_type=str, + expected_name="str", + ) + + raise ConfigValidationError(f"Unsupported config key spec for tool.codeclone.{key}") + + +def _validated_config_instance( + *, + key: str, + value: object, + expected_type: type[object], + expected_name: str, + reject_bool: bool = False, +) -> object: + if isinstance(value, expected_type) and ( + not reject_bool or not isinstance(value, bool) + ): + return value + raise ConfigValidationError( + f"Invalid value type for tool.codeclone.{key}: expected {expected_name}" + ) + + +def _load_toml(path: Path) -> object: + if sys.version_info >= (3, 11): + import tomllib + + with path.open("rb") as config_file: + return tomllib.load(config_file) + else: + try: + tomli_module = importlib.import_module("tomli") + except ModuleNotFoundError as exc: + raise ConfigValidationError( + "Python 3.10 requires dependency 'tomli' to read pyproject.toml." + ) from exc + + load_fn = getattr(tomli_module, "load", None) + if not callable(load_fn): + raise ConfigValidationError( + "Invalid 'tomli' module: missing callable 'load'." 
+ ) + + with path.open("rb") as config_file: + return load_fn(config_file) + + +def _normalize_path_config_value( + *, + key: str, + value: object, + root_path: Path, +) -> object: + if key not in _PATH_CONFIG_KEYS: + return value + if not isinstance(value, str): + return value + + path = Path(value).expanduser() + if path.is_absolute(): + return str(path) + return str(root_path / path) diff --git a/codeclone/_cli_gating.py b/codeclone/_cli_gating.py new file mode 100644 index 0000000..d6d100f --- /dev/null +++ b/codeclone/_cli_gating.py @@ -0,0 +1,136 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Protocol + +__all__ = [ + "parse_metric_reason_entry", + "policy_context", + "print_gating_failure_block", +] + + +class _GatingArgs(Protocol): + ci: bool + fail_on_new_metrics: bool + fail_complexity: int + fail_coupling: int + fail_cohesion: int + fail_cycles: bool + fail_dead_code: bool + fail_health: int + fail_on_new: bool + fail_threshold: int + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... + + +def _strip_terminal_period(text: str) -> str: + return text[:-1] if text.endswith(".") else text + + +def parse_metric_reason_entry(reason: str) -> tuple[str, str]: + trimmed = _strip_terminal_period(reason) + + def tail(prefix: str) -> str: + return trimmed[len(prefix) :] + + simple_prefixes: tuple[tuple[str, str], ...] 
= ( + ("New high-risk functions vs metrics baseline: ", "new_high_risk_functions"), + ( + "New high-coupling classes vs metrics baseline: ", + "new_high_coupling_classes", + ), + ("New dependency cycles vs metrics baseline: ", "new_dependency_cycles"), + ("New dead code items vs metrics baseline: ", "new_dead_code_items"), + ) + for prefix, kind in simple_prefixes: + if trimmed.startswith(prefix): + return kind, tail(prefix) + + if trimmed.startswith("Health score regressed vs metrics baseline: delta="): + return "health_delta", trimmed.rsplit("=", maxsplit=1)[1] + + if trimmed.startswith("Dependency cycles detected: "): + return "dependency_cycles", tail("Dependency cycles detected: ").replace( + " cycle(s)", "" + ) + + if trimmed.startswith("Dead code detected (high confidence): "): + return "dead_code_items", tail( + "Dead code detected (high confidence): " + ).replace(" item(s)", "") + + threshold_prefixes: tuple[tuple[str, str], ...] = ( + ("Complexity threshold exceeded: ", "complexity_max"), + ("Coupling threshold exceeded: ", "coupling_max"), + ("Cohesion threshold exceeded: ", "cohesion_max"), + ("Health score below threshold: ", "health_score"), + ) + for prefix, kind in threshold_prefixes: + if trimmed.startswith(prefix): + left_part, threshold_part = tail(prefix).split(", ") + return ( + kind, + f"{left_part.rsplit('=', maxsplit=1)[1]} " + f"(threshold={threshold_part.rsplit('=', maxsplit=1)[1]})", + ) + + return "detail", trimmed + + +def policy_context(*, args: _GatingArgs, gate_kind: str) -> str: + if args.ci: + return "ci" + + parts: list[str] = [] + + match gate_kind: + case "metrics": + if args.fail_on_new_metrics: + parts.append("fail-on-new-metrics") + if args.fail_complexity >= 0: + parts.append(f"fail-complexity={args.fail_complexity}") + if args.fail_coupling >= 0: + parts.append(f"fail-coupling={args.fail_coupling}") + if args.fail_cohesion >= 0: + parts.append(f"fail-cohesion={args.fail_cohesion}") + if args.fail_cycles: + 
parts.append("fail-cycles") + if args.fail_dead_code: + parts.append("fail-dead-code") + if args.fail_health >= 0: + parts.append(f"fail-health={args.fail_health}") + + case "new-clones": + if args.fail_on_new: + parts.append("fail-on-new") + + case "threshold": + if args.fail_threshold >= 0: + parts.append(f"fail-threshold={args.fail_threshold}") + + case _: + pass + + return ", ".join(parts) if parts else "custom" + + +def print_gating_failure_block( + *, + console: _PrinterLike, + code: str, + entries: tuple[tuple[str, object], ...] | list[tuple[str, object]], + args: _GatingArgs, +) -> None: + console.print(f"\n\u2717 GATING FAILURE [{code}]", style="bold red", markup=False) + normalized_entries = [("policy", policy_context(args=args, gate_kind=code))] + normalized_entries.extend((key, str(value)) for key, value in entries) + width = max(len(key) for key, _ in normalized_entries) + console.print() + for key, value in normalized_entries: + console.print(f" {key:<{width}}: {value}") diff --git a/codeclone/_cli_meta.py b/codeclone/_cli_meta.py index 11fcca7..6d893ec 100644 --- a/codeclone/_cli_meta.py +++ b/codeclone/_cli_meta.py @@ -1,25 +1,30 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import sys -from pathlib import Path -from typing import TypedDict +from datetime import datetime, timezone +from typing import TYPE_CHECKING, TypedDict from .baseline import Baseline, current_python_tag -from .contracts import REPORT_SCHEMA_VERSION + +if TYPE_CHECKING: + from pathlib import Path + + from .metrics_baseline import MetricsBaseline def _current_python_version() -> str: return f"{sys.version_info.major}.{sys.version_info.minor}" +def _current_report_timestamp_utc() -> str: + return ( + datetime.now(timezone.utc).replace(microsecond=0).strftime("%Y-%m-%dT%H:%M:%SZ") + ) + + class ReportMeta(TypedDict): """ Canonical report metadata contract shared by HTML, JSON, and TXT reports. @@ -32,8 +37,9 @@ class ReportMeta(TypedDict): - cache_*: cache status/provenance for run transparency """ - report_schema_version: str codeclone_version: str + project_name: str + scan_root: str python_version: str python_tag: str baseline_path: str @@ -51,11 +57,23 @@ class ReportMeta(TypedDict): cache_status: str cache_schema_version: str | None files_skipped_source_io: int + metrics_baseline_path: str + metrics_baseline_loaded: bool + metrics_baseline_status: str + metrics_baseline_schema_version: str | None + metrics_baseline_payload_sha256: str | None + metrics_baseline_payload_sha256_verified: bool + health_score: int | None + health_grade: str | None + analysis_mode: str + metrics_computed: list[str] + report_generated_at_utc: str def _build_report_meta( *, codeclone_version: str, + scan_root: Path, baseline_path: Path, baseline: Baseline, baseline_loaded: bool, @@ -65,10 +83,21 @@ def _build_report_meta( cache_status: str, cache_schema_version: str | None, files_skipped_source_io: int, + metrics_baseline_path: Path, + metrics_baseline: MetricsBaseline, + metrics_baseline_loaded: bool, + metrics_baseline_status: str, + health_score: int | None, + health_grade: 
str | None, + analysis_mode: str, + metrics_computed: tuple[str, ...], + report_generated_at_utc: str, ) -> ReportMeta: + project_name = scan_root.name or str(scan_root) return { - "report_schema_version": REPORT_SCHEMA_VERSION, "codeclone_version": codeclone_version, + "project_name": project_name, + "scan_root": str(scan_root), "python_version": _current_python_version(), "python_tag": current_python_tag(), "baseline_path": str(baseline_path), @@ -90,4 +119,19 @@ def _build_report_meta( "cache_status": cache_status, "cache_schema_version": cache_schema_version, "files_skipped_source_io": files_skipped_source_io, + "metrics_baseline_path": str(metrics_baseline_path), + "metrics_baseline_loaded": metrics_baseline_loaded, + "metrics_baseline_status": metrics_baseline_status, + "metrics_baseline_schema_version": metrics_baseline.schema_version, + "metrics_baseline_payload_sha256": metrics_baseline.payload_sha256, + "metrics_baseline_payload_sha256_verified": ( + metrics_baseline_loaded + and metrics_baseline_status == "ok" + and isinstance(metrics_baseline.payload_sha256, str) + ), + "health_score": health_score, + "health_grade": health_grade, + "analysis_mode": analysis_mode, + "metrics_computed": list(metrics_computed), + "report_generated_at_utc": report_generated_at_utc, } diff --git a/codeclone/_cli_paths.py b/codeclone/_cli_paths.py index 3f76906..2fb6d11 100644 --- a/codeclone/_cli_paths.py +++ b/codeclone/_cli_paths.py @@ -1,25 +1,21 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import sys -from collections.abc import Callable from pathlib import Path - -from rich.console import Console +from typing import TYPE_CHECKING, Protocol from .contracts import ExitCode from .ui_messages import fmt_contract_error +if TYPE_CHECKING: + from collections.abc import Callable + -def expand_path(p: str) -> Path: - return Path(p).expanduser().resolve() +class _Printer(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... def _validate_output_path( @@ -27,7 +23,7 @@ def _validate_output_path( *, expected_suffix: str, label: str, - console: Console, + console: _Printer, invalid_message: Callable[..., str], invalid_path_message: Callable[..., str], ) -> Path: diff --git a/codeclone/_cli_reports.py b/codeclone/_cli_reports.py new file mode 100644 index 0000000..f1ffea6 --- /dev/null +++ b/codeclone/_cli_reports.py @@ -0,0 +1,147 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +import webbrowser +from pathlib import Path +from typing import Protocol + +from . import ui_messages as ui +from .contracts import ExitCode + +__all__ = ["write_report_outputs"] + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... 
+ + +class _QuietArgs(Protocol): + quiet: bool + + +def _path_attr(obj: object, name: str) -> Path | None: + value = getattr(obj, name, None) + return value if isinstance(value, Path) else None + + +def _text_attr(obj: object, name: str) -> str | None: + value = getattr(obj, name, None) + return value if isinstance(value, str) else None + + +def _write_report_output( + *, + out: Path, + content: str, + label: str, + console: _PrinterLike, +) -> None: + try: + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(content, "utf-8") + except OSError as exc: + console.print( + ui.fmt_contract_error( + ui.fmt_report_write_failed(label=label, path=out, error=exc) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + +def _open_html_report_in_browser(*, path: Path) -> None: + if not webbrowser.open_new_tab(path.as_uri()): + raise OSError("no browser handler available") + + +def write_report_outputs( + *, + args: _QuietArgs, + output_paths: object, + report_artifacts: object, + console: _PrinterLike, + open_html_report: bool = False, +) -> str | None: + html_report_path: str | None = None + saved_reports: list[tuple[str, Path]] = [] + html_path = _path_attr(output_paths, "html") + json_path = _path_attr(output_paths, "json") + md_path = _path_attr(output_paths, "md") + sarif_path = _path_attr(output_paths, "sarif") + text_path = _path_attr(output_paths, "text") + html_report = _text_attr(report_artifacts, "html") + json_report = _text_attr(report_artifacts, "json") + md_report = _text_attr(report_artifacts, "md") + sarif_report = _text_attr(report_artifacts, "sarif") + text_report = _text_attr(report_artifacts, "text") + + if html_path and html_report is not None: + out = html_path + _write_report_output( + out=out, + content=html_report, + label="HTML", + console=console, + ) + html_report_path = str(out) + saved_reports.append(("HTML", out)) + + if json_path and json_report is not None: + out = json_path + _write_report_output( + out=out, + content=json_report, + 
label="JSON", + console=console, + ) + saved_reports.append(("JSON", out)) + + if md_path and md_report is not None: + out = md_path + _write_report_output( + out=out, + content=md_report, + label="Markdown", + console=console, + ) + saved_reports.append(("Markdown", out)) + + if sarif_path and sarif_report is not None: + out = sarif_path + _write_report_output( + out=out, + content=sarif_report, + label="SARIF", + console=console, + ) + saved_reports.append(("SARIF", out)) + + if text_path and text_report is not None: + out = text_path + _write_report_output( + out=out, + content=text_report, + label="text", + console=console, + ) + saved_reports.append(("Text", out)) + + if saved_reports and not args.quiet: + cwd = Path.cwd() + console.print() + for label, path in saved_reports: + try: + display = path.relative_to(cwd) + except ValueError: + display = path + console.print(f" [bold]{label} report saved:[/bold] [dim]{display}[/dim]") + + if open_html_report and html_path is not None: + try: + _open_html_report_in_browser(path=html_path) + except Exception as exc: + console.print(ui.fmt_html_report_open_failed(path=html_path, error=exc)) + + return html_report_path diff --git a/codeclone/_cli_rich.py b/codeclone/_cli_rich.py new file mode 100644 index 0000000..506a6ce --- /dev/null +++ b/codeclone/_cli_rich.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +from contextlib import AbstractContextManager, nullcontext +from functools import lru_cache +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from rich.console import Console as RichConsole + from rich.progress import BarColumn as RichBarColumn + from rich.progress import Progress as RichProgress + from rich.progress import SpinnerColumn as RichSpinnerColumn + from rich.progress import TextColumn as RichTextColumn + from rich.progress import TimeElapsedColumn as RichTimeElapsedColumn + from rich.rule 
import Rule as RichRule
    from rich.theme import Theme as RichTheme

# Named styles shared by every themed Rich console the CLI creates.
_RICH_THEME_STYLES: dict[str, str] = {
    "info": "cyan",
    "warning": "yellow",
    "error": "bold red",
    "success": "bold green",
    "dim": "dim",
}
# Matches Rich markup tags such as "[bold]" / "[/dim]" so PlainConsole can
# strip them from its output instead of rendering them.
_RICH_MARKUP_TAG_RE = re.compile(r"\[/?[a-zA-Z][a-zA-Z0-9_ .#:-]*]")

__all__ = [
    "PlainConsole",
    "make_console",
    "make_plain_console",
    "print_banner",
    "rich_console_symbols",
    "rich_progress_symbols",
]


class _PrinterLike(Protocol):
    # Structural stand-in covering both PlainConsole and rich.Console.
    def print(self, *objects: object, **kwargs: object) -> None: ...


class PlainConsole:
    """Lightweight console for quiet/no-progress mode."""

    @staticmethod
    def print(
        *objects: object,
        sep: str = " ",
        end: str = "\n",
        markup: bool = True,
        **_: object,
    ) -> None:
        # Mimics the rich.Console.print() signature closely enough for this
        # package's callers; when markup is enabled, Rich tags are stripped
        # rather than rendered.  Extra keyword args are accepted and ignored.
        text = sep.join(str(obj) for obj in objects)
        if markup:
            text = _RICH_MARKUP_TAG_RE.sub("", text)
        print(text, end=end)

    @staticmethod
    def status(*_: object, **__: object) -> AbstractContextManager[None]:
        # No spinner in plain mode; callers still get a context manager.
        return nullcontext()


@lru_cache(maxsize=1)
def rich_console_symbols() -> tuple[
    type[RichConsole],
    type[RichTheme],
    type[RichRule],
]:
    """Import and cache the rich console classes (deferred import)."""
    from rich.console import Console as _RichConsole
    from rich.rule import Rule as _RichRule
    from rich.theme import Theme as _RichTheme

    return _RichConsole, _RichTheme, _RichRule


@lru_cache(maxsize=1)
def rich_progress_symbols() -> tuple[
    type[RichProgress],
    type[RichSpinnerColumn],
    type[RichTextColumn],
    type[RichBarColumn],
    type[RichTimeElapsedColumn],
]:
    """Import and cache the rich progress classes (deferred import)."""
    import rich.progress as _rich_progress

    return (
        _rich_progress.Progress,
        _rich_progress.SpinnerColumn,
        _rich_progress.TextColumn,
        _rich_progress.BarColumn,
        _rich_progress.TimeElapsedColumn,
    )


def make_console(*, no_color: bool, width: int) -> RichConsole:
    """Build the themed Rich console used for normal (non-quiet) output."""
    console_cls, theme_cls, _ = rich_console_symbols()
    return console_cls(
        theme=theme_cls(_RICH_THEME_STYLES),
        no_color=no_color,
        width=width,
    )


def make_plain_console() -> PlainConsole:
    """Build the markup-stripping console used for quiet mode."""
    return PlainConsole()


def print_banner(
    *,
    console: _PrinterLike,
    banner_title: str,
    project_name: str | None = None,
    root_display: str | None = None,
) -> None:
    """Print the CLI banner, an "Analyze" rule, and the optional root line."""
    _, _, rule_cls = rich_console_symbols()
    console.print(banner_title)
    console.print()
    console.print(
        rule_cls(
            title=f"Analyze: {project_name}" if project_name else "Analyze",
            style="dim",
            characters="\u2500",
        )
    )
    if root_display is not None:
        console.print(f" [dim]Root:[/dim] [dim]{root_display}[/dim]")
diff --git a/codeclone/_cli_runtime.py b/codeclone/_cli_runtime.py
new file mode 100644
index 0000000..b7e315e
--- /dev/null
+++ b/codeclone/_cli_runtime.py
@@ -0,0 +1,189 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

import sys
from pathlib import Path
from typing import Protocol

from . import ui_messages as ui
from .cache import CacheStatus
from .contracts import ExitCode

__all__ = [
    "configure_metrics_mode",
    "metrics_computed",
    "print_failed_files",
    "resolve_cache_path",
    "resolve_cache_status",
    "validate_numeric_args",
]


class _RuntimeArgs(Protocol):
    # Structural view of the argparse namespace: only the attributes this
    # module reads or mutates.
    cache_path: str | None
    max_baseline_size_mb: int
    max_cache_size_mb: int
    fail_threshold: int
    fail_complexity: int
    fail_coupling: int
    fail_cohesion: int
    fail_health: int
    fail_on_new_metrics: bool
    update_metrics_baseline: bool
    skip_metrics: bool
    fail_cycles: bool
    fail_dead_code: bool
    skip_dead_code: bool
    skip_dependencies: bool


class _PrinterLike(Protocol):
    def print(self, *objects: object, **kwargs: object) -> None: ...


class _CacheLike(Protocol):
    # Read-only view of the cache attributes inspected by
    # resolve_cache_status().
    @property
    def load_status(self) -> CacheStatus | str | None: ...

    @property
    def load_warning(self) -> str | None: ...

    @property
    def cache_schema_version(self) -> str | None: ...
def validate_numeric_args(args: _RuntimeArgs) -> bool:
    """Return True when every size/threshold argument is in its legal range.

    Sizes must be >= 0; the fail-* thresholds use -1 as a "disabled"
    sentinel, so anything below -1 is invalid.
    """
    return bool(
        not (
            args.max_baseline_size_mb < 0
            or args.max_cache_size_mb < 0
            or args.fail_threshold < -1
            or args.fail_complexity < -1
            or args.fail_coupling < -1
            or args.fail_cohesion < -1
            or args.fail_health < -1
        )
    )


def _metrics_flags_requested(args: _RuntimeArgs) -> bool:
    """True when any metrics gating/update flag was explicitly enabled."""
    return bool(
        args.fail_complexity >= 0
        or args.fail_coupling >= 0
        or args.fail_cohesion >= 0
        or args.fail_cycles
        or args.fail_dead_code
        or args.fail_health >= 0
        or args.fail_on_new_metrics
        or args.update_metrics_baseline
    )


def configure_metrics_mode(
    *,
    args: _RuntimeArgs,
    metrics_baseline_exists: bool,
    console: _PrinterLike,
) -> None:
    """Normalize the metrics-related flags on *args* in place.

    Exits with CONTRACT_ERROR when --skip-metrics conflicts with explicit
    gating/update flags.  If metrics were neither requested nor previously
    baselined, metrics are skipped implicitly.  Skipping metrics also skips
    the dead-code and dependency passes; conversely, gating on cycles or
    dead code forces the corresponding pass back on.
    """
    metrics_flags_requested = _metrics_flags_requested(args)

    if args.skip_metrics and metrics_flags_requested:
        console.print(
            ui.fmt_contract_error(
                "--skip-metrics cannot be used together with metrics gating/update "
                "flags."
            )
        )
        sys.exit(ExitCode.CONTRACT_ERROR)

    if (
        not args.skip_metrics
        and not metrics_flags_requested
        and not metrics_baseline_exists
    ):
        args.skip_metrics = True

    if args.skip_metrics:
        args.skip_dead_code = True
        args.skip_dependencies = True
        return

    # Gating flags re-enable passes that may have been skipped explicitly.
    if args.fail_dead_code:
        args.skip_dead_code = False
    if args.fail_cycles:
        args.skip_dependencies = False


def resolve_cache_path(
    *,
    root_path: Path,
    args: _RuntimeArgs,
    from_args: bool,
    legacy_cache_path: Path,
    console: _PrinterLike,
) -> Path:
    """Pick the cache file location.

    An explicit --cache-path (when *from_args* is set) wins.  Otherwise the
    default <root>/.cache/codeclone/cache.json is used, and a warning is
    printed if a legacy cache file still exists at a different location.
    """
    if from_args and args.cache_path:
        return Path(args.cache_path).expanduser()

    cache_path = root_path / ".cache" / "codeclone" / "cache.json"
    if legacy_cache_path.exists():
        try:
            legacy_resolved = legacy_cache_path.resolve()
        except OSError:
            # resolve() can fail (e.g. permission issues); compare and warn
            # with the unresolved path instead.
            legacy_resolved = legacy_cache_path
        if legacy_resolved != cache_path:
            console.print(
                ui.fmt_legacy_cache_warning(
                    legacy_path=legacy_resolved,
                    new_path=cache_path,
                )
            )
    return cache_path


def metrics_computed(args: _RuntimeArgs) -> tuple[str, ...]:
    """Return the names of the metric families that will be computed."""
    if args.skip_metrics:
        return ()

    computed = ["complexity", "coupling", "cohesion", "health"]
    if not args.skip_dependencies:
        computed.append("dependencies")
    if not args.skip_dead_code:
        computed.append("dead_code")
    return tuple(computed)


def resolve_cache_status(cache: _CacheLike) -> tuple[CacheStatus, str | None]:
    """Coerce a cache object's load status into (CacheStatus, schema_version).

    Accepts enum, string, or missing status values; unknown strings and
    missing statuses degrade to OK or INVALID_TYPE depending on whether a
    load warning is present.
    """
    raw_cache_status = getattr(cache, "load_status", None)
    load_warning = getattr(cache, "load_warning", None)
    if isinstance(raw_cache_status, CacheStatus):
        cache_status = raw_cache_status
    elif isinstance(raw_cache_status, str):
        try:
            cache_status = CacheStatus(raw_cache_status)
        except ValueError:
            cache_status = (
                CacheStatus.OK if load_warning is None else CacheStatus.INVALID_TYPE
            )
    else:
        cache_status = (
            CacheStatus.OK if load_warning is None else CacheStatus.INVALID_TYPE
        )

    raw_cache_schema_version = getattr(cache, "cache_schema_version", None)
    cache_schema_version = (
        raw_cache_schema_version if isinstance(raw_cache_schema_version, str) else None
    )
    return cache_status, cache_schema_version


def print_failed_files(*, failed_files: tuple[str, ...], console: _PrinterLike) -> None:
    """Print up to 10 failed files plus an "... and N more" trailer."""
    if not failed_files:
        return
    console.print(ui.fmt_failed_files_header(len(failed_files)))
    for failure in failed_files[:10]:
        console.print(f" • {failure}")
    if len(failed_files) > 10:
        console.print(f" ... and {len(failed_files) - 10} more")
diff --git a/codeclone/_cli_summary.py b/codeclone/_cli_summary.py
index a43bd35..d1d2369 100644
--- a/codeclone/_cli_summary.py
+++ b/codeclone/_cli_summary.py
@@ -1,89 +1,46 @@
-"""
-CodeClone — AST and CFG-based code clone detector for Python
-focused on architectural duplication.
-
-Copyright (c) 2026 Den Rozhnovskiy
-Licensed under the MIT License.
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations -from rich.console import Console -from rich.table import Table -from rich.text import Text +from dataclasses import dataclass +from typing import Protocol from . import ui_messages as ui -_CLONE_LABELS = frozenset( - { - ui.SUMMARY_LABEL_FUNCTION, - ui.SUMMARY_LABEL_BLOCK, - ui.SUMMARY_LABEL_SEGMENT, - } -) - - -def _summary_value_style(*, label: str, value: int) -> str: - if value == 0: - return "dim" - if label == ui.SUMMARY_LABEL_NEW_BASELINE: - return "bold red" - if label == ui.SUMMARY_LABEL_SUPPRESSED: - return "yellow" - if label in _CLONE_LABELS: - return "bold yellow" - return "bold" - -def _build_summary_rows( - *, - files_found: int, - files_analyzed: int, - cache_hits: int, - files_skipped: int, - func_clones_count: int, - block_clones_count: int, - segment_clones_count: int, - suppressed_segment_groups: int, - new_clones_count: int, -) -> list[tuple[str, int]]: - return [ - (ui.SUMMARY_LABEL_FILES_FOUND, files_found), - (ui.SUMMARY_LABEL_FILES_ANALYZED, files_analyzed), - (ui.SUMMARY_LABEL_CACHE_HITS, cache_hits), - (ui.SUMMARY_LABEL_FILES_SKIPPED, files_skipped), - (ui.SUMMARY_LABEL_FUNCTION, func_clones_count), - (ui.SUMMARY_LABEL_BLOCK, block_clones_count), - (ui.SUMMARY_LABEL_SEGMENT, segment_clones_count), - (ui.SUMMARY_LABEL_SUPPRESSED, suppressed_segment_groups), - (ui.SUMMARY_LABEL_NEW_BASELINE, new_clones_count), - ] +@dataclass(frozen=True, slots=True) +class MetricsSnapshot: + complexity_avg: float + complexity_max: int + high_risk_count: int + coupling_avg: float + coupling_max: int + cohesion_avg: float + cohesion_max: int + cycles_count: int + dead_code_count: int + health_total: int + health_grade: str + suppressed_dead_code_count: int = 0 -def _build_summary_table(rows: list[tuple[str, int]]) -> Table: - summary_table = Table( - title=ui.SUMMARY_TITLE, - show_header=True, - width=ui.CLI_LAYOUT_WIDTH, - ) - 
summary_table.add_column("Metric") - summary_table.add_column("Value", justify="right") - for label, value in rows: - summary_table.add_row( - label, - Text(str(value), style=_summary_value_style(label=label, value=value)), - ) - return summary_table +class _Printer(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... def _print_summary( *, - console: Console, + console: _Printer, quiet: bool, files_found: int, files_analyzed: int, cache_hits: int, files_skipped: int, + analyzed_lines: int = 0, + analyzed_functions: int = 0, + analyzed_methods: int = 0, + analyzed_classes: int = 0, func_clones_count: int, block_clones_count: int, segment_clones_count: int, @@ -91,22 +48,10 @@ def _print_summary( new_clones_count: int, ) -> None: invariant_ok = files_found == (files_analyzed + cache_hits + files_skipped) - rows = _build_summary_rows( - files_found=files_found, - files_analyzed=files_analyzed, - cache_hits=cache_hits, - files_skipped=files_skipped, - func_clones_count=func_clones_count, - block_clones_count=block_clones_count, - segment_clones_count=segment_clones_count, - suppressed_segment_groups=suppressed_segment_groups, - new_clones_count=new_clones_count, - ) if quiet: - console.print(ui.SUMMARY_TITLE) console.print( - ui.fmt_summary_compact_input( + ui.fmt_summary_compact( found=files_found, analyzed=files_analyzed, cache_hits=cache_hits, @@ -123,7 +68,84 @@ def _print_summary( ) ) else: - console.print(_build_summary_table(rows)) + from rich.rule import Rule + + console.print() + console.print(Rule(title=ui.SUMMARY_TITLE, style="dim", characters="\u2500")) + console.print( + ui.fmt_summary_files( + found=files_found, + analyzed=files_analyzed, + cached=cache_hits, + skipped=files_skipped, + ) + ) + parsed_line = ui.fmt_summary_parsed( + lines=analyzed_lines, + functions=analyzed_functions, + methods=analyzed_methods, + classes=analyzed_classes, + ) + if parsed_line is not None: + console.print(parsed_line) + console.print( + 
ui.fmt_summary_clones( + func=func_clones_count, + block=block_clones_count, + segment=segment_clones_count, + suppressed=suppressed_segment_groups, + new=new_clones_count, + ) + ) if not invariant_ok: console.print(f"[warning]{ui.WARN_SUMMARY_ACCOUNTING_MISMATCH}[/warning]") + + +def _print_metrics( + *, + console: _Printer, + quiet: bool, + metrics: MetricsSnapshot, +) -> None: + if quiet: + console.print( + ui.fmt_summary_compact_metrics( + cc_avg=metrics.complexity_avg, + cc_max=metrics.complexity_max, + cbo_avg=metrics.coupling_avg, + cbo_max=metrics.coupling_max, + lcom_avg=metrics.cohesion_avg, + lcom_max=metrics.cohesion_max, + cycles=metrics.cycles_count, + dead=metrics.dead_code_count, + health=metrics.health_total, + grade=metrics.health_grade, + ) + ) + else: + from rich.rule import Rule + + console.print() + console.print(Rule(title=ui.METRICS_TITLE, style="dim", characters="\u2500")) + console.print(ui.fmt_metrics_health(metrics.health_total, metrics.health_grade)) + console.print( + ui.fmt_metrics_cc( + metrics.complexity_avg, + metrics.complexity_max, + metrics.high_risk_count, + ) + ) + console.print( + ui.fmt_metrics_coupling(metrics.coupling_avg, metrics.coupling_max) + ) + console.print( + ui.fmt_metrics_cohesion(metrics.cohesion_avg, metrics.cohesion_max) + ) + console.print(ui.fmt_metrics_cycles(metrics.cycles_count)) + console.print( + ui.fmt_metrics_dead_code( + metrics.dead_code_count, + suppressed=metrics.suppressed_dead_code_count, + ) + ) diff --git a/codeclone/_coerce.py b/codeclone/_coerce.py new file mode 100644 index 0000000..e4c07bd --- /dev/null +++ b/codeclone/_coerce.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence + +__all__ = ["as_float", "as_int", "as_mapping", "as_sequence", "as_str"] + + +def as_int(value: object, default: int = 0) -> int: + if isinstance(value, bool): + return int(value) + if 
isinstance(value, int):
        return value
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            return default
    return default


def as_float(value: object, default: float = 0.0) -> float:
    """Best-effort float coercion; return *default* on failure."""
    if isinstance(value, bool):
        # bool is an int subclass; check it first so True/False map to 1.0/0.0
        # deliberately rather than falling through the numeric branch.
        return float(int(value))
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return default
    return default


def as_str(value: object, default: str = "") -> str:
    """Return *value* if it is a str, else *default*."""
    return value if isinstance(value, str) else default


def as_mapping(value: object) -> Mapping[str, object]:
    """Return *value* if it is a Mapping, else an empty mapping."""
    if isinstance(value, Mapping):
        return value
    return {}


def as_sequence(value: object) -> Sequence[object]:
    """Return *value* if it is a non-string Sequence, else an empty tuple.

    str/bytes/bytearray are sequences too, but are treated as scalars here.
    """
    if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)):
        return value
    return ()
diff --git a/codeclone/_html_badges.py b/codeclone/_html_badges.py
new file mode 100644
index 0000000..f35dc17
--- /dev/null
+++ b/codeclone/_html_badges.py
@@ -0,0 +1,172 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 Den Rozhnovskiy

"""Shared HTML badge, label, and visual helpers for the report UI layer.
+ +Naming conventions: + - ``{domain}-badge`` for inline taxonomy labels (risk-badge, severity-badge, + source-kind-badge, clone-type-badge) + - ``meta-item`` is the **single** card pattern for all stat/KPI/meta cards + - ``meta-label`` + ``meta-value`` are the **single** label+value pair + - ``suggestion-card`` for suggestion grid items +""" + +from __future__ import annotations + +from collections.abc import Callable, Sequence + +from ._html_escape import _escape_attr, _escape_html +from .domain.quality import ( + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + RISK_HIGH, + RISK_LOW, + RISK_MEDIUM, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) +from .report._source_kinds import normalize_source_kind, source_kind_label + +__all__ = [ + "CHECK_CIRCLE_SVG", + "_quality_badge_html", + "_render_chain_flow", + "_short_label", + "_source_kind_badge_html", + "_stat_card", + "_tab_empty", +] + +_EFFORT_CSS: dict[str, str] = { + EFFORT_EASY: "success", + EFFORT_MODERATE: "warning", + EFFORT_HARD: "error", +} + +CHECK_CIRCLE_SVG = ( + '' + '' + '' + "" +) + + +def _quality_badge_html(text: str) -> str: + """Render a risk / severity / effort value as a styled badge.""" + r = text.strip().lower() + if r in (RISK_LOW, RISK_HIGH, RISK_MEDIUM): + return ( + f'{_escape_html(r)}' + ) + if r in (SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO): + return ( + f'' + f"{_escape_html(r)}" + ) + if r in _EFFORT_CSS: + return ( + f'{_escape_html(r)}' + ) + return _escape_html(text) + + +def _source_kind_badge_html(source_kind: str) -> str: + normalized = normalize_source_kind(source_kind) + return ( + f'' + f"{_escape_html(source_kind_label(normalized))}" + ) + + +def _tab_empty(message: str) -> str: + return ( + '
' + f"{CHECK_CIRCLE_SVG}" + f'
{_escape_html(message)}
' + '
' + "Nothing to report - keep up the good work." + "
" + "
" + ) + + +def _short_label(name: str, max_len: int = 18) -> str: + """Shorten a dotted name keeping the last segment, truncated if needed.""" + parts = name.rsplit(".", maxsplit=1) + label = parts[-1] if len(parts) > 1 else name + if len(label) > max_len: + half = max_len // 2 - 1 + return f"{label[:half]}..{label[-half:]}" + return label + + +def _render_chain_flow( + parts: Sequence[str], + *, + arrows: bool = False, +) -> str: + """Render a sequence of names as chain-node spans, optionally with arrows.""" + nodes: list[str] = [] + for i, mod in enumerate(parts): + short = _short_label(str(mod)) + nodes.append( + f'' + f"{_escape_html(short)}" + ) + if arrows and i < len(parts) - 1: + nodes.append('\u2192') + return f'{"".join(nodes)}' + + +def _stat_card( + label: str, + value: object, + *, + detail: str = "", + tip: str = "", + value_tone: str = "", + css_class: str = "meta-item", + glossary_tip_fn: Callable[[str], str] | None = None, + delta_new: int | None = None, +) -> str: + """Unified stat-card renderer. + + Always emits the same HTML structure using ``.meta-item`` / + ``.meta-label`` / ``.meta-value`` so every stat card shares the + exact same design code. + + *value_tone* — semantic color for the main value: + ``"good"`` → green (metric is clean), ``"bad"`` → red (metric has issues), + ``"warn"`` → yellow, ``"muted"`` → dimmed, ``""`` → default text-primary. + + *delta_new* — if provided and > 0, renders a ``+N new`` badge + inline with the label (top-right). For "bad" metrics (complexity, + coupling, etc.) positive delta means regression → red. + """ + tip_html = "" + if glossary_tip_fn is not None: + tip_html = glossary_tip_fn(label) + elif tip: + tip_html = f'?' + + detail_html = "" + if detail: + detail_html = f'
{detail}
' + + delta_html = "" + if delta_new is not None and delta_new > 0: + delta_html = f'+{delta_new}' + + value_cls = f" meta-value--{value_tone}" if value_tone else "" + + return ( + f'
' + f'
{_escape_html(label)}{tip_html}{delta_html}
' + f'
{_escape_html(str(value))}
' + f"{detail_html}" + "
" + ) diff --git a/codeclone/_html_css.py b/codeclone/_html_css.py new file mode 100644 index 0000000..99fcf00 --- /dev/null +++ b/codeclone/_html_css.py @@ -0,0 +1,1103 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""CSS design system for the HTML report — tokens, components, layout.""" + +from __future__ import annotations + +# --------------------------------------------------------------------------- +# Design tokens +# --------------------------------------------------------------------------- + +_TOKENS_DARK = """\ +:root{ + --font-sans:Inter,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Oxygen,Ubuntu,sans-serif; + --font-mono:"JetBrains Mono",ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace; + + /* surface — slate scale */ + --bg-body:#0f1117; + --bg-surface:#161822; + --bg-raised:#1c1f2e; + --bg-overlay:#232639; + --bg-subtle:#2a2d42; + + /* border */ + --border:#2e3248; + --border-strong:#3d4160; + + /* text */ + --text-primary:#e2e4ed; + --text-secondary:#a0a3b8; + --text-muted:#6b6f88; + + /* accent — indigo */ + --accent-primary:#6366f1; + --accent-hover:#818cf8; + --accent-muted:color-mix(in oklch,#6366f1 25%,transparent); + + /* semantic */ + --success:#34d399; + --success-muted:color-mix(in oklch,#34d399 15%,transparent); + --warning:#fbbf24; + --warning-muted:color-mix(in oklch,#fbbf24 15%,transparent); + --error:#f87171; + --error-muted:color-mix(in oklch,#f87171 15%,transparent); + --danger:#f87171; + --info:#60a5fa; + --info-muted:color-mix(in oklch,#60a5fa 15%,transparent); + + /* elevation */ + --shadow-sm:0 1px 2px rgba(0,0,0,.25); + --shadow-md:0 2px 8px rgba(0,0,0,.3); + --shadow-lg:0 4px 16px rgba(0,0,0,.35); + --shadow-xl:0 8px 32px rgba(0,0,0,.4); + + /* radii */ + --radius-sm:4px; + --radius-md:6px; + --radius-lg:8px; + --radius-xl:12px; + + /* spacing */ + --sp-1:4px;--sp-2:8px;--sp-3:12px;--sp-4:16px;--sp-5:20px;--sp-6:24px;--sp-8:32px;--sp-10:40px; + + /* transitions */ + 
--ease:cubic-bezier(.4,0,.2,1); + --dur-fast:120ms; + --dur-normal:200ms; + --dur-slow:300ms; + + /* sizes */ + --topbar-h:72px; + --container-max:1360px; + + color-scheme:dark; +} +""" + +_TOKENS_LIGHT = """\ +@media(prefers-color-scheme:light){ + :root:not([data-theme]){ + --bg-body:#f8f9fc;--bg-surface:#ffffff;--bg-raised:#f1f3f8;--bg-overlay:#e8eaf2;--bg-subtle:#dde0eb; + --border:#d4d7e3;--border-strong:#b8bdd0; + --text-primary:#1a1d2e;--text-secondary:#4b5068;--text-muted:#8589a0; + --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); + --success:#059669;--success-muted:color-mix(in oklch,#059669 10%,transparent); + --warning:#d97706;--warning-muted:color-mix(in oklch,#d97706 10%,transparent); + --error:#dc2626;--error-muted:color-mix(in oklch,#dc2626 10%,transparent); + --danger:#dc2626;--info:#2563eb;--info-muted:color-mix(in oklch,#2563eb 10%,transparent); + --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); + --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); + color-scheme:light; + } +} +[data-theme="light"]{ + --bg-body:#f8f9fc;--bg-surface:#ffffff;--bg-raised:#f1f3f8;--bg-overlay:#e8eaf2;--bg-subtle:#dde0eb; + --border:#d4d7e3;--border-strong:#b8bdd0; + --text-primary:#1a1d2e;--text-secondary:#4b5068;--text-muted:#8589a0; + --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); + --success:#059669;--success-muted:color-mix(in oklch,#059669 10%,transparent); + --warning:#d97706;--warning-muted:color-mix(in oklch,#d97706 10%,transparent); + --error:#dc2626;--error-muted:color-mix(in oklch,#dc2626 10%,transparent); + --danger:#dc2626;--info:#2563eb;--info-muted:color-mix(in oklch,#2563eb 10%,transparent); + --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); + --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); + color-scheme:light; +} +""" + +# 
--------------------------------------------------------------------------- +# Reset + base +# --------------------------------------------------------------------------- + +_RESET = """\ +*,*::before,*::after{box-sizing:border-box;margin:0;padding:0} +html{-webkit-text-size-adjust:100%;text-size-adjust:100%;-webkit-font-smoothing:antialiased; + -moz-osx-font-smoothing:grayscale;scroll-behavior:smooth;scrollbar-gutter:stable} +body{font-family:var(--font-sans);font-size:14px;line-height:1.6;color:var(--text-primary); + background:var(--bg-body);overflow-x:hidden} +code,pre,kbd{font-family:var(--font-mono);font-size:13px} +a{color:var(--accent-primary);text-decoration:none} +a:hover{color:var(--accent-hover);text-decoration:underline} +h1,h2,h3,h4{font-weight:600;line-height:1.3;color:var(--text-primary)} +h1{font-size:1.5rem}h2{font-size:1.25rem}h3{font-size:1.1rem} +ul,ol{list-style:none} +button,input,select{font:inherit;color:inherit} +summary{cursor:pointer} +.muted{color:var(--text-muted);font-size:.85em} +""" + +# --------------------------------------------------------------------------- +# Layout +# --------------------------------------------------------------------------- + +_LAYOUT = """\ +.container{max-width:var(--container-max);margin:0 auto;padding:0 var(--sp-6)} + +/* Topbar */ +.topbar{position:sticky;top:0;z-index:100;background:var(--bg-surface);border-bottom:1px solid var(--border); + box-shadow:var(--shadow-sm)} +.topbar-inner{display:flex;align-items:center;justify-content:space-between; + height:72px;padding:0 var(--sp-6);max-width:var(--container-max);margin:0 auto} +.brand{display:flex;align-items:center;gap:var(--sp-3)} +.brand-logo{flex-shrink:0} +.brand-text{display:flex;flex-direction:column;gap:2px} +.brand h1{font-size:1.15rem;font-weight:700;color:var(--text-primary);line-height:1.3} +.brand-meta{font-size:.78rem;color:var(--text-muted)} +.brand-project{font-weight:500;color:var(--text-secondary)} 
+.brand-project-name{font-family:var(--font-mono);font-size:.85em;font-weight:500;padding:1px 5px; + border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--accent-primary)} +.topbar-actions{display:flex;align-items:center;gap:var(--sp-2)} + +/* Theme toggle */ +.theme-toggle{display:inline-flex;align-items:center;gap:var(--sp-1); + padding:var(--sp-1) var(--sp-3);background:none;border:1px solid var(--border); + border-radius:var(--radius-md);cursor:pointer;color:var(--text-muted);font-size:.85rem; + font-weight:500;font-family:inherit;transition:all var(--dur-fast) var(--ease)} +.theme-toggle:hover{color:var(--text-primary);background:var(--bg-raised);border-color:var(--border-strong)} +.theme-toggle svg{width:16px;height:16px} + +/* Main tabs — full-width pill bar */ +.main-tabs-wrap{position:sticky;top:var(--topbar-h);z-index:90;padding:var(--sp-3) 0 0; + background:var(--bg-body)} +.main-tabs{display:flex;gap:var(--sp-1);padding:var(--sp-1); + background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); + overflow-x:auto;scrollbar-width:none;-webkit-overflow-scrolling:touch} +.main-tabs::-webkit-scrollbar{display:none} +.main-tab{position:relative;flex:1;text-align:center;padding:var(--sp-2) var(--sp-3); + background:none;border:none;cursor:pointer;font-size:.85rem;font-weight:500; + color:var(--text-muted);white-space:nowrap;border-radius:var(--radius-md); + transition:all var(--dur-fast) var(--ease)} +.main-tab:hover{color:var(--text-primary);background:var(--bg-raised)} +.main-tab[aria-selected="true"]{color:var(--accent-primary);background:var(--accent-muted)} +.tab-count{display:inline-flex;align-items:center;justify-content:center;min-width:18px; + height:18px;padding:0 5px;font-size:.7rem;font-weight:700;border-radius:9px; + background:var(--bg-overlay);color:var(--text-muted);margin-left:var(--sp-1)} +.main-tab[aria-selected="true"] .tab-count{background:var(--accent-primary); + color:#fff} + +/* Tab 
panels */ +.tab-panel{display:none;padding:var(--sp-6) 0;contain:layout style} +.tab-panel.active{display:block} +""" + +# --------------------------------------------------------------------------- +# Components: buttons, inputs, selects +# --------------------------------------------------------------------------- + +_CONTROLS = """\ +/* Buttons */ +.btn{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-3); + font-size:.8rem;font-weight:500;border:1px solid var(--border);border-radius:var(--radius-md); + background:var(--bg-raised);color:var(--text-secondary);cursor:pointer;white-space:nowrap; + transition:all var(--dur-fast) var(--ease)} +.btn:hover{border-color:var(--border-strong);color:var(--text-primary);background:var(--bg-overlay)} +.btn-prov{position:relative} +.btn-prov .prov-dot{width:8px;height:8px;border-radius:50%;flex-shrink:0} +.btn-prov .prov-dot.dot-green{background:var(--success)} +.btn-prov .prov-dot.dot-amber{background:var(--warning)} +.btn-prov .prov-dot.dot-red{background:var(--error)} +.btn-prov .prov-dot.dot-neutral{background:var(--text-muted)} +.btn.ghost{background:none;border-color:transparent} +.btn.ghost:hover{background:var(--bg-raised);border-color:var(--border)} +.btn svg{width:14px;height:14px} + +/* Inputs */ +input[type="text"]{padding:var(--sp-1) var(--sp-3);font-size:.85rem;border:1px solid var(--border); + border-radius:var(--radius-md);background:var(--bg-body);color:var(--text-primary);outline:none; + transition:border-color var(--dur-fast) var(--ease)} +input[type="text"]:focus{border-color:var(--accent-primary);box-shadow:0 0 0 2px var(--accent-muted)} +input[type="text"]::placeholder{color:var(--text-muted)} + +/* Selects */ +.select{padding:var(--sp-1) var(--sp-3);padding-right:var(--sp-6);font-size:.8rem; + border:1px solid var(--border);border-radius:var(--radius-md);background:var(--bg-raised); + color:var(--text-secondary);cursor:pointer;appearance:none; + 
background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%236b6f88' stroke-width='2'%3E%3Cpath d='M3 4.5l3 3 3-3'/%3E%3C/svg%3E"); + background-repeat:no-repeat;background-position:right 8px center} +.select:focus{border-color:var(--accent-primary);outline:none} + +/* Checkbox labels */ +.inline-check{display:inline-flex;align-items:center;gap:var(--sp-1);font-size:.8rem; + color:var(--text-muted);cursor:pointer;white-space:nowrap} +.inline-check input[type="checkbox"]{accent-color:var(--accent-primary);width:14px;height:14px} +""" + +# --------------------------------------------------------------------------- +# Search box +# --------------------------------------------------------------------------- + +_SEARCH = """\ +.search-box{position:relative;display:flex;align-items:center} +.search-ico{position:absolute;left:var(--sp-2);color:var(--text-muted);pointer-events:none; + display:flex;align-items:center} +.search-ico svg{width:14px;height:14px} +.search-box input[type="text"]{padding-left:28px;width:200px} +.clear-btn{position:absolute;right:var(--sp-1);background:none;border:none;cursor:pointer; + color:var(--text-muted);padding:2px;display:flex;align-items:center;opacity:0; + transition:opacity var(--dur-fast) var(--ease)} +.search-box input:not(:placeholder-shown)~.clear-btn{opacity:1} +.clear-btn:hover{color:var(--text-primary)} +.clear-btn svg{width:14px;height:14px} +""" + +# --------------------------------------------------------------------------- +# Toolbar + pagination +# --------------------------------------------------------------------------- + +_TOOLBAR = """\ +.toolbar{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2); + padding:var(--sp-3) var(--sp-4);background:var(--bg-raised);border:1px solid var(--border); + border-radius:var(--radius-lg);margin-bottom:var(--sp-4)} +.toolbar-left{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2);flex:1} 
+.toolbar-right{display:flex;align-items:center;gap:var(--sp-2)} + +.pagination{display:flex;align-items:center;gap:var(--sp-1)} +.page-meta{font-size:.8rem;color:var(--text-muted);white-space:nowrap;min-width:100px;text-align:center} + +/* Suggestions toolbar */ +.suggestions-toolbar{flex-direction:column;align-items:stretch} +.suggestions-toolbar-row{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2)} +.suggestions-toolbar-row--secondary{padding-top:var(--sp-2);border-top:1px solid var(--border)} +.suggestions-count-label{margin-left:auto;font-size:.8rem;color:var(--text-muted);font-weight:500} +""" + +# --------------------------------------------------------------------------- +# Insight banners +# --------------------------------------------------------------------------- + +_INSIGHT = """\ +.insight-banner{padding:var(--sp-3) var(--sp-4);border-radius:var(--radius-md); + margin-bottom:var(--sp-4);border-left:3px solid var(--border);background:none} +.insight-question{font-size:.72rem;font-weight:500;color:var(--text-muted); + text-transform:uppercase;letter-spacing:.04em;margin-bottom:2px} +.insight-answer{font-size:.82rem;color:var(--text-muted);line-height:1.5} + +.insight-ok{border-left-color:var(--success);background:var(--success-muted)} +.insight-warn{border-left-color:var(--warning);background:var(--warning-muted)} +.insight-risk{border-left-color:var(--error);background:var(--error-muted)} +.insight-info{border-left-color:var(--info);background:var(--info-muted)} +""" + +# --------------------------------------------------------------------------- +# Tables +# --------------------------------------------------------------------------- + +_TABLES = """\ +.table-wrap{overflow-x:auto;border:1px solid var(--border);border-radius:var(--radius-lg); + margin-bottom:var(--sp-4)} +.table{width:100%;border-collapse:collapse;font-size:.82rem;font-family:var(--font-mono)} +.table th{position:sticky;top:0;z-index:2;padding:var(--sp-2) 
var(--sp-3);text-align:left;font-family:var(--font-sans); + font-weight:600;font-size:.75rem;text-transform:uppercase;letter-spacing:.05em; + color:var(--text-muted);background:var(--bg-overlay);border-bottom:1px solid var(--border); + white-space:nowrap;cursor:default;user-select:none} +.table th[data-sortable]{cursor:pointer} +.table th[data-sortable]:hover{color:var(--text-primary)} +.table th .sort-icon{display:inline-flex;margin-left:var(--sp-1);opacity:.4} +.table th[aria-sort] .sort-icon{opacity:1;color:var(--accent-primary)} +.table td{padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border);color:var(--text-secondary); + vertical-align:top} +.table tr:last-child td{border-bottom:none} +.table tr:hover td{background:var(--bg-raised)} +.table .col-name{font-weight:500;color:var(--text-primary)} +.table .col-file{color:var(--text-muted);max-width:240px;overflow:hidden; + text-overflow:ellipsis;white-space:nowrap} +.table .col-number{font-variant-numeric:tabular-nums;text-align:right;white-space:nowrap} +.table .col-risk{white-space:nowrap} +.table .col-wide{max-width:320px;word-break:break-all} +.table-empty{padding:var(--sp-8);text-align:center;color:var(--text-muted);font-size:.9rem} +""" + +# --------------------------------------------------------------------------- +# Sub-tabs (clone-nav / split-tabs) +# --------------------------------------------------------------------------- + +_SUB_TABS = """\ +.clone-nav{display:flex;gap:2px;border-bottom:1px solid var(--border);margin-bottom:var(--sp-4); + overflow-x:auto;scrollbar-width:none} +.clone-nav::-webkit-scrollbar{display:none} +.clone-nav-btn{position:relative;padding:var(--sp-2) var(--sp-4);background:none;border:none; + cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted);white-space:nowrap; + transition:color var(--dur-fast) var(--ease)} +.clone-nav-btn:hover{color:var(--text-primary)} +.clone-nav-btn.active{color:var(--accent-primary)} 
+.clone-nav-btn.active::after{content:"";position:absolute;bottom:-1px;left:0;right:0; + height:2px;background:var(--accent-primary);border-radius:1px 1px 0 0} +.clone-panel{display:none} +.clone-panel.active{display:block} +""" + +# --------------------------------------------------------------------------- +# Sections + groups +# --------------------------------------------------------------------------- + +_SECTIONS = """\ +.section{margin-bottom:var(--sp-6)} +.subsection-title{font-size:1rem;font-weight:600;color:var(--text-primary); + margin-bottom:var(--sp-3);padding-bottom:var(--sp-2);border-bottom:1px solid var(--border)} +.section-body{display:flex;flex-direction:column;gap:var(--sp-3)} + +/* Clone groups */ +.group{border:1px solid var(--border);border-radius:var(--radius-lg);background:var(--bg-surface); + overflow:hidden;transition:box-shadow var(--dur-fast) var(--ease)} +.group:hover{box-shadow:var(--shadow-sm)} +.group-head{display:flex;align-items:center;justify-content:space-between;padding:var(--sp-3) var(--sp-4); + gap:var(--sp-3);cursor:pointer} +.group-head-left{display:flex;align-items:center;gap:var(--sp-3);min-width:0;flex:1} +.group-head-right{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0} +.group-toggle{background:none;border:none;cursor:pointer;color:var(--text-muted);padding:var(--sp-1); + display:flex;align-items:center;transition:transform var(--dur-normal) var(--ease);flex-shrink:0} +.group-toggle svg{width:16px;height:16px} +.group-toggle.expanded{transform:rotate(180deg)} +.group-info{min-width:0;flex:1} +.group-name{font-weight:600;font-size:.9rem;color:var(--text-primary);white-space:nowrap; + overflow:hidden;text-overflow:ellipsis;font-family:var(--font-mono)} +.group-summary{font-size:.8rem;color:var(--text-muted)} + +/* Badges */ +.clone-type-badge{font-size:.75rem;font-weight:500;padding:1px var(--sp-2); + border-radius:var(--radius-sm);background:var(--accent-muted);color:var(--accent-primary)} 
+.clone-count-badge{font-size:.75rem;font-weight:600;padding:1px var(--sp-2); + border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-secondary)} + +/* Group body */ +.group-body{border-top:1px solid var(--border);display:none} +.group-body.expanded{display:block} +.group-body.items.expanded{display:grid} +.group-compare-note{padding:var(--sp-2) var(--sp-4);font-size:.8rem;color:var(--text-muted); + background:var(--bg-raised);border-bottom:1px solid var(--border);font-style:italic} + +/* Group explain */ +.group-explain{padding:var(--sp-2) var(--sp-4);display:flex;flex-wrap:wrap;gap:var(--sp-1); + background:var(--bg-raised);border-bottom:1px solid var(--border)} +.group-explain-item{font-size:.75rem;padding:1px var(--sp-2);border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);font-family:var(--font-mono);white-space:nowrap} +.group-explain-warn{color:var(--warning);background:var(--warning-muted)} +.group-explain-muted{opacity:.7} +.group-explain-note{font-size:.75rem;color:var(--text-muted);font-style:italic;width:100%; + padding-top:var(--sp-1)} +""" + +# --------------------------------------------------------------------------- +# Items (clone instances) +# --------------------------------------------------------------------------- + +_ITEMS = """\ +.items{grid-template-columns:repeat(2,1fr);gap:0} +.items .item{border-right:1px solid var(--border);border-bottom:1px solid var(--border)} +.items .item:nth-child(2n){border-right:none} +.items .item:nth-last-child(-n+2){border-bottom:none} +.items .item:last-child{border-bottom:none} +.item{padding:0;min-width:0;overflow:hidden} +.item-header{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-2) var(--sp-3);background:var(--bg-raised);gap:var(--sp-2)} +.item-title{font-weight:500;font-size:.8rem;color:var(--text-primary);font-family:var(--font-mono); + 
white-space:nowrap;overflow:hidden;text-overflow:ellipsis;min-width:0;flex:1} +.item-loc{font-size:.72rem;color:var(--text-muted);font-family:var(--font-mono);white-space:nowrap;flex-shrink:0} +.item-compare-meta{padding:var(--sp-1) var(--sp-3);font-size:.72rem;color:var(--text-muted); + background:var(--bg-body);border-bottom:1px solid var(--border)} +""" + +# --------------------------------------------------------------------------- +# Code blocks +# --------------------------------------------------------------------------- + +_CODE = """\ +/* _html_snippets renders .codebox>.hitline/.line */ +.codebox{overflow-x:auto;font-size:12px;line-height:1.7;background:var(--bg-body);padding:var(--sp-2) 0;margin:0} +.codebox pre{margin:0;padding:0} +.codebox .line,.codebox .hitline{padding:0 var(--sp-4) 0 var(--sp-2);white-space:pre;display:block} +.codebox .line:hover{background:var(--bg-raised)} +.codebox .hitline{background:color-mix(in oklch,var(--accent-primary) 12%,transparent); + border-left:3px solid var(--accent-primary);padding-left:calc(var(--sp-2) - 3px)} +.codebox .hitline:hover{background:color-mix(in oklch,var(--accent-primary) 20%,transparent)} +""" + +# --------------------------------------------------------------------------- +# Risk / severity / source-kind badges +# --------------------------------------------------------------------------- + +_BADGES = """\ +.risk-badge,.severity-badge{display:inline-flex;align-items:center;font-size:.72rem;font-weight:600; + padding:1px var(--sp-2);border-radius:var(--radius-sm);text-transform:uppercase;letter-spacing:.02em} +.risk-critical,.severity-critical{background:var(--error-muted);color:var(--error)} +.risk-high,.severity-high{background:var(--error-muted);color:var(--error)} +.risk-warning,.severity-warning{background:var(--warning-muted);color:var(--warning)} +.risk-medium,.severity-medium{background:var(--warning-muted);color:var(--warning)} 
+.risk-low,.severity-low{background:var(--success-muted);color:var(--success)} +.risk-info,.severity-info{background:var(--info-muted);color:var(--info)} + +.source-kind-badge{display:inline-flex;align-items:center;font-size:.72rem;font-weight:500; + padding:1px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted)} +.source-kind-production{background:var(--error-muted);color:var(--error)} +.source-kind-test,.source-kind-test_util{background:var(--info-muted);color:var(--info)} +.source-kind-fixture,.source-kind-conftest{background:var(--warning-muted);color:var(--warning)} +.source-kind-import,.source-kind-cross_kind{background:var(--accent-muted);color:var(--accent-primary)} +.category-badge{display:inline-flex;align-items:center;gap:3px;font-size:.7rem; + font-family:var(--font-mono);padding:1px var(--sp-2);border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} +.category-badge-key{font-weight:400;color:var(--text-muted)} +.category-badge-val{font-weight:600;color:var(--text-secondary)} +.finding-why-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1);margin:var(--sp-1) 0} +.finding-why-chips .category-badge{font-size:.72rem} +""" + +# --------------------------------------------------------------------------- +# Overview +# --------------------------------------------------------------------------- + +_OVERVIEW = """\ +/* Dashboard */ +/* KPI grid: health card on the left, KPI cards in two rows on the right */ +.overview-kpi-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr)); + gap:var(--sp-3);margin-bottom:var(--sp-6)} +.overview-kpi-grid--with-health{grid-template-columns:minmax(190px,210px) minmax(0,1fr); + gap:var(--sp-3);align-items:stretch} +.overview-kpi-cards{display:grid;grid-template-columns:repeat(4,minmax(0,1fr)); + gap:var(--sp-3);min-width:0} +.overview-kpi-grid--with-health .meta-item{min-width:0} +.overview-kpi-grid--with-health 
.meta-item{min-height:108px} +.overview-kpi-cards .meta-item{display:grid;grid-template-rows:auto 1fr auto; + align-items:start;padding:var(--sp-3) var(--sp-4);gap:var(--sp-2);min-height:122px} +.overview-kpi-cards .meta-item .meta-label{font-size:.72rem;min-height:18px} +.overview-kpi-cards .meta-item .meta-value{display:flex;align-items:center; + font-size:1.55rem;line-height:1;padding:var(--sp-1) 0} +.overview-kpi-cards .kpi-detail{margin-top:0;gap:4px;align-self:end} +.overview-kpi-cards .kpi-micro{padding:2px 6px;font-size:.65rem} +.overview-kpi-grid--with-health .overview-health-card{padding:var(--sp-2)} +.overview-kpi-grid--with-health .overview-health-inner{width:100%;height:100%} +.overview-kpi-grid--with-health .health-ring{width:140px;height:140px;margin:auto} +.overview-kpi-grid--with-health .overview-health-card .meta-value{font-size:1.2rem} +.overview-kpi-grid--with-health .overview-health-card .meta-label{font-size:.66rem} +@media(max-width:1380px){ + .overview-kpi-cards{grid-template-columns:repeat(3,minmax(0,1fr))} +} +@media(max-width:980px){ + .overview-kpi-grid--with-health{grid-template-columns:1fr} + .overview-kpi-cards{grid-template-columns:repeat(2,minmax(0,1fr))} +} +@media(max-width:520px){ + .overview-kpi-cards{grid-template-columns:1fr} +} + +/* Health gauge */ +.overview-health-card{display:flex;align-items:center;justify-content:center; + padding:var(--sp-3);background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg)} +.overview-health-inner{display:flex;flex-direction:column;align-items:center;justify-content:center; + gap:var(--sp-1)} +.health-ring{position:relative;width:140px;height:140px} +.health-ring svg{width:100%;height:100%;transform:rotate(-90deg)} +.health-ring-bg{fill:none;stroke:var(--border);stroke-width:6} +.health-ring-baseline{fill:none;stroke-width:6;stroke-linecap:round} +.health-ring-fg{fill:none;stroke-width:6;stroke-linecap:round; + transition:stroke-dashoffset 1s var(--ease)} 
+.health-ring-label{position:absolute;inset:0;display:flex;flex-direction:column; + align-items:center;justify-content:center} +.health-ring-score{font-size:1.75rem;font-weight:700;color:var(--text-primary); + font-variant-numeric:tabular-nums;line-height:1} +.health-ring-grade{font-size:.72rem;font-weight:500;color:var(--text-muted);margin-top:3px} +.health-ring-delta{font-size:.65rem;font-weight:600;margin-top:3px} +.health-ring-delta--up{color:var(--success)} +.health-ring-delta--down{color:var(--error)} + +/* Get Badge button (under health ring) */ +.badge-btn{display:inline-flex;align-items:center;gap:4px;margin-top:var(--sp-2); + padding:4px 10px;font-size:.65rem;font-weight:500;color:var(--text-muted); + background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-sm); + cursor:pointer;transition:all var(--dur-fast) var(--ease);white-space:nowrap} +.badge-btn:hover{color:var(--text-primary);border-color:var(--border-strong); + background:var(--bg-alt)} + +/* Badge modal */ +.badge-modal{max-width:540px;width:90vw;max-height:85vh} +.badge-modal .modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border)} +.badge-modal .modal-head h2{font-size:1rem;font-weight:700;margin:0} +.badge-modal .modal-body{padding:var(--sp-3) var(--sp-4) var(--sp-4);overflow-y:auto;flex:1 1 auto} + +/* Badge tabs */ +.badge-tabs{display:flex;gap:var(--sp-1);margin-bottom:var(--sp-3)} +.badge-tab{padding:5px 12px;font-size:.72rem;font-weight:500;color:var(--text-muted); + background:transparent;border:1px solid var(--border);border-radius:var(--radius-sm); + cursor:pointer;transition:all var(--dur-fast) var(--ease)} +.badge-tab:hover{color:var(--text-primary);border-color:var(--border-strong)} +.badge-tab--active{color:var(--text-primary);background:var(--bg-alt); + border-color:var(--border-strong);font-weight:600} + +/* Badge preview & disclaimer */ 
+.badge-preview{text-align:center;padding:var(--sp-3) 0;margin-bottom:var(--sp-1); + border-bottom:1px solid var(--border)} +.badge-preview img{height:24px} +.badge-disclaimer{font-size:.65rem;color:var(--text-muted);text-align:center; + margin:var(--sp-1) 0 var(--sp-2);line-height:1.4} + +/* Badge embed fields */ +.badge-field-label{display:block;font-size:.68rem;font-weight:600;color:var(--text-muted); + margin-bottom:var(--sp-1);margin-top:var(--sp-3);text-transform:uppercase;letter-spacing:.04em} +.badge-code-wrap{display:flex;align-items:stretch;border:1px solid var(--border); + border-radius:var(--radius-sm);overflow:hidden;background:var(--bg-alt)} +.badge-code{flex:1;padding:var(--sp-2) var(--sp-3);font-size:.72rem;font-family:var(--font-mono); + color:var(--text-primary);word-break:break-all;white-space:pre-wrap;line-height:1.5; + user-select:all;-webkit-user-select:all} +.badge-copy-btn{min-width:64px;padding:var(--sp-2) var(--sp-3);font-size:.68rem;font-weight:500; + color:var(--text-muted);background:transparent;border:none;border-left:1px solid var(--border); + cursor:pointer;transition:all var(--dur-fast) var(--ease);white-space:nowrap} +.badge-copy-btn:hover{color:var(--text-primary)} +.badge-copy-btn--ok{color:var(--success)} + +/* KPI stat card */ +.meta-item{padding:var(--sp-2) var(--sp-3);background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-md);display:flex;flex-direction:column;gap:2px; + transition:border-color var(--dur-fast) var(--ease);min-width:0} +.meta-item:hover{border-color:var(--border-strong)} +.meta-item .meta-label{font-size:.68rem;font-weight:500;color:var(--text-muted); + display:flex;align-items:center;gap:var(--sp-1)} +.meta-item .meta-value{font-size:1.35rem;font-weight:700;color:var(--text-primary); + font-variant-numeric:tabular-nums;line-height:1.2} +.meta-item .meta-value--good{color:var(--success)} +.meta-item .meta-value--bad{color:var(--error)} +.meta-item 
.meta-value--warn{color:var(--warning)} +.meta-item .meta-value--muted{color:var(--text-muted)} +.kpi-detail{display:flex;flex-wrap:wrap;gap:3px;margin-top:2px} +.kpi-micro{display:inline-flex;align-items:center;gap:2px;font-size:.62rem; + padding:1px 5px;border-radius:var(--radius-sm);background:var(--bg-raised); + white-space:nowrap;line-height:1.3} +.kpi-micro-val{font-weight:500;font-variant-numeric:tabular-nums;color:var(--text-muted)} +.kpi-micro-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} +.kpi-micro--baselined{color:var(--success);font-weight:500;font-size:.6rem} +.kpi-delta{font-size:.58rem;font-weight:700;margin-left:auto; + padding:1px 5px;border-radius:8px;white-space:nowrap} +.kpi-delta--good{color:var(--success);background:var(--success-muted)} +.kpi-delta--bad{color:var(--error);background:var(--error-muted)} +.kpi-delta--neutral{color:var(--text-muted);background:var(--bg-raised)} +.kpi-help{display:inline-flex;align-items:center;justify-content:center;width:15px;height:15px; + font-size:.6rem;font-weight:600;border-radius:50%;background:none; + color:var(--text-muted);cursor:help;position:relative;border:1.5px solid var(--border); + opacity:.5;transition:opacity var(--dur-fast) var(--ease)} +.kpi-help:hover{opacity:1} +.kpi-help:hover::after{content:attr(data-tip);position:absolute;top:calc(100% + 6px);left:50%; + transform:translateX(-50%);background:var(--bg-overlay);color:var(--text-primary); + padding:var(--sp-2) var(--sp-3);border-radius:var(--radius-md);font-size:.75rem;font-weight:400; + white-space:normal;width:max-content;max-width:240px;line-height:1.4; + box-shadow:var(--shadow-md);z-index:100;pointer-events:none; + border:1px solid var(--border)} + +/* Tone variants */ +.meta-item.tone-ok{border-left:3px solid var(--success)} +.meta-item.tone-warn{border-left:3px solid var(--warning)} +.meta-item.tone-risk{border-left:3px solid var(--error)} + +/* Clusters */ +.overview-cluster{margin-bottom:var(--sp-4)} 
+.overview-cluster-header{margin-bottom:var(--sp-2)} +.overview-cluster-copy{font-size:.82rem;color:var(--text-muted);margin-top:2px} +.overview-cluster-empty{display:flex;flex-direction:column;align-items:center;gap:var(--sp-2); + padding:var(--sp-5);text-align:center;color:var(--text-muted);font-size:.85rem} +.empty-icon{color:var(--success);opacity:.35;width:32px;height:32px;flex-shrink:0} +.overview-list{display:grid;grid-template-columns:repeat(2,1fr);gap:var(--sp-2)} + +/* Overview rows */ +.overview-row{display:flex;flex-direction:column;gap:var(--sp-1); + padding:var(--sp-3) var(--sp-4);background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg);transition:border-color var(--dur-fast) var(--ease)} +.overview-row:hover{border-color:var(--border-strong)} +.overview-row[data-severity="critical"]{border-left:3px solid var(--error)} +.overview-row[data-severity="warning"]{border-left:3px solid var(--warning)} +.overview-row[data-severity="info"]{border-left:3px solid var(--info)} +.overview-row-head{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} +.overview-row-spread{font-size:.72rem;font-family:var(--font-mono);color:var(--text-muted); + margin-left:auto;white-space:nowrap} +.overview-row-title{font-weight:600;font-size:.85rem;color:var(--text-primary)} +.overview-row-summary{font-size:.8rem;color:var(--text-secondary);line-height:1.5} + +/* Summary grid */ +.overview-summary-grid{display:grid;gap:var(--sp-3);margin-bottom:var(--sp-3)} +.overview-summary-grid--2col{grid-template-columns:repeat(auto-fit,minmax(280px,1fr))} +.overview-summary-item{background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg);padding:var(--sp-4)} +.overview-summary-label{display:flex;align-items:center;gap:var(--sp-2); + font-size:.72rem;font-weight:700;text-transform:uppercase; + letter-spacing:.06em;color:var(--text-muted);margin-bottom:var(--sp-3); + padding-bottom:var(--sp-2);border-bottom:1px 
solid var(--border)} +.summary-icon{flex-shrink:0;opacity:.6} +.summary-icon--risk{color:var(--warning)} +.summary-icon--info{color:var(--accent-primary)} +.overview-summary-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.overview-summary-list li{font-size:.82rem;color:var(--text-secondary); + padding-left:var(--sp-3);position:relative;line-height:1.5} +.overview-summary-list li::before{content:"\\2022";position:absolute;left:0;color:var(--text-muted)} +.overview-summary-value{font-size:.85rem;color:var(--text-muted)} +/* Source breakdown bars */ +.breakdown-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.breakdown-row{display:grid;grid-template-columns:6.5rem 2rem 1fr;align-items:center;gap:var(--sp-2)} +.breakdown-row .source-kind-badge{justify-content:center;min-width:0;width:100%;text-align:center} +.breakdown-count{font-size:.8rem;font-weight:600;font-variant-numeric:tabular-nums; + color:var(--text-primary);text-align:right} +.breakdown-bar-track{height:6px;border-radius:3px;background:var(--bg-raised);overflow:hidden} +.breakdown-bar-fill{display:block;height:100%;border-radius:3px; + background:var(--accent-primary);transition:width .6s var(--ease)} +/* Health radar chart */ +.health-radar{display:flex;justify-content:center;padding:var(--sp-3) 0} +.health-radar svg{width:100%;max-width:520px;height:auto;overflow:visible} +.health-radar text{font-size:9px;font-family:var(--font-sans);fill:var(--text-muted)} +.health-radar .radar-score{font-weight:600;font-variant-numeric:tabular-nums;fill:var(--text-secondary)} +.health-radar .radar-label--weak{fill:var(--error)} +.health-radar .radar-label--weak .radar-score{fill:var(--error)} +/* Findings by family bars */ +.families-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.families-row{display:grid;grid-template-columns:5.5rem 2rem 1fr auto;align-items:center;gap:var(--sp-2)} +.families-row--muted{opacity:.55} 
+.families-label{font-size:.75rem;font-weight:500;color:var(--text-secondary);text-align:right} +.families-count{font-size:.8rem;font-weight:600;font-variant-numeric:tabular-nums; + color:var(--text-primary);text-align:right} +.breakdown-bar-track{display:flex} +.breakdown-bar-fill--baselined{opacity:.35} +.breakdown-bar-fill--new{border-radius:0 3px 3px 0} +.families-delta{font-size:.65rem;font-weight:600;font-variant-numeric:tabular-nums;white-space:nowrap} +.families-delta--ok{color:var(--success)} +.families-delta--new{color:var(--error)} +""" + +# --------------------------------------------------------------------------- +# Dependencies (SVG graph) +# --------------------------------------------------------------------------- + +_DEPENDENCIES = """\ +.dep-stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr)); + gap:var(--sp-2);margin-bottom:var(--sp-4)} +.dep-stats .meta-item{display:grid;grid-template-rows:auto 1fr auto;min-height:100px} +.dep-stats .meta-item .meta-label{font-size:.72rem;min-height:18px} +.dep-stats .meta-item .meta-value{display:flex;align-items:center} +.dep-stats .kpi-detail{margin-top:0;align-self:end} +.dep-graph-wrap{overflow:hidden;margin-bottom:var(--sp-4);border:1px solid var(--border); + border-radius:var(--radius-lg);background:var(--bg-surface);padding:var(--sp-4)} +.dep-graph-svg{width:100%;height:auto;max-height:520px} +.dep-graph-svg text{fill:var(--text-secondary);font-family:var(--font-mono)} +.dep-node{transition:fill-opacity var(--dur-fast) var(--ease)} +.dep-edge{transition:stroke-opacity var(--dur-fast) var(--ease)} +.dep-label{transition:fill var(--dur-fast) var(--ease)} + +/* Hub bar */ +.dep-hub-bar{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap; + margin-bottom:var(--sp-4);padding:var(--sp-2) var(--sp-4);background:var(--bg-raised); + border-radius:var(--radius-lg);border:1px solid var(--border)} 
+.dep-hub-label{font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em; + color:var(--text-muted)} +.dep-hub-pill{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-2); + border-radius:var(--radius-sm);background:var(--bg-overlay);font-size:.8rem} +.dep-hub-name{color:var(--text-primary);font-family:var(--font-mono);font-size:.8rem} +.dep-hub-deg{font-size:.72rem;font-weight:600;color:var(--accent-primary); + background:var(--accent-muted);padding:0 var(--sp-1);border-radius:var(--radius-sm)} + +/* Legend */ +.dep-legend{display:flex;gap:var(--sp-4);align-items:center;margin-bottom:var(--sp-4); + padding:var(--sp-2) var(--sp-4);font-size:.8rem;color:var(--text-muted)} +.dep-legend-item{display:inline-flex;align-items:center;gap:var(--sp-1)} +.dep-legend-item svg{flex-shrink:0} + +/* Chain flow */ +.chain-flow{display:inline-flex;align-items:center;gap:var(--sp-1);flex-wrap:wrap} +.chain-node{font-family:var(--font-mono);font-size:.8rem;color:var(--text-primary); + padding:0 var(--sp-1);background:var(--bg-overlay);border-radius:var(--radius-sm)} +.chain-arrow{color:var(--text-muted);font-size:.75rem} +""" + +# --------------------------------------------------------------------------- +# Novelty controls +# --------------------------------------------------------------------------- + +_NOVELTY = """\ +.global-novelty{margin-bottom:var(--sp-4);padding:var(--sp-4) var(--sp-5); + background:var(--bg-raised);border:1px solid var(--border);border-radius:var(--radius-lg)} +.global-novelty-head{display:flex;align-items:center;gap:var(--sp-4);flex-wrap:wrap} +.global-novelty-head h2{font-size:1rem;white-space:nowrap} +.novelty-tabs{display:flex;gap:var(--sp-2)} +.novelty-tab{transition:all var(--dur-fast) var(--ease)} +.novelty-tab.active{background:var(--accent-primary);color:white;border-color:var(--accent-primary)} 
+.novelty-tab[data-novelty-state="good"]{color:var(--success);border-color:var(--success);background:var(--success-muted)} +.novelty-tab[data-novelty-state="good"].active{background:var(--success);color:white;border-color:var(--success)} +.novelty-tab[data-novelty-state="bad"]{color:var(--error);border-color:var(--error);background:var(--error-muted)} +.novelty-tab[data-novelty-state="bad"].active{background:var(--error);color:white;border-color:var(--error)} +.novelty-count{font-size:.72rem;font-weight:600;background:rgba(255,255,255,.15);padding:0 var(--sp-1); + border-radius:var(--radius-sm);margin-left:var(--sp-1)} +.novelty-note{font-size:.8rem;color:var(--text-muted);margin-top:var(--sp-2)} + +/* Hidden by novelty filter */ +.group[data-novelty-hidden="true"]{display:none} +""" + +# --------------------------------------------------------------------------- +# Dead-code +# --------------------------------------------------------------------------- + +_DEAD_CODE = """\ +/* No custom overrides — uses shared table + tabs */ +""" + +# --------------------------------------------------------------------------- +# Suggestions +# --------------------------------------------------------------------------- + +_SUGGESTIONS = """\ +/* List layout */ +.suggestions-list{display:flex;flex-direction:column;gap:var(--sp-2)} + +/* Card — full-width row */ +.suggestion-card{background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); + overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} +.suggestion-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} +.suggestion-card[data-severity="critical"]{border-left:3px solid var(--error)} +.suggestion-card[data-severity="warning"]{border-left:3px solid var(--warning)} +.suggestion-card[data-severity="info"]{border-left:3px solid var(--info)} + +/* Header row: severity pill · title · meta badges */ 
+.suggestion-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center; + gap:var(--sp-2);flex-wrap:wrap} +.suggestion-sev{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap} +.suggestion-sev--critical{background:var(--error-muted);color:var(--error)} +.suggestion-sev--warning{background:var(--warning-muted);color:var(--warning)} +.suggestion-sev--info{background:var(--info-muted);color:var(--info)} +.suggestion-sev-inline{font-size:.72rem;font-weight:600;padding:1px var(--sp-1); + border-radius:var(--radius-sm)} +.suggestion-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} +.suggestion-meta{display:flex;align-items:center;gap:var(--sp-1);flex-shrink:0;flex-wrap:wrap} +.suggestion-meta-badge{font-size:.68rem;font-family:var(--font-mono);font-weight:500; + padding:1px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-overlay); + color:var(--text-muted);white-space:nowrap} +.suggestion-effort--easy{color:var(--success);background:var(--success-muted, rgba(34,197,94,.1))} +.suggestion-effort--moderate{color:var(--warning);background:var(--warning-muted)} +.suggestion-effort--hard{color:var(--error);background:var(--error-muted)} + +/* Body — context + summary */ +.suggestion-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-1)} +.suggestion-context{display:flex;gap:var(--sp-1);flex-wrap:wrap} +.suggestion-chip{font-size:.65rem;font-weight:500;padding:1px 6px;border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} +.suggestion-summary{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary);line-height:1.5} +.suggestion-action{display:flex;align-items:center;gap:var(--sp-1); + font-size:.8rem;font-weight:500;color:var(--accent-primary);margin-top:var(--sp-1)} 
+.suggestion-action-icon{flex-shrink:0;color:var(--accent-primary)} + +/* Expandable details */ +.suggestion-details{border-top:1px solid var(--border)} +.suggestion-details summary{padding:var(--sp-2) var(--sp-4);font-size:.75rem;font-weight:500; + color:var(--text-muted);cursor:pointer;display:flex;align-items:center;gap:var(--sp-2); + background:none;user-select:none} +.suggestion-details summary:hover{color:var(--text-primary);background:var(--bg-raised)} +.suggestion-details[open] summary{border-bottom:1px solid var(--border)} +.suggestion-details-body{padding:var(--sp-3) var(--sp-4);display:flex;flex-direction:column;gap:var(--sp-3)} + +/* Facts grid inside details */ +.suggestion-facts{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:var(--sp-3)} +.suggestion-fact-group{display:flex;flex-direction:column;gap:var(--sp-1)} +.suggestion-fact-group-title{font-size:.68rem;font-weight:600;text-transform:uppercase; + letter-spacing:.05em;color:var(--text-muted);padding-bottom:var(--sp-1);border-bottom:1px solid var(--border)} +.suggestion-dl{display:flex;flex-direction:column;gap:2px} +.suggestion-dl div{display:flex;gap:var(--sp-2);align-items:baseline} +.suggestion-dl dt{font-size:.72rem;color:var(--text-muted);white-space:nowrap;min-width:60px} +.suggestion-dl dd{font-size:.78rem;font-family:var(--font-mono);color:var(--text-primary);word-break:break-word} + +/* Locations & steps inside details */ +.suggestion-locations{display:flex;flex-direction:column;gap:var(--sp-1)} +.suggestion-locations li{display:flex;gap:var(--sp-2);align-items:baseline; + padding:2px 0;border-bottom:1px solid var(--border);line-height:1.4} +.suggestion-locations li:last-child{border-bottom:none} +.suggestion-loc-path{font-family:var(--font-mono);font-size:.75rem;color:var(--text-secondary)} +.suggestion-loc-lines{color:var(--text-muted)} +.suggestion-loc-name{font-family:var(--font-mono);font-size:.72rem;color:var(--text-muted); + margin-left:auto} 
+.suggestion-steps{padding-left:var(--sp-4);display:flex;flex-direction:column;gap:var(--sp-1);list-style:decimal} +.suggestion-steps li{font-size:.78rem;color:var(--text-secondary)} +.suggestion-sub-title{font-size:.72rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + color:var(--text-muted);margin-bottom:var(--sp-1)} + +.suggestion-empty{padding:var(--sp-4);text-align:center;color:var(--text-muted);font-size:.85rem} + +/* Hidden cards */ +.suggestion-card[data-filter-hidden="true"]{display:none} +""" + +# --------------------------------------------------------------------------- +# Structural findings +# --------------------------------------------------------------------------- + +_STRUCTURAL = """\ +/* Structural findings — list layout */ +.sf-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.sf-card{background:var(--bg-surface);border:1px solid var(--border);border-left:3px solid var(--info); + border-radius:var(--radius-lg); + overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} +.sf-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} + +/* Header row */ +.sf-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} +.sf-kind-badge{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap; + background:var(--info-muted);color:var(--info)} +.sf-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} +.sf-meta{display:flex;align-items:center;gap:var(--sp-1);flex-shrink:0;flex-wrap:wrap} +.sf-why-btn{font-size:.72rem;color:var(--accent-primary);font-weight:500} + +/* Body */ +.sf-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-2)} +.sf-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1)} 
+.sf-scope-text{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary)} + +/* Expandable occurrences */ +.sf-details{border-top:1px solid var(--border)} +.sf-details summary{padding:var(--sp-2) var(--sp-4);font-size:.75rem;font-weight:500; + color:var(--text-muted);cursor:pointer;display:flex;align-items:center;gap:var(--sp-2); + background:none;user-select:none} +.sf-details summary:hover{color:var(--text-primary);background:var(--bg-raised)} +.sf-details[open] summary{border-bottom:1px solid var(--border)} +.sf-details-body{padding:0} +.sf-details-body .table-wrap{border:none;border-radius:0} +.sf-table .col-num{white-space:nowrap} +.sf-table{table-layout:fixed} + +.sf-kind-meta{font-weight:normal;font-size:.8rem;color:var(--text-muted)} +.subsection-title{font-size:.95rem;margin:var(--sp-4) 0 var(--sp-2)} +.finding-occurrences-more summary{font-size:.8rem;color:var(--accent-primary);cursor:pointer; + padding:var(--sp-1) var(--sp-3)} +.sf-card[data-filter-hidden="true"]{display:none} +/* Finding Why modal */ +.finding-why-modal{max-width:720px;width:92vw;max-height:85vh} +.finding-why-modal .modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border);flex-shrink:0} +.finding-why-modal .modal-head h2{font-size:1rem;font-weight:600} +.finding-why-modal .modal-body{padding:var(--sp-3) var(--sp-4);overflow-y:auto;flex:1 1 auto;min-height:0} +.metrics-section{margin-bottom:var(--sp-3)} +.metrics-section-title{font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + color:var(--text-muted);margin-bottom:var(--sp-1);padding-bottom:3px;border-bottom:1px solid var(--border)} +.finding-why-text{font-size:.85rem;color:var(--text-secondary);line-height:1.5;margin:var(--sp-1) 0} +.finding-why-list{font-size:.82rem;color:var(--text-secondary);line-height:1.5; + list-style:disc;padding-left:var(--sp-5);margin:var(--sp-1) 0} +.finding-why-list 
li{margin-bottom:2px} +.finding-why-note{font-size:.78rem;color:var(--text-muted);margin-bottom:var(--sp-2)} +.finding-why-examples{display:flex;flex-direction:column;gap:var(--sp-2)} +.finding-why-example{border:1px solid var(--border);border-radius:var(--radius-md);overflow:hidden} +.finding-why-example-head{display:flex;align-items:center;gap:var(--sp-2);padding:var(--sp-1) var(--sp-3); + background:var(--bg-raised);font-size:.78rem;border-bottom:1px solid var(--border)} +.finding-why-example-label{font-weight:600;color:var(--text-primary)} +.finding-why-example-meta{color:var(--text-muted);font-family:var(--font-mono);font-size:.72rem} +.finding-why-example-loc{margin-left:auto;color:var(--text-muted);font-family:var(--font-mono);font-size:.72rem} +""" + +# --------------------------------------------------------------------------- +# Report provenance / meta panel +# --------------------------------------------------------------------------- + +_META_PANEL = """\ +/* Provenance section cards */ +.prov-section{margin-bottom:var(--sp-3);background:var(--bg-raised); + border-radius:var(--radius-md);padding:var(--sp-3) var(--sp-3) var(--sp-2); + border:1px solid color-mix(in srgb,var(--border) 50%,transparent)} +.prov-section:last-child{margin-bottom:0} +.prov-section-title{font-size:.65rem;font-weight:700;text-transform:uppercase;letter-spacing:.08em; + color:var(--text-muted);margin:0 0 var(--sp-2);padding:0;border:none; + display:flex;align-items:center;gap:var(--sp-1)} +.prov-section-title svg{width:12px;height:12px;opacity:.5;flex-shrink:0} +.prov-table{width:100%;border-collapse:collapse;font-size:.8rem} +.prov-table tr:not(:last-child){border-bottom:1px solid color-mix(in srgb,var(--border) 30%,transparent)} +.prov-table tr:hover{background:color-mix(in srgb,var(--accent-primary) 4%,transparent)} +.prov-td-label{padding:5px 0;color:var(--text-muted);white-space:nowrap;width:40%; + vertical-align:top;font-weight:500;font-size:.78rem} 
+.prov-td-value{padding:5px 0 5px var(--sp-2);color:var(--text-primary);word-break:break-all; + font-family:var(--font-mono);font-size:.72rem} + +/* Boolean check/cross badges */ +.meta-bool{font-size:.7rem;font-weight:600;padding:1px 8px;border-radius:10px; + display:inline-flex;align-items:center;gap:3px} +.meta-bool-true{background:var(--success-muted);color:var(--success)} +.meta-bool-false{background:var(--error-muted);color:var(--error)} + +/* Provenance summary badges */ +.prov-summary{display:flex;flex-wrap:wrap;align-items:center;gap:6px; + padding:var(--sp-2) var(--sp-4);border-top:1px solid var(--border)} +.prov-badge{font-size:.65rem;font-weight:600;padding:2px 8px; + border-radius:10px;white-space:nowrap;display:inline-flex;align-items:center;gap:3px} +.prov-badge.green{background:var(--success-muted);color:var(--success)} +.prov-badge.red{background:var(--error-muted);color:var(--error)} +.prov-badge.amber{background:var(--warning-muted);color:var(--warning)} +.prov-badge.neutral{background:var(--bg-overlay);color:var(--text-muted)} +.prov-explain{font-size:.62rem;color:var(--text-muted);margin-left:auto;font-style:italic} +""" + +# --------------------------------------------------------------------------- +# Empty states +# --------------------------------------------------------------------------- + +_EMPTY = """\ +.empty{display:flex;align-items:center;justify-content:center;padding:var(--sp-10)} +.empty-card{text-align:center;max-width:400px} +.empty-icon{margin-bottom:var(--sp-3);color:var(--success)} +.empty-icon svg{width:40px;height:40px} +.empty-card h2{margin-bottom:var(--sp-2)} +.empty-card p{color:var(--text-secondary);font-size:.9rem} +.tab-empty{display:flex;flex-direction:column;align-items:center;justify-content:center; + padding:var(--sp-10);text-align:center} +.tab-empty-icon{color:var(--text-muted);opacity:.4;margin-bottom:var(--sp-3);width:48px;height:48px} 
+.tab-empty-title{font-size:1rem;font-weight:600;color:var(--text-primary);margin-bottom:var(--sp-1)} +.tab-empty-desc{font-size:.85rem;color:var(--text-muted);max-width:320px} +""" + +# --------------------------------------------------------------------------- +# Coupled details +# --------------------------------------------------------------------------- + +_COUPLED = """\ +.coupled-details{display:inline} +.coupled-summary{display:inline;cursor:pointer} +.coupled-summary:hover{color:var(--text-primary)} +.coupled-more{font-size:.75rem;color:var(--text-muted);margin-left:var(--sp-1)} +.coupled-expanded{margin-top:var(--sp-1)} +""" + +# --------------------------------------------------------------------------- +# Modal (dialog) +# --------------------------------------------------------------------------- + +_MODAL = """\ +/* Generic dialog modal — Safari-compatible centering */ +dialog{background:var(--bg-surface);color:var(--text-primary);border:1px solid var(--border); + border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);padding:0;max-width:600px;width:90vw; + max-height:80vh;overflow:hidden} +dialog:not([open]){display:none} +dialog[open]{display:flex;flex-direction:column; + position:fixed;inset:0;margin:auto;z-index:9999} +dialog::backdrop{background:rgba(0,0,0,.5);backdrop-filter:blur(4px);-webkit-backdrop-filter:blur(4px)} +.modal-close{background:none;border:none;cursor:pointer;color:var(--text-muted);padding:var(--sp-1); + font-size:1.25rem;line-height:1} +.modal-close:hover{color:var(--text-primary)} + +/* Info modal (block metrics) */ +#clone-info-modal{max-width:640px;width:92vw;max-height:85vh} +#clone-info-modal .modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border)} +#clone-info-modal .modal-head h2{font-size:1rem} +#clone-info-modal .modal-body{padding:var(--sp-3) var(--sp-4);overflow-y:auto;flex:1 1 auto;min-height:0} 
+.info-dl{display:grid;grid-template-columns:1fr 1fr;gap:0;margin:0} +.info-dl>div{display:flex;justify-content:space-between;gap:var(--sp-2); + padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border)} +.info-dl>div:nth-last-child(-n+2){border-bottom:none} +.info-dl dt{font-size:.8rem;color:var(--text-muted);white-space:nowrap} +.info-dl dd{font-size:.8rem;font-weight:500;color:var(--text-primary);margin:0;text-align:right; + font-family:var(--font-mono)} + +/* Provenance modal */ +dialog.prov-modal{max-width:660px;width:92vw;max-height:85vh} +.prov-modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) var(--sp-5);border-bottom:none;flex-shrink:0} +.prov-modal-head h2{font-size:1rem;font-weight:700;letter-spacing:-.01em} +.prov-modal-body{padding:0 var(--sp-4) var(--sp-4);overflow-y:auto;flex:1 1 auto} +.prov-modal .prov-summary{border-top:none;padding:0 var(--sp-5) var(--sp-3); + border-bottom:1px solid var(--border);flex-shrink:0} + +""" + +# --------------------------------------------------------------------------- +# Command palette +# --------------------------------------------------------------------------- + +_CMD_PALETTE = "" # removed: command palette eliminated + +# --------------------------------------------------------------------------- +# Toast notifications +# --------------------------------------------------------------------------- + +_TOAST = """\ +.toast-container{position:fixed;bottom:var(--sp-6);right:var(--sp-6);z-index:2000; + display:flex;flex-direction:column;gap:var(--sp-2)} +.toast{padding:var(--sp-3) var(--sp-5);background:var(--bg-overlay);border:1px solid var(--border); + border-radius:var(--radius-lg);box-shadow:var(--shadow-lg);font-size:.85rem;color:var(--text-primary); + animation:toast-in var(--dur-slow) var(--ease)} +@keyframes toast-in{from{opacity:0;transform:translateY(8px)}to{opacity:1;transform:none}} +""" + +# 
--------------------------------------------------------------------------- +# Utility +# --------------------------------------------------------------------------- + +_UTILITY = """\ +/* Responsive */ +@media(max-width:768px){ + .overview-kpi-grid{grid-template-columns:repeat(2,1fr)} + .toolbar{flex-direction:column;align-items:stretch} + .toolbar-left,.toolbar-right{justify-content:flex-start} + .overview-list{grid-template-columns:1fr} + .items{grid-template-columns:1fr} + .items .item{border-right:none} + .overview-row-head{flex-wrap:wrap} + .overview-row-spread{margin-left:0;width:100%} + .suggestion-head{flex-direction:column;align-items:flex-start} + .suggestion-facts{grid-template-columns:1fr} + .container{padding:0 var(--sp-3)} + .main-tabs{padding:0 var(--sp-3)} +} +@media(max-width:480px){ + .overview-kpi-grid{grid-template-columns:1fr} + .search-box input[type="text"]{width:140px} +} + +/* Print */ +@media print{ + .topbar,.toolbar,.pagination,.theme-toggle,.toast-container, + .novelty-tabs,.clear-btn,.btn{display:none!important} + .tab-panel{display:block!important;break-inside:avoid} + .group-body{display:block!important} + body{background:#fff;color:#000} +} +""" + +# --------------------------------------------------------------------------- +# Footer +# --------------------------------------------------------------------------- + +_FOOTER = """\ +.report-footer{margin-top:var(--sp-8);padding:var(--sp-4) 0;border-top:1px solid var(--border); + text-align:center;font-size:.78rem;color:var(--text-muted)} +.report-footer a{color:var(--accent-primary)} +""" + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +_ALL_SECTIONS = ( + _TOKENS_DARK, + _TOKENS_LIGHT, + _RESET, + _LAYOUT, + _CONTROLS, + _SEARCH, + _TOOLBAR, + _INSIGHT, + _TABLES, + _SUB_TABS, + _SECTIONS, + _ITEMS, + _CODE, + _BADGES, + _OVERVIEW, + _DEPENDENCIES, + 
_NOVELTY, + _DEAD_CODE, + _SUGGESTIONS, + _STRUCTURAL, + _META_PANEL, + _EMPTY, + _COUPLED, + _MODAL, + _CMD_PALETTE, + _TOAST, + _UTILITY, + _FOOTER, +) + + +def build_css() -> str: + """Return the complete CSS string for the HTML report.""" + return "\n".join(_ALL_SECTIONS) diff --git a/codeclone/_html_data_attrs.py b/codeclone/_html_data_attrs.py new file mode 100644 index 0000000..cf10e4b --- /dev/null +++ b/codeclone/_html_data_attrs.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Unified data-attribute builder for HTML elements.""" + +from __future__ import annotations + +from ._html_escape import _escape_attr + +__all__ = ["_build_data_attrs"] + + +def _build_data_attrs(attrs: dict[str, object | None]) -> str: + """Build a space-prefixed HTML data-attribute string from a dict. + + None values are omitted; empty strings are preserved as ``attr=""``. + All values are escaped. + Returns ``''`` when no attrs survive, or ``' data-foo="bar" ...'`` + (leading space) otherwise. + """ + parts: list[str] = [] + for key, val in attrs.items(): + if val is None: + continue + s = str(val) + parts.append(f'{key}="{_escape_attr(s)}"') + return f" {' '.join(parts)}" if parts else "" diff --git a/codeclone/_html_escape.py b/codeclone/_html_escape.py index 025ec44..b12a3b8 100644 --- a/codeclone/_html_escape.py +++ b/codeclone/_html_escape.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_html_filters.py b/codeclone/_html_filters.py new file mode 100644 index 0000000..980cf91 --- /dev/null +++ b/codeclone/_html_filters.py @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Data-driven filter dropdown renderer for report toolbars.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from ._html_escape import _escape_attr, _escape_html + +__all__ = [ + "CLONE_TYPE_OPTIONS", + "SPREAD_OPTIONS", + "_render_select", +] + +CLONE_TYPE_OPTIONS: tuple[tuple[str, str], ...] = ( + ("Type-1", "Type-1"), + ("Type-2", "Type-2"), + ("Type-3", "Type-3"), + ("Type-4", "Type-4"), +) + +SPREAD_OPTIONS: tuple[tuple[str, str], ...] = ( + ("high", "high"), + ("low", "low"), +) + + +def _render_select( + *, + element_id: str, + data_attr: str, + options: Sequence[tuple[str, str]], + all_label: str = "all", + selected: str | None = None, +) -> str: + """Render a ``" + f'', + ] + for value, display in options: + sel = " selected" if selected == value else "" + parts.append( + f'" + ) + parts.append("") + return "".join(parts) diff --git a/codeclone/_html_js.py b/codeclone/_html_js.py new file mode 100644 index 0000000..12ad40e --- /dev/null +++ b/codeclone/_html_js.py @@ -0,0 +1,598 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""JavaScript for the HTML report — modular IIFE with feature blocks.""" + +from __future__ import annotations + +# --------------------------------------------------------------------------- +# Core helpers +# --------------------------------------------------------------------------- + +_CORE = """\ +const $=s=>document.querySelector(s); +const $$=s=>[...document.querySelectorAll(s)]; +""" + +# --------------------------------------------------------------------------- +# Theme +# 
--------------------------------------------------------------------------- + +_THEME = """\ +(function initTheme(){ + const key='codeclone-theme'; + const root=document.documentElement; + const saved=localStorage.getItem(key); + if(saved)root.setAttribute('data-theme',saved); + + const btn=$('.theme-toggle'); + if(!btn)return; + btn.addEventListener('click',()=>{ + const has=root.getAttribute('data-theme'); + const isDark=has?has==='dark' + :matchMedia('(prefers-color-scheme:light)').matches?false:true; + const next=isDark?'light':'dark'; + root.setAttribute('data-theme',next); + localStorage.setItem(key,next); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Main tabs +# --------------------------------------------------------------------------- + +_TABS = """\ +(function initTabs(){ + const tabs=$$('.main-tab'); + const panels=$$('.tab-panel'); + if(!tabs.length)return; + + function activate(id){ + tabs.forEach(t=>{t.setAttribute('aria-selected',t.dataset.tab===id?'true':'false')}); + panels.forEach(p=>{p.classList.toggle('active',p.id==='panel-'+id)}); + history.replaceState(null,'','#'+id); + } + + tabs.forEach(t=>t.addEventListener('click',()=>activate(t.dataset.tab))); + + // Keyboard: arrow left/right + const tabList=$('[role="tablist"].main-tabs'); + if(tabList){ + tabList.addEventListener('keydown',e=>{ + const idx=tabs.indexOf(document.activeElement); + if(idx<0)return; + let next=-1; + if(e.key==='ArrowRight')next=(idx+1)%tabs.length; + else if(e.key==='ArrowLeft')next=(idx-1+tabs.length)%tabs.length; + if(next>=0){e.preventDefault();tabs[next].focus();activate(tabs[next].dataset.tab)} + }); + } + + // Hash deep-link + const hash=location.hash.slice(1); + const valid=tabs.map(t=>t.dataset.tab); + activate(valid.includes(hash)?hash:valid[0]||''); +})(); +""" + +# --------------------------------------------------------------------------- +# Sub-tabs (clone-nav / split-tabs) +# 
--------------------------------------------------------------------------- + +_SUB_TABS = """\ +(function initSubTabs(){ + $$('.clone-nav-btn').forEach(btn=>{ + btn.addEventListener('click',()=>{ + const group=btn.dataset.subtabGroup; + if(!group)return; + $$('.clone-nav-btn[data-subtab-group="'+group+'"]').forEach(b=>b.classList.remove('active')); + btn.classList.add('active'); + $$('.clone-panel[data-subtab-group="'+group+'"]').forEach(p=>{ + p.classList.toggle('active',p.dataset.clonePanel===btn.dataset.cloneTab); + }); + }); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Sections: search, filter, pagination, collapse/expand +# --------------------------------------------------------------------------- + +_SECTIONS = """\ +(function initSections(){ + // Registry so novelty filter can call applyFilters directly (no debounce) + window.__sectionFilters=window.__sectionFilters||{}; + + $$('[data-section]').forEach(sec=>{ + const id=sec.dataset.section; + const groups=[...sec.querySelectorAll('.group[data-group="'+id+'"]')]; + const searchInput=$('#search-'+id); + const pageMeta=sec.querySelector('[data-page-meta="'+id+'"]'); + const pageSizeSelect=sec.querySelector('[data-pagesize="'+id+'"]'); + const sourceKindFilter=sec.querySelector('[data-source-kind-filter="'+id+'"]'); + const cloneTypeFilter=sec.querySelector('[data-clone-type-filter="'+id+'"]'); + const spreadFilter=sec.querySelector('[data-spread-filter="'+id+'"]'); + const minOccCheck=sec.querySelector('[data-min-occurrences-filter="'+id+'"]'); + + let page=1; + let pageSize=parseInt(pageSizeSelect?.value||'10',10); + + function isAll(v){return !v||v==='all'} + + function applyFilters(){ + const q=(searchInput?.value||'').toLowerCase().trim(); + const sk=sourceKindFilter?.value||''; + const ct=cloneTypeFilter?.value||''; + const sp=spreadFilter?.value||''; + const minOcc=minOccCheck?.checked||false; + + groups.forEach(g=>{ + // Novelty-hidden groups 
are always hidden + if(g.getAttribute('data-novelty-hidden')==='true'){g.style.display='none';return} + let show=true; + if(q&&!(g.dataset.search||'').includes(q))show=false; + if(!isAll(sk)&&g.dataset.sourceKind!==sk)show=false; + if(!isAll(ct)&&g.dataset.cloneType!==ct)show=false; + if(!isAll(sp)&&g.dataset.spreadBucket!==sp)show=false; + if(minOcc&&parseInt(g.dataset.groupArity||'0',10)<4)show=false; + g.style.display=show?'':'none'; + }); + page=1; + paginate(); + } + + function paginate(){ + // Collect groups that passed both novelty + search/filter + const vis=groups.filter(g=>g.style.display!=='none'); + const totalPages=Math.max(1,Math.ceil(vis.length/pageSize)); + if(page>totalPages)page=totalPages; + const start=(page-1)*pageSize; + const end=start+pageSize; + vis.forEach((g,i)=>{g.style.display=i>=start&&i{clearTimeout(timer);timer=setTimeout(applyFilters,200)}); + } + [sourceKindFilter,cloneTypeFilter,spreadFilter].forEach(el=>{ + if(el)el.addEventListener('change',applyFilters); + }); + if(minOccCheck)minOccCheck.addEventListener('change',applyFilters); + if(pageSizeSelect)pageSizeSelect.addEventListener('change',()=>{ + pageSize=parseInt(pageSizeSelect.value,10);page=1;paginate()}); + + // Clear search + const clearBtn=sec.querySelector('[data-clear="'+id+'"]'); + if(clearBtn&&searchInput)clearBtn.addEventListener('click',()=>{searchInput.value='';applyFilters()}); + + // Prev/Next + const prevBtn=sec.querySelector('[data-prev="'+id+'"]'); + const nextBtn=sec.querySelector('[data-next="'+id+'"]'); + if(prevBtn)prevBtn.addEventListener('click',()=>{if(page>1){page--;paginate()}}); + if(nextBtn)nextBtn.addEventListener('click',()=>{ + const vis=visible();const tp=Math.max(1,Math.ceil(vis.length/pageSize)); + if(page{ + groups.forEach(g=>{ + const body=g.querySelector('.group-body');if(body)body.classList.remove('expanded'); + const toggle=g.querySelector('.group-toggle');if(toggle)toggle.classList.remove('expanded'); + })}); + 
if(expBtn)expBtn.addEventListener('click',()=>{ + groups.filter(g=>g.style.display!=='none').forEach(g=>{ + const body=g.querySelector('.group-body');if(body)body.classList.add('expanded'); + const toggle=g.querySelector('.group-toggle');if(toggle)toggle.classList.add('expanded'); + })}); + + // Initial + applyFilters(); + }); + + // Toggle individual groups + document.addEventListener('click',e=>{ + const btn=e.target.closest('[data-toggle-group]'); + if(!btn)return; + const groupId=btn.dataset.toggleGroup; + const body=$('#group-body-'+groupId); + if(!body)return; + body.classList.toggle('expanded'); + btn.classList.toggle('expanded'); + }); + + // Also toggle on group-head click (except buttons) + document.addEventListener('click',e=>{ + const head=e.target.closest('.group-head'); + if(!head)return; + if(e.target.closest('button'))return; + const toggle=head.querySelector('.group-toggle'); + if(toggle)toggle.click(); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Novelty filter (global new/known) +# --------------------------------------------------------------------------- + +_NOVELTY = """\ +(function initNovelty(){ + const ctrl=$('#global-novelty-controls'); + if(!ctrl)return; + const defaultNovelty=ctrl.dataset.defaultNovelty||'new'; + const btns=$$('[data-global-novelty]'); + let activeNovelty=''; + + function applyNovelty(val){ + activeNovelty=val; + btns.forEach(b=>b.classList.toggle('active',b.dataset.globalNovelty===val)); + $$('.group[data-novelty]').forEach(g=>{ + const nov=g.dataset.novelty; + if(nov==='all')g.setAttribute('data-novelty-hidden','false'); + else g.setAttribute('data-novelty-hidden',nov!==val?'true':'false'); + }); + // Re-run section filters directly (no debounce) + const reg=window.__sectionFilters||{}; + Object.values(reg).forEach(fn=>fn()); + } + + btns.forEach(b=>b.addEventListener('click',()=>applyNovelty(b.dataset.globalNovelty))); + applyNovelty(defaultNovelty); +})(); +""" 
+ +# --------------------------------------------------------------------------- +# Modals (dialog-based for block metrics info) +# --------------------------------------------------------------------------- + +_MODALS = """\ +(function initModals(){ + let dlg=$('#clone-info-modal'); + if(!dlg){ + dlg=document.createElement('dialog'); + dlg.id='clone-info-modal'; + dlg.innerHTML='' + +''; + document.body.appendChild(dlg); + dlg.querySelector('.modal-close').addEventListener('click',()=>dlg.close()); + dlg.addEventListener('click',e=>{if(e.target===dlg)dlg.close()}); + } + + document.addEventListener('click',e=>{ + const btn=e.target.closest('[data-metrics-btn]'); + if(!btn)return; + const groupId=btn.dataset.metricsBtn; + const group=btn.closest('.group'); + if(!group)return; + const d=group.dataset; + const items=[]; + function add(label,val){if(val)items.push('
'+label+'
'+val+'
')} + add('Match rule',d.matchRule); + add('Block size',d.blockSize); + add('Signature',d.signatureKind); + add('Merged regions',d.mergedRegions); + add('Pattern',d.patternLabel); + add('Hint',d.hintLabel); + add('Hint confidence',d.hintConfidence); + add('Assert ratio',d.assertRatio); + add('Consecutive asserts',d.consecutiveAsserts); + add('Boilerplate asserts',d.boilerplateAsserts); + add('Group arity',d.groupArity); + add('Clone type',d.cloneType); + add('Source kind',d.sourceKind); + if(d.spreadFiles)add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); + dlg.querySelector('#modal-title').textContent='Group: '+groupId; + dlg.querySelector('#modal-body').innerHTML=items.length + ?'
'+items.join('')+'
' + :'

No metadata available.

'; + dlg.showModal(); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Suggestions filter +# --------------------------------------------------------------------------- + +_SUGGESTIONS = """\ +(function initSuggestions(){ + const body=$('[data-suggestions-body]'); + if(!body)return; + const cards=[...body.querySelectorAll('[data-suggestion-card]')]; + const sevSel=$('[data-suggestions-severity]'); + const catSel=$('[data-suggestions-category]'); + const famSel=$('[data-suggestions-family]'); + const skSel=$('[data-suggestions-source-kind]'); + const spSel=$('[data-suggestions-spread]'); + const actCheck=$('[data-suggestions-actionable]'); + const countLabel=$('[data-suggestions-count]'); + + function apply(){ + const sev=sevSel?.value||''; + const cat=catSel?.value||''; + const fam=famSel?.value||''; + const sk=skSel?.value||''; + const sp=spSel?.value||''; + const act=actCheck?.checked||false; + let shown=0; + cards.forEach(c=>{ + let hide=false; + if(sev&&c.dataset.severity!==sev)hide=true; + if(cat&&c.dataset.category!==cat)hide=true; + if(fam&&c.dataset.family!==fam)hide=true; + if(sk&&c.dataset.sourceKind!==sk)hide=true; + if(sp&&c.dataset.spreadBucket!==sp)hide=true; + if(act&&c.dataset.actionable!=='true')hide=true; + c.setAttribute('data-filter-hidden',hide?'true':'false'); + if(!hide)shown++; + }); + if(countLabel)countLabel.textContent=shown+' shown'; + } + + [sevSel,catSel,famSel,skSel,spSel].forEach(el=>{if(el)el.addEventListener('change',apply)}); + if(actCheck)actCheck.addEventListener('change',apply); +})(); +""" + +# --------------------------------------------------------------------------- +# Dependency graph hover +# --------------------------------------------------------------------------- + +_DEP_GRAPH = """\ +(function initDepGraph(){ + const svg=$('.dep-graph-svg'); + if(!svg)return; + const nodes=$$('.dep-node'); + const labels=$$('.dep-label'); + const edges=$$('.dep-edge'); + + function 
highlight(name){ + nodes.forEach(n=>{n.style.fillOpacity=n.dataset.node===name?'1':'0.15'}); + labels.forEach(l=>{l.style.fill=l.dataset.node===name?'var(--text-primary)':'var(--text-muted)'; + l.style.fillOpacity=l.dataset.node===name?'1':'0.3'}); + edges.forEach(e=>{ + const connected=e.dataset.source===name||e.dataset.target===name; + e.style.strokeOpacity=connected?'0.8':'0.05'; + e.style.strokeWidth=connected?'2':'1'; + }); + } + + function reset(){ + nodes.forEach(n=>{n.style.fillOpacity=''}); + labels.forEach(l=>{l.style.fill='';l.style.fillOpacity=''}); + edges.forEach(e=>{e.style.strokeOpacity='';e.style.strokeWidth=''}); + } + + [...nodes,...labels].forEach(el=>{ + el.addEventListener('mouseenter',()=>highlight(el.dataset.node)); + el.addEventListener('mouseleave',reset); + el.style.cursor='pointer'; + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Meta panel toggle +# --------------------------------------------------------------------------- + +_META_PANEL = """\ +(function initBadgeModal(){ + const dlg=$('#badge-modal'); + if(!dlg)return; + + /* --- state --- */ + var _grade='',_score=0,_variant='grade'; + + /* --- grade→shields color (canonical bands) --- */ + function badgeColor(g){ + return g==='A'?'brightgreen':g==='B'?'green':g==='C'?'yellow':g==='D'?'orange':'red'} + + /* --- build shield URLs & embed codes for current variant --- */ + function render(){ + var label,alt,url; + if(_variant==='full'){ + label=_score+' ('+_grade+')';alt='codeclone '+_score+' ('+_grade+')'; + }else{ + label='grade '+_grade;alt='codeclone grade '+_grade;} + url='https://img.shields.io/badge/codeclone-' + +encodeURIComponent(label).replace(/-/g,'--')+'-'+badgeColor(_grade); + var prev=dlg.querySelector('#badge-preview'); + if(prev)prev.innerHTML=''+alt+''; + var md=dlg.querySelector('#badge-code-md'); + if(md)md.textContent='!['+alt+']('+url+')'; + var ht=dlg.querySelector('#badge-code-html'); + 
if(ht)ht.textContent=''+alt+'';} + + /* --- tabs --- */ + dlg.querySelectorAll('[data-badge-tab]').forEach(function(tab){ + tab.addEventListener('click',function(){ + dlg.querySelectorAll('[data-badge-tab]').forEach(function(t){ + t.classList.remove('badge-tab--active');t.setAttribute('aria-selected','false')}); + tab.classList.add('badge-tab--active');tab.setAttribute('aria-selected','true'); + _variant=tab.dataset.badgeTab;render();});}); + + /* --- open --- */ + document.addEventListener('click',function(e){ + var btn=e.target.closest('[data-badge-open]'); + if(!btn)return; + _grade=btn.dataset.badgeGrade||''; + _score=parseInt(btn.dataset.badgeScore||'0',10); + _variant='grade'; + dlg.querySelectorAll('[data-badge-tab]').forEach(function(t){ + var active=t.dataset.badgeTab==='grade'; + t.classList.toggle('badge-tab--active',active); + t.setAttribute('aria-selected',active?'true':'false');}); + render();dlg.showModal(); + var fc=dlg.querySelector('[data-badge-close]');if(fc)fc.focus();}); + + /* --- close --- */ + var closeBtn=dlg.querySelector('[data-badge-close]'); + if(closeBtn)closeBtn.addEventListener('click',function(){dlg.close()}); + dlg.addEventListener('click',function(e){if(e.target===dlg)dlg.close()}); + + /* --- copy with feedback --- */ + dlg.addEventListener('click',function(e){ + var copyBtn=e.target.closest('[data-badge-copy]'); + if(!copyBtn)return; + var which=copyBtn.dataset.badgeCopy; + var code=dlg.querySelector('#badge-code-'+which); + if(!code)return; + navigator.clipboard.writeText(code.textContent).then(function(){ + copyBtn.textContent='\u2713 Copied';copyBtn.classList.add('badge-copy-btn--ok'); + setTimeout(function(){copyBtn.textContent='Copy'; + copyBtn.classList.remove('badge-copy-btn--ok')},1500);});}); +})(); +(function initProvModal(){ + const dlg=$('#prov-modal'); + if(!dlg)return; + const openBtn=$('[data-prov-open]'); + const closeBtn=dlg.querySelector('[data-prov-close]'); + 
if(openBtn)openBtn.addEventListener('click',()=>dlg.showModal()); + if(closeBtn)closeBtn.addEventListener('click',()=>dlg.close()); + dlg.addEventListener('click',e=>{if(e.target===dlg)dlg.close()}); +})(); +(function initFindingWhy(){ + var dlg=$('#finding-why-modal'); + if(!dlg)return; + var body=dlg.querySelector('.modal-body'); + var closeBtn=dlg.querySelector('[data-finding-why-close]'); + closeBtn.addEventListener('click',function(){dlg.close()}); + dlg.addEventListener('click',function(e){if(e.target===dlg)dlg.close()}); + document.addEventListener('click',function(e){ + var btn=e.target.closest('[data-finding-why-btn]'); + if(!btn)return; + var tplId=btn.getAttribute('data-finding-why-btn'); + var tpl=document.getElementById(tplId); + if(!tpl)return; + body.innerHTML=tpl.innerHTML; + dlg.showModal(); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# JSON export +# --------------------------------------------------------------------------- + +_EXPORT = "" # removed: Export JSON button eliminated from topbar + +# --------------------------------------------------------------------------- +# Command Palette (Cmd/Ctrl+K) +# --------------------------------------------------------------------------- + +_CMD_PALETTE = "" # removed: command palette eliminated + +# --------------------------------------------------------------------------- +# Table sort +# --------------------------------------------------------------------------- + +_TABLE_SORT = """\ +(function initTableSort(){ + $$('.table th[data-sortable]').forEach(th=>{ + th.addEventListener('click',()=>{ + const table=th.closest('.table'); + if(!table)return; + const idx=[...th.parentElement.children].indexOf(th); + const tbody=table.querySelector('tbody')||table; + const rows=[...tbody.querySelectorAll('tr')].filter(r=>r.querySelector('td')); + const cur=th.getAttribute('aria-sort'); + const dir=cur==='ascending'?'descending':'ascending'; + // Reset 
siblings + [...th.parentElement.children].forEach(s=>{s.removeAttribute('aria-sort')}); + th.setAttribute('aria-sort',dir); + + rows.sort((a,b)=>{ + const at=(a.children[idx]?.textContent||'').trim(); + const bt=(b.children[idx]?.textContent||'').trim(); + const an=parseFloat(at),bn=parseFloat(bt); + const cmp=(!isNaN(an)&&!isNaN(bn))?an-bn:at.localeCompare(bt); + return dir==='ascending'?cmp:-cmp; + }); + rows.forEach(r=>tbody.appendChild(r)); + }); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Toast +# --------------------------------------------------------------------------- + +_TOAST = """\ +function toast(msg){ + let c=$('.toast-container'); + if(!c){c=document.createElement('div');c.className='toast-container';document.body.appendChild(c)} + const t=document.createElement('div');t.className='toast';t.textContent=msg; + c.appendChild(t); + setTimeout(()=>{t.style.opacity='0';t.style.transform='translateY(8px)'; + setTimeout(()=>t.remove(),300)},3000); +} +""" + +# --------------------------------------------------------------------------- +# Lazy highlight (IntersectionObserver for code snippets) +# --------------------------------------------------------------------------- + +_SCOPE_COUNTERS = """\ +function updateCloneScopeCounters(){ + const sections=['functions','blocks','segments']; + let total=0; + sections.forEach(id=>{ + const sec=document.querySelector('[data-section="'+id+'"]'); + if(!sec)return; + const vis=[...sec.querySelectorAll('.group[data-group="'+id+'"]')] + .filter(g=>g.style.display!=='none'&&g.getAttribute('data-novelty-hidden')!=='true'); + total+=vis.length; + const tabCount=document.querySelector('[data-clone-tab-count="'+id+'"]'); + if(tabCount){tabCount.textContent=vis.length;tabCount.dataset.totalGroups=vis.length} + }); + const mainBtn=document.querySelector('[data-main-clones-count]'); + if(mainBtn)mainBtn.setAttribute('data-main-clones-count',total); +} +""" + 
def build_js() -> str:
    """Return the complete JS string for the HTML report, wrapped in an IIFE.

    All module fragments from ``_ALL_MODULES`` are joined with newlines and
    enclosed in a single strict-mode IIFE so no identifier leaks into the
    page's global scope.
    """
    joined_modules = "\n".join(_ALL_MODULES)
    return "(function(){\n'use strict';\n" + joined_modules + "\n})();\n"
import __version__, _coerce +from .._html_css import build_css +from .._html_escape import _escape_html +from .._html_js import build_js +from .._html_snippets import _FileCache, _pygments_css +from ..contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL +from ..domain.quality import CONFIDENCE_HIGH +from ..structural_findings import normalize_structural_findings +from ..templates import FONT_CSS_URL, REPORT_TEMPLATE +from ._context import _meta_pick, build_context +from ._icons import BRAND_LOGO, ICONS +from ._sections._clones import render_clones_panel +from ._sections._coupling import render_quality_panel +from ._sections._dead_code import render_dead_code_panel +from ._sections._dependencies import render_dependencies_panel +from ._sections._meta import render_meta_panel +from ._sections._overview import render_overview_panel +from ._sections._structural import render_structural_panel +from ._sections._suggestions import render_suggestions_panel + +if TYPE_CHECKING: + from ..models import GroupMapLike, MetricsDiff, StructuralFindingGroup, Suggestion + + +def build_html_report( + *, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_group_facts: dict[str, dict[str, str]], + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + report_meta: Mapping[str, object] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Sequence[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, + report_document: Mapping[str, object] | None = None, + metrics_diff: MetricsDiff | None = None, + title: str = "CodeClone Report", + context_lines: int = 3, + max_snippet_lines: int = 220, +) -> str: + """Build a self-contained HTML report string. + + This is the sole public entry point. The signature is frozen. 
+ """ + file_cache = _FileCache() + + ctx = build_context( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + block_group_facts=block_group_facts, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + report_meta=report_meta, + metrics=metrics, + suggestions=suggestions, + structural_findings=structural_findings, + report_document=report_document, + metrics_diff=metrics_diff, + file_cache=file_cache, + context_lines=context_lines, + max_snippet_lines=max_snippet_lines, + ) + + # -- Render sections -- + overview_html = render_overview_panel(ctx) + clones_html, _novelty_enabled, _total_new, _total_known = render_clones_panel(ctx) + quality_html = render_quality_panel(ctx) + dependencies_html = render_dependencies_panel(ctx) + dead_code_html = render_dead_code_panel(ctx) + suggestions_html = render_suggestions_panel(ctx) + structural_html = render_structural_panel(ctx) + meta_html = render_meta_panel(ctx) + + # -- Tab counters -- + _as_mapping = _coerce.as_mapping + _as_sequence = _coerce.as_sequence + _as_int = _coerce.as_int + dead_summary = _as_mapping(ctx.dead_code_map.get("summary")) + dead_total = _as_int(dead_summary.get("total")) + dead_high_conf = _as_int( + dead_summary.get("high_confidence", dead_summary.get("critical")) + ) + if dead_total > 0 and dead_high_conf == 0: + dead_high_conf = sum( + 1 + for item in _as_sequence(ctx.dead_code_map.get("items")) + if str(_as_mapping(item).get("confidence", "")).strip().lower() + == CONFIDENCE_HIGH + ) + dep_cycles = len(_as_sequence(ctx.dependencies_map.get("cycles"))) + structural_count = len( + tuple(normalize_structural_findings(ctx.structural_findings)) + ) + quality_issues = ( + _as_int(_as_mapping(ctx.complexity_map.get("summary")).get("high_risk")) + + _as_int(_as_mapping(ctx.coupling_map.get("summary")).get("high_risk")) + + _as_int(_as_mapping(ctx.cohesion_map.get("summary")).get("low_cohesion")) + ) + + def 
_tab_badge(count: int) -> str: + if count == 0: + return "" + return f'{count}' + + # -- Main tab navigation -- + tab_defs = [ + ("overview", "Overview", overview_html, ""), + ("clones", "Clones", clones_html, _tab_badge(ctx.clone_groups_total)), + ("quality", "Quality", quality_html, _tab_badge(quality_issues)), + ("dependencies", "Dependencies", dependencies_html, _tab_badge(dep_cycles)), + ("dead-code", "Dead Code", dead_code_html, _tab_badge(dead_high_conf)), + ( + "suggestions", + "Suggestions", + suggestions_html, + _tab_badge(len(ctx.suggestions)), + ), + ( + "structural-findings", + "Findings", + structural_html, + _tab_badge(structural_count), + ), + ] + + # Extra data attrs for specific tabs (contract hooks) + tab_extra_attrs: dict[str, str] = { + "clones": f'data-main-clones-count="{ctx.clone_groups_total}"', + } + + tab_buttons: list[str] = [] + tab_panels: list[str] = [] + for idx, (tab_id, tab_label, panel_html, badge) in enumerate(tab_defs): + selected = "true" if idx == 0 else "false" + extra = tab_extra_attrs.get(tab_id, "") + if extra: + extra = " " + extra + tab_buttons.append( + f'" + ) + active = " active" if idx == 0 else "" + tab_panels.append( + f'
' + f"{panel_html}
" + ) + + tabs_html = ( + '
' + '
" + ) + panels_html = "".join(tab_panels) + + # -- Provenance dot color -- + _bl_verified = _meta_pick( + ctx.meta.get("baseline_payload_sha256_verified"), + ctx.baseline_meta.get("payload_sha256_verified"), + ) + _bl_loaded = _meta_pick( + ctx.meta.get("baseline_loaded"), + ctx.baseline_meta.get("loaded"), + ) + if _bl_verified: + prov_dot_cls = "dot-green" + elif _bl_loaded is True and _bl_verified is not True: + prov_dot_cls = "dot-red" + elif _bl_loaded is False or _bl_loaded is None: + prov_dot_cls = "dot-amber" + else: + prov_dot_cls = "dot-neutral" + + # -- Topbar -- + topbar_html = ( + '
' + '
' + f"{BRAND_LOGO}" + '
' + f"

CodeClone Report{ctx.brand_project_html}

" + f'
{ctx.brand_meta}
' + "
" + '
' + f'' + f'" + "
" + ) + + # -- Footer -- + version = str(ctx.meta.get("codeclone_version", __version__)) + footer_html = ( + '" + ) + + cmd_palette_html = "" # removed + finding_why_modal_html = ( + '' + '" + '' + "" + ) + help_modal_html = "" # removed + + badge_modal_html = ( + '' + '" + '" + ) + + # -- Body assembly -- + body_html = ( + topbar_html + + '
' + + tabs_html + + panels_html + + footer_html + + "
" + + meta_html # , positioned by browser + + finding_why_modal_html + + help_modal_html + + cmd_palette_html + + badge_modal_html + ) + + # -- CSS assembly -- + pygments_dark = _pygments_css("monokai") + pygments_light = _pygments_css("default") + + def _codebox_rules(css: str) -> str: + """Extract only .codebox-scoped rules (drop bare pre/td/span rules).""" + out: list[str] = [] + for line in css.splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("/*"): + continue + if not stripped.startswith(".codebox"): + continue + out.append(stripped) + return "\n".join(out) + + def _scope(rules: str, prefix: str) -> str: + """Prepend *prefix* before every `.codebox` selector.""" + return rules.replace(".codebox", f"{prefix} .codebox") + + css_parts = [build_css()] + + # Dark Pygments (monokai) — unscoped base, dark-first design + if pygments_dark: + css_parts.append(pygments_dark) + + # Light Pygments — comprehensive theme override + # + # Problem: Pygments "default" style doesn't define rules for every + # token class that "monokai" does (.n, .p, .esc, .g, .l, .x, …). + # Those monokai rules set color:#F8F8F2 (white) which becomes + # invisible on a light background. + # + # Solution: a CSS reset that clears ALL span styling inside .codebox + # back to inherit, then the light Pygments rules re-apply colors + # only for tokens the light theme cares about. + if pygments_light: + light_rules = _codebox_rules(pygments_light) + if light_rules: + # Reset: clear monokai colors for tokens light theme omits. + # NB: color must be var(--text-primary), NOT inherit — because + # the parent .codebox still carries monokai's color:#F8F8F2 + # (white) and inherit would propagate that invisible color. 
+ _reset = ( + "color:var(--text-primary);font-style:inherit;" + "font-weight:inherit;" + "background-color:transparent;border:none" + ) + + # Override .codebox itself: monokai sets color:#F8F8F2 on + # .codebox — light theme needs dark text for non-span content + _cb_override = "color:var(--text-primary);background:var(--bg-body)" + + # 1) Explicit [data-theme="light"] + explicit_reset = ( + f'[data-theme="light"] .codebox{{{_cb_override}}}\n' + f'[data-theme="light"] .codebox span{{{_reset}}}' + ) + explicit_rules = _scope(light_rules, '[data-theme="light"]') + css_parts.append(explicit_reset) + css_parts.append(explicit_rules) + + # 2) Auto-detect: OS prefers light + no explicit dark + _auto_pfx = ":root:not([data-theme])" + auto_reset = ( + f"{_auto_pfx} .codebox{{{_cb_override}}}\n" + f"{_auto_pfx} .codebox span{{{_reset}}}" + ) + auto_rules = _scope(light_rules, _auto_pfx) + css_parts.append( + f"@media (prefers-color-scheme:light){{{auto_reset}\n{auto_rules}}}" + ) + css_html = "\n".join(css_parts) + + # -- JS -- + js_html = build_js() + + return REPORT_TEMPLATE.safe_substitute( + title=_escape_html(title), + font_css_url=FONT_CSS_URL, + css=css_html, + js=js_html, + body=body_html, + ) diff --git a/codeclone/_html_report/_components.py b/codeclone/_html_report/_components.py new file mode 100644 index 0000000..76193e6 --- /dev/null +++ b/codeclone/_html_report/_components.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared UI components: insight banners, summary helpers, chip rows.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Literal + +from .. 
import _coerce +from .._html_badges import _source_kind_badge_html +from .._html_escape import _escape_attr, _escape_html + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping + +Tone = Literal["ok", "warn", "risk", "info"] + +_EMPTY_ICON = ( + '' + '' + '' +) + + +def insight_block(*, question: str, answer: str, tone: Tone = "info") -> str: + return ( + f'
' + f'
{_escape_html(question)}
' + f'
{_escape_html(answer)}
' + "
" + ) + + +def overview_cluster_header(title: str, subtitle: str | None = None) -> str: + sub = ( + f'

{_escape_html(subtitle)}

' + if subtitle + else "" + ) + return ( + '
' + f'

{_escape_html(title)}

' + f"{sub}" + "
" + ) + + +_ICON_ALERT = ( + '' + '' + '' +) + +_ICON_PIE = ( + '' + '' + '' +) + +_ICON_RADAR = ( + '' + '' + '' + '' + '' +) + +_ICON_BAR = ( + '' + '' + '' + '' +) + +_SUMMARY_ICONS: dict[str, str] = { + "top risks": _ICON_ALERT, + "source breakdown": _ICON_PIE, + "health profile": _ICON_RADAR, + "issue breakdown": _ICON_BAR, +} + + +def overview_summary_item_html(*, label: str, body_html: str) -> str: + icon = _SUMMARY_ICONS.get(label.lower(), "") + return ( + '
' + '
' + f"{icon}{_escape_html(label)}
" + f"{body_html}" + "
" + ) + + +def overview_source_breakdown_html(breakdown: Mapping[str, object]) -> str: + sorted_items = sorted( + ((str(k), _as_int(v)) for k, v in breakdown.items()), + key=lambda item: -item[1], + ) + rows = [(kind, count) for kind, count in sorted_items if count > 0] + if not rows: + return '
n/a
' + + total = sum(c for _, c in rows) + parts: list[str] = [] + for kind, count in rows: + pct = round(count / total * 100) if total else 0 + parts.append( + '
' + f"{_source_kind_badge_html(kind)}" + f'{count}' + f'' + f'' + "
" + ) + return '
' + "".join(parts) + "
" diff --git a/codeclone/_html_report/_context.py b/codeclone/_html_report/_context.py new file mode 100644 index 0000000..8d05650 --- /dev/null +++ b/codeclone/_html_report/_context.py @@ -0,0 +1,292 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""ReportContext — immutable shared state for all section renderers.""" + +from __future__ import annotations + +from collections.abc import Collection, Mapping, Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from .. import _coerce +from ..contracts import REPORT_SCHEMA_VERSION +from ..report.overview import build_report_overview, materialize_report_overview + +if TYPE_CHECKING: + from .._html_snippets import _FileCache + from ..models import ( + GroupItemLike, + GroupMapLike, + MetricsDiff, + StructuralFindingGroup, + Suggestion, + ) + +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +@dataclass(frozen=True, slots=True) +class ReportContext: + """Immutable bag of pre-extracted data passed to every section renderer.""" + + # -- metadata -- + meta: Mapping[str, object] + baseline_meta: Mapping[str, object] + cache_meta: Mapping[str, object] + metrics_baseline_meta: Mapping[str, object] + runtime_meta: Mapping[str, object] + scan_root: str + project_name: str + report_schema_version: str + report_generated_at: str + brand_meta: str + brand_project_html: str + baseline_loaded: bool + baseline_status: str + baseline_split_note: str + + # -- clone groups (pre-sorted) -- + func_sorted: tuple[tuple[str, Sequence[GroupItemLike]], ...] + block_sorted: tuple[tuple[str, Sequence[GroupItemLike]], ...] + segment_sorted: tuple[tuple[str, Sequence[GroupItemLike]], ...] 
+ block_group_facts: dict[str, dict[str, str]] + new_func_keys: frozenset[str] + new_block_keys: frozenset[str] + + # -- metrics sub-maps -- + metrics_map: Mapping[str, object] + complexity_map: Mapping[str, object] + coupling_map: Mapping[str, object] + cohesion_map: Mapping[str, object] + dependencies_map: Mapping[str, object] + dead_code_map: Mapping[str, object] + health_map: Mapping[str, object] + + # -- suggestions + structural -- + suggestions: tuple[Suggestion, ...] + structural_findings: tuple[StructuralFindingGroup, ...] + + # -- derived -- + overview_data: Mapping[str, object] + report_document: Mapping[str, object] + derived_map: Mapping[str, object] + integrity_map: Mapping[str, object] + + # -- baseline diff -- + metrics_diff: MetricsDiff | None + + # -- rendering config -- + file_cache: _FileCache + context_lines: int + max_snippet_lines: int + + # -- convenience -- + @property + def has_any_clones(self) -> bool: + return bool(self.func_sorted or self.block_sorted or self.segment_sorted) + + @property + def metrics_available(self) -> bool: + return bool(self.metrics_map) + + @property + def clone_groups_total(self) -> int: + return len(self.func_sorted) + len(self.block_sorted) + len(self.segment_sorted) + + @property + def clone_instances_total(self) -> int: + return ( + sum(len(items) for _, items in self.func_sorted) + + sum(len(items) for _, items in self.block_sorted) + + sum(len(items) for _, items in self.segment_sorted) + ) + + def relative_path(self, abspath: str) -> str: + """Strip scan_root prefix to get a concise project-relative path.""" + if not self.scan_root or not abspath: + return abspath + text = abspath.replace("\\", "/") + root = self.scan_root.replace("\\", "/").rstrip("/") + "/" + if text.startswith(root): + return text[len(root) :] + return abspath + + def bare_qualname(self, qualname: str, filepath: str) -> str: + """Strip file-derived module prefix from qualname.""" + if not qualname: + return qualname + if ":" in qualname: 
+ return qualname.rsplit(":", maxsplit=1)[-1] + if "." not in qualname: + return qualname + rel = self.relative_path(filepath) + for suffix in ("/__init__.py", ".py"): + if rel.endswith(suffix): + rel = rel[: -len(suffix)] + break + prefix = rel.replace("/", ".") + "." + if qualname.startswith(prefix): + bare = qualname[len(prefix) :] + if bare: + return bare + return qualname + + +def _group_sort_key(items: Collection[object]) -> tuple[int]: + return (-len(items),) + + +def _meta_pick(*values: object) -> object | None: + for value in values: + if value is None: + continue + if isinstance(value, str) and not value.strip(): + continue + return value + return None + + +def build_context( + *, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_group_facts: dict[str, dict[str, str]], + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + report_meta: Mapping[str, object] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Sequence[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, + report_document: Mapping[str, object] | None = None, + metrics_diff: MetricsDiff | None = None, + file_cache: _FileCache, + context_lines: int = 3, + max_snippet_lines: int = 220, +) -> ReportContext: + """Build a ReportContext from raw build_html_report parameters.""" + from .._html_escape import _escape_html + + meta = dict(report_meta or {}) + baseline_meta = _as_mapping(meta.get("baseline")) + cache_meta = _as_mapping(meta.get("cache")) + metrics_baseline_meta = _as_mapping(meta.get("metrics_baseline")) + runtime_meta = _as_mapping(meta.get("runtime")) + report_document_map = _as_mapping(report_document) + derived_map = _as_mapping(report_document_map.get("derived")) + integrity_map = _as_mapping(report_document_map.get("integrity")) + + report_schema_version = str( + meta.get("report_schema_version") or 
REPORT_SCHEMA_VERSION + ) + report_generated_at = str( + _meta_pick( + meta.get("report_generated_at_utc"), + runtime_meta.get("report_generated_at_utc"), + ) + or "" + ).strip() + brand_meta = ( + f"Generated at {report_generated_at}" + if report_generated_at + else f"Report schema {report_schema_version}" + ) + scan_root_raw = str( + _meta_pick(meta.get("scan_root"), runtime_meta.get("scan_root_absolute")) or "" + ).strip() + project_name_raw = str(meta.get("project_name", "")).strip() + brand_project_html = ( + f' for ' + f'{_escape_html(project_name_raw)}' + f"" + if project_name_raw + else "" + ) + + baseline_loaded = bool(meta.get("baseline_loaded")) + baseline_status = str(meta.get("baseline_status", "")).strip().lower() + if baseline_loaded and baseline_status == "ok": + baseline_split_note = ( + "Split is based on baseline: known duplicates are already " + "recorded in baseline, new duplicates are absent from baseline." + ) + else: + baseline_split_note = ( + "Baseline is not loaded or not trusted: " + "all duplicates are treated as new versus an empty baseline." 
+ ) + + func_sorted = tuple( + sorted(func_groups.items(), key=lambda kv: (*_group_sort_key(kv[1]), kv[0])) + ) + block_sorted = tuple( + sorted(block_groups.items(), key=lambda kv: (*_group_sort_key(kv[1]), kv[0])) + ) + segment_sorted = tuple( + sorted(segment_groups.items(), key=lambda kv: (*_group_sort_key(kv[1]), kv[0])) + ) + + metrics_map = _as_mapping(metrics) + complexity_map = _as_mapping(metrics_map.get("complexity")) + coupling_map = _as_mapping(metrics_map.get("coupling")) + cohesion_map = _as_mapping(metrics_map.get("cohesion")) + dependencies_map = _as_mapping(metrics_map.get("dependencies")) + dead_code_map = _as_mapping(metrics_map.get("dead_code")) + health_map = _as_mapping(metrics_map.get("health")) + + suggestions_tuple = tuple(suggestions or ()) + + overview_data = _as_mapping(derived_map.get("overview")) + if not overview_data: + overview_data = build_report_overview( + suggestions=list(suggestions_tuple), + metrics=metrics_map, + ) + else: + overview_data = materialize_report_overview( + overview=overview_data, + hotlists=_as_mapping(derived_map.get("hotlists")), + findings=_as_mapping(report_document_map.get("findings")), + ) + + return ReportContext( + meta=meta, + baseline_meta=baseline_meta, + cache_meta=cache_meta, + metrics_baseline_meta=metrics_baseline_meta, + runtime_meta=runtime_meta, + scan_root=scan_root_raw, + project_name=project_name_raw, + report_schema_version=report_schema_version, + report_generated_at=report_generated_at, + brand_meta=brand_meta, + brand_project_html=brand_project_html, + baseline_loaded=baseline_loaded, + baseline_status=baseline_status, + baseline_split_note=baseline_split_note, + func_sorted=func_sorted, + block_sorted=block_sorted, + segment_sorted=segment_sorted, + block_group_facts=block_group_facts, + new_func_keys=frozenset(new_function_group_keys or ()), + new_block_keys=frozenset(new_block_group_keys or ()), + metrics_map=metrics_map, + complexity_map=complexity_map, + coupling_map=coupling_map, 
+ cohesion_map=cohesion_map, + dependencies_map=dependencies_map, + dead_code_map=dead_code_map, + health_map=health_map, + suggestions=suggestions_tuple, + structural_findings=tuple(structural_findings or ()), + overview_data=overview_data, + report_document=report_document_map, + derived_map=derived_map, + integrity_map=integrity_map, + metrics_diff=metrics_diff, + file_cache=file_cache, + context_lines=context_lines, + max_snippet_lines=max_snippet_lines, + ) diff --git a/codeclone/_html_report/_glossary.py b/codeclone/_html_report/_glossary.py new file mode 100644 index 0000000..7712253 --- /dev/null +++ b/codeclone/_html_report/_glossary.py @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Tooltip glossary for report table headers and stat cards.""" + +from __future__ import annotations + +from .._html_escape import _escape_attr + +GLOSSARY: dict[str, str] = { + # Complexity + "function": "Fully-qualified function or method name", + "class": "Fully-qualified class name", + "name": "Symbol name (function, class, or variable)", + "file": "Source file path relative to scan root", + "location": "File and line range where the symbol is defined", + "cc": "Cyclomatic complexity — number of independent execution paths", + "nesting": "Maximum nesting depth of control-flow statements", + "risk": "Risk level based on metric thresholds (low / medium / high)", + # Coupling / cohesion + "cbo": "Coupling Between Objects — number of classes this class depends on", + "coupled classes": "Resolved class dependencies used to compute CBO for this class", + "lcom4": "Lack of Cohesion of Methods — connected components in method/field graph", + "methods": "Number of methods defined in the class", + "fields": "Number of instance variables (attributes) in the class", + # Dead code + "line": "Source line number where the symbol starts", + "kind": "Symbol type: function, class, import, or variable", + "confidence": "Detection confidence (low / 
medium / high / critical)", + # Dependencies + "longest chain": "Longest transitive import chain between modules", + "length": "Number of modules in the dependency chain", + "cycle": "Circular import dependency between modules", + # Suggestions + "priority": "Computed priority score (higher = more urgent)", + "severity": "Issue severity: critical, warning, or info", + "category": ( + "Metric category: clone, complexity, coupling, cohesion, dead_code, dependency" + ), + "title": "Brief description of the suggested improvement", + "effort": "Estimated effort to fix: easy, moderate, or hard", + "steps": "Actionable steps to resolve the issue", + # Dependency stat cards + "modules": "Total number of Python modules analyzed", + "edges": "Total number of import relationships between modules", + "max depth": "Longest chain of transitive imports", + "cycles": "Number of circular import dependencies detected", +} + + +def glossary_tip(label: str) -> str: + """Return a tooltip ```` for *label*, or ``''`` if unknown.""" + tip = GLOSSARY.get(label.lower(), "") + if not tip: + return "" + return f' ?' 
diff --git a/codeclone/_html_report/_icons.py b/codeclone/_html_report/_icons.py new file mode 100644 index 0000000..a7304e5 --- /dev/null +++ b/codeclone/_html_report/_icons.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""SVG icon constants for the HTML report (Lucide-style).""" + +from __future__ import annotations + + +def _svg(size: int, sw: str, body: str) -> str: + return ( + f'{body}' + ) + + +BRAND_LOGO = ( + '" +) + +ICONS: dict[str, str] = { + "search": _svg( + 16, + "2.5", + '', + ), + "clear": _svg( + 16, + "2.5", + '', + ), + "chev_down": _svg( + 16, + "2.5", + '', + ), + "theme": _svg( + 16, + "2", + '', + ), + "check": _svg( + 48, + "2", + '', + ), + "prev": _svg( + 16, + "2", + '', + ), + "next": _svg( + 16, + "2", + '', + ), + "sort_asc": _svg( + 12, + "2", + '', + ), + "sort_desc": _svg( + 12, + "2", + '', + ), +} diff --git a/codeclone/_html_report/_sections/__init__.py b/codeclone/_html_report/_sections/__init__.py new file mode 100644 index 0000000..a8917fd --- /dev/null +++ b/codeclone/_html_report/_sections/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/_html_report/_sections/_clones.py b/codeclone/_html_report/_sections/_clones.py new file mode 100644 index 0000000..230e716 --- /dev/null +++ b/codeclone/_html_report/_sections/_clones.py @@ -0,0 +1,645 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Clones panel renderer — function/block/segment sections.""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING, Literal + +from ... 
import _coerce +from ..._html_badges import _source_kind_badge_html +from ..._html_data_attrs import _build_data_attrs +from ..._html_escape import _escape_attr, _escape_html +from ..._html_filters import CLONE_TYPE_OPTIONS, SPREAD_OPTIONS, _render_select +from ..._html_snippets import _render_code_block +from ...report._source_kinds import SOURCE_KIND_FILTER_VALUES +from ...report.derived import ( + combine_source_kinds, + group_spread, + report_location_from_group_item, +) +from ...report.explain_contract import format_group_instance_compare_meta +from ...report.suggestions import classify_clone_type +from .._components import Tone, insight_block +from .._icons import ICONS +from .._tabs import render_split_tabs + +if TYPE_CHECKING: + from ...models import GroupItemLike + from .._context import ReportContext + +_as_int = _coerce.as_int + +_HEX_SET = frozenset("0123456789abcdefABCDEF") + + +def _looks_like_hash(text: str) -> bool: + """Return True if text starts with a long hex string (likely a hash key).""" + bare = text.split("|")[0].strip() + return len(bare) >= 16 and all(c in _HEX_SET for c in bare) + + +def _derive_group_display_name( + gkey: str, + items: Sequence[Mapping[str, object]], + section_id: str, + block_meta: Mapping[str, str], + ctx: ReportContext, +) -> str: + """Build a human-friendly group display name from items, never raw hashes.""" + # Explicit overrides from block_group_facts + if section_id == "blocks": + if block_meta.get("group_display_name"): + return str(block_meta["group_display_name"]) + if block_meta.get("pattern_display"): + return str(block_meta["pattern_display"]) + + # For function clones — gkey is already qualname, use it directly + if section_id == "functions" and not _looks_like_hash(gkey): + return gkey + + # For any section with hash-like keys — derive from items + if items: + # Collect short paths from items + short_names: list[str] = [] + for it in items[:3]: + qn = str(it.get("qualname", "")) + fp = 
str(it.get("filepath", "")) + name = ctx.bare_qualname(qn, fp) + if name: + short_names.append(name) + else: + rel = ctx.relative_path(fp) + if rel: + short_names.append(rel) + if short_names: + label = " \u2022 ".join(dict.fromkeys(short_names)) + if len(label) > 72: + label = label[:68] + "\u2026" + return label + + # Fallback: truncate key + if len(gkey) > 56: + return f"{gkey[:24]}\u2026{gkey[-16:]}" + return gkey + + +def _render_group_explanation(meta: Mapping[str, object]) -> str: + if not meta: + return "" + items: list[tuple[str, str]] = [] + if meta.get("match_rule"): + items.append((f"match_rule: {meta['match_rule']}", "group-explain-item")) + if meta.get("block_size"): + items.append((f"block_size: {meta['block_size']}", "group-explain-item")) + if meta.get("signature_kind"): + items.append( + (f"signature_kind: {meta['signature_kind']}", "group-explain-item") + ) + if meta.get("merged_regions"): + items.append( + (f"merged_regions: {meta['merged_regions']}", "group-explain-item") + ) + pattern_value = str(meta.get("pattern", "")).strip() + if pattern_value: + pattern_label = str(meta.get("pattern_label", pattern_value)).strip() + pattern_display = str(meta.get("pattern_display", "")).strip() + if pattern_display: + items.append( + (f"pattern: {pattern_label} ({pattern_display})", "group-explain-item") + ) + else: + items.append((f"pattern: {pattern_label}", "group-explain-item")) + hint_id = str(meta.get("hint", "")).strip() + if hint_id: + hint_label = str(meta.get("hint_label", hint_id)).strip() + items.append((f"hint: {hint_label}", "group-explain-item group-explain-warn")) + if meta.get("hint_confidence"): + items.append( + ( + f"hint_confidence: {meta['hint_confidence']}", + "group-explain-item group-explain-muted", + ) + ) + if meta.get("assert_ratio"): + items.append( + ( + f"assert_ratio: {meta['assert_ratio']}", + "group-explain-item group-explain-muted", + ) + ) + if meta.get("consecutive_asserts"): + items.append( + ( + 
f"consecutive_asserts: {meta['consecutive_asserts']}", + "group-explain-item group-explain-muted", + ) + ) + hint_context = str(meta.get("hint_context_label", "")).strip() + if hint_context: + items.append((hint_context, "group-explain-item group-explain-muted")) + + attrs = { + "data-match-rule": str(meta.get("match_rule", "")), + "data-block-size": str(meta.get("block_size", "")), + "data-signature-kind": str(meta.get("signature_kind", "")), + "data-merged-regions": str(meta.get("merged_regions", "")), + "data-pattern": str(meta.get("pattern", "")), + "data-pattern-label": str(meta.get("pattern_label", "")), + "data-hint": str(meta.get("hint", "")), + "data-hint-label": str(meta.get("hint_label", "")), + "data-hint-context-label": str(meta.get("hint_context_label", "")), + "data-hint-confidence": str(meta.get("hint_confidence", "")), + "data-assert-ratio": str(meta.get("assert_ratio", "")), + "data-consecutive-asserts": str(meta.get("consecutive_asserts", "")), + } + attr_html = " ".join(f'{k}="{_escape_attr(v)}"' for k, v in attrs.items() if v) + parts = [f'{_escape_html(text)}' for text, css in items] + note = "" + if isinstance(meta.get("hint_note"), str): + note = ( + f'

{_escape_html(str(meta["hint_note"]))}

' + ) + return f'
{"".join(parts)}{note}
' + + +def _render_section_toolbar( + section_id: str, + section_title: str, + group_count: int, +) -> str: + return ( + f'" + ) + + +def _group_block_meta( + section_id: str, + group_key: str, + block_group_facts: Mapping[str, Mapping[str, object]], +) -> dict[str, str]: + if section_id != "blocks": + return {} + return { + str(k): str(v) + for k, v in block_group_facts.get(group_key, {}).items() + if v is not None + } + + +def _group_item_span(item: Mapping[str, object]) -> int: + return max( + 0, + _as_int(item.get("end_line", 0)) - _as_int(item.get("start_line", 0)) + 1, + ) + + +def _resolve_group_span_and_arity( + section_id: str, + items: Sequence[Mapping[str, object]], + block_meta: Mapping[str, str], +) -> tuple[int, int]: + group_span = max((_group_item_span(item) for item in items), default=0) + group_arity = len(items) + if section_id != "blocks": + return group_span, group_arity + + block_size_raw = block_meta.get("block_size", "").strip() + if block_size_raw.isdigit(): + group_span = int(block_size_raw) + + group_arity_raw = block_meta.get("group_arity", "").strip() + if group_arity_raw.isdigit() and int(group_arity_raw) > 0: + group_arity = int(group_arity_raw) + return group_span, group_arity + + +def _clone_kind_for_section( + section_id: str, +) -> Literal["function", "block", "segment"]: + if section_id == "functions": + return "function" + if section_id == "blocks": + return "block" + return "segment" + + +def _build_group_data_attrs( + *, + group_id: str, + group_span: int, + group_arity: int, + clone_type: str, + group_source_kind: str, + spread_bucket: str, + spread_files: int, + spread_functions: int, + block_meta: Mapping[str, str], +) -> dict[str, object | None]: + attrs: dict[str, object | None] = { + "data-group-id": group_id, + "data-clone-size": str(group_span), + "data-items-count": str(group_arity), + "data-group-arity": str(group_arity), + "data-clone-type": clone_type, + "data-source-kind": group_source_kind, + 
"data-spread-bucket": spread_bucket, + "data-spread-files": str(spread_files), + "data-spread-functions": str(spread_functions), + } + if not block_meta: + return attrs + attrs.update( + { + "data-match-rule": block_meta.get("match_rule"), + "data-block-size": block_meta.get("block_size"), + "data-signature-kind": block_meta.get("signature_kind"), + "data-merged-regions": block_meta.get("merged_regions"), + "data-pattern": block_meta.get("pattern"), + "data-pattern-label": block_meta.get("pattern_label"), + "data-hint": block_meta.get("hint"), + "data-hint-label": block_meta.get("hint_label"), + "data-hint-context-label": block_meta.get("hint_context_label"), + "data-hint-confidence": block_meta.get("hint_confidence"), + "data-assert-ratio": block_meta.get("assert_ratio"), + "data-consecutive-asserts": block_meta.get("consecutive_asserts"), + "data-boilerplate-asserts": block_meta.get("boilerplate_asserts"), + } + ) + return attrs + + +def _metrics_button_html(section_id: str, group_id: str) -> str: + if section_id != "blocks": + return "" + return ( + f'' + ) + + +def _compare_note_html( + section_id: str, + group_arity: int, + block_meta: Mapping[str, str], +) -> str: + if section_id != "blocks" or group_arity <= 2: + return "" + compare_note = block_meta.get("group_compare_note", "").strip() + if not compare_note: + return "" + return f'
{_escape_html(compare_note)}
' + + +def _resolve_peer_count(section_id: str, block_meta: Mapping[str, str]) -> int: + if section_id != "blocks": + return 0 + peer_count_raw = block_meta.get("instance_peer_count", "").strip() + if peer_count_raw.isdigit() and int(peer_count_raw) >= 0: + return int(peer_count_raw) + return 0 + + +def _render_group_items_html( + *, + ctx: ReportContext, + section_id: str, + items: Sequence[GroupItemLike], + group_id: str, + group_arity: int, + peer_count: int, + block_meta: Mapping[str, str], +) -> str: + rendered: list[str] = [f'
'] + include_compare_meta = section_id == "blocks" and "group_arity" in block_meta + + for item_index, item in enumerate(items, start=1): + filepath = str(item.get("filepath", "")) + qualname = str(item.get("qualname", "")) + start_line = _as_int(item.get("start_line", 0)) + end_line = _as_int(item.get("end_line", 0)) + snippet = _render_code_block( + filepath=filepath, + start_line=start_line, + end_line=end_line, + file_cache=ctx.file_cache, + context=ctx.context_lines, + max_lines=ctx.max_snippet_lines, + ) + display_qualname = ctx.bare_qualname(qualname, filepath) + display_filepath = ctx.relative_path(filepath) + compare_html = "" + if include_compare_meta: + compare_text = format_group_instance_compare_meta( + instance_index=item_index, + group_arity=group_arity, + peer_count=peer_count, + ) + compare_html = f'
{compare_text}
' + rendered.append( + f'
' + '
' + f'
' + f"{_escape_html(display_qualname)}
" + f'
' + f"{_escape_html(display_filepath)}:{start_line}-{end_line}
" + f"{compare_html}" + f"{snippet.code_html}" + "
" + ) + rendered.append("
") + return "".join(rendered) + + +def _render_group_html( + *, + ctx: ReportContext, + section_id: str, + group_index: int, + group_key: str, + items: Sequence[GroupItemLike], + block_group_facts: Mapping[str, Mapping[str, object]], + section_novelty: Mapping[str, str], +) -> str: + group_id = f"{section_id}-{group_index}" + search_parts: list[str] = [str(group_key)] + for item in items: + search_parts.append(str(item.get("qualname", ""))) + search_parts.append(str(item.get("filepath", ""))) + search_blob = _escape_attr(" ".join(search_parts).lower()) + + block_meta = _group_block_meta(section_id, group_key, block_group_facts) + group_name = _derive_group_display_name( + group_key, + items, + section_id, + block_meta, + ctx, + ) + group_span, group_arity = _resolve_group_span_and_arity( + section_id, + items, + block_meta, + ) + group_summary = ( + f"{group_arity} instances \u2022 block size {group_span}" + if group_span > 0 + else f"{group_arity} instances" + ) + clone_type = classify_clone_type( + items=items, + kind=_clone_kind_for_section(section_id), + ) + group_locations = tuple( + report_location_from_group_item(item, scan_root=ctx.scan_root) for item in items + ) + group_source_kind = combine_source_kinds( + location.source_kind for location in group_locations + ) + spread_files, spread_functions = group_spread(group_locations) + spread_bucket = "high" if spread_files > 1 or spread_functions > 1 else "low" + group_summary += f" \u2022 spread {spread_functions} fn / {spread_files} files" + group_attrs = _build_group_data_attrs( + group_id=group_id, + group_span=group_span, + group_arity=group_arity, + clone_type=clone_type, + group_source_kind=group_source_kind, + spread_bucket=spread_bucket, + spread_files=spread_files, + spread_functions=spread_functions, + block_meta=block_meta, + ) + peer_count = _resolve_peer_count(section_id, block_meta) + explanation_html = _render_group_explanation(block_meta) if block_meta else "" + + return ( + f'
' + '
' + '
' + f'' + '
' + f'
{_escape_html(group_name)}
' + f'
{_escape_html(group_summary)}
' + "
" + '
' + f"{_source_kind_badge_html(group_source_kind)}" + f'{_escape_html(clone_type)}' + f'{group_arity}' + f"{_metrics_button_html(section_id, group_id)}" + "
" + f"{_compare_note_html(section_id, group_arity, block_meta)}" + f"{explanation_html}" + + _render_group_items_html( + ctx=ctx, + section_id=section_id, + items=items, + group_id=group_id, + group_arity=group_arity, + peer_count=peer_count, + block_meta=block_meta, + ) + + "
" + ) + + +def _render_section( + ctx: ReportContext, + section_id: str, + section_title: str, + groups: Sequence[tuple[str, Sequence[GroupItemLike]]], + *, + novelty_by_group: Mapping[str, str] | None = None, +) -> str: + if not groups: + return "" + + block_group_facts = ctx.block_group_facts + section_novelty = novelty_by_group or {} + has_novelty_filter = bool(section_novelty) + + out: list[str] = [ + f'
', + _render_section_toolbar(section_id, section_title, len(groups)), + '
', + ] + + for idx, (gkey, items) in enumerate(groups, start=1): + out.append( + _render_group_html( + ctx=ctx, + section_id=section_id, + group_index=idx, + group_key=gkey, + items=items, + block_group_facts=block_group_facts, + section_novelty=section_novelty, + ) + ) + + out.append("
") # section-body + out.append("
") + return "\n".join(out) + + +def render_clones_panel(ctx: ReportContext) -> tuple[str, bool, int, int]: + """Build the Clones tab panel HTML. + + Returns ``(panel_html, novelty_enabled, total_new, total_known)``. + """ + # Empty state + if not ctx.has_any_clones: + empty = ( + '
' + f'
{ICONS["check"]}
' + "

No code clones detected

" + "

No structural, block-level, or segment-level duplication was found " + "above configured thresholds.

" + '

This usually indicates healthy abstraction boundaries.

' + "
" + ) + return empty, False, 0, 0 + + # Novelty maps + func_novelty = { + gk: ("new" if gk in ctx.new_func_keys else "known") for gk, _ in ctx.func_sorted + } + block_novelty = { + gk: ("new" if gk in ctx.new_block_keys else "known") + for gk, _ in ctx.block_sorted + } + novelty_enabled = bool(func_novelty) or bool(block_novelty) + total_new = sum(1 for v in func_novelty.values() if v == "new") + total_new += sum(1 for v in block_novelty.values() if v == "new") + total_known = sum(1 for v in func_novelty.values() if v == "known") + total_known += sum(1 for v in block_novelty.values() if v == "known") + default_novelty = "new" if total_new > 0 else "known" + + global_novelty_html = "" + if novelty_enabled: + global_novelty_html = ( + '
' + '
' + "

Duplicate Scope

" + '
' + '' + '' + "
" + f'

{_escape_html(ctx.baseline_split_note)}

' + "
" + ) + + func_section = _render_section( + ctx, + "functions", + "Function clones", + list(ctx.func_sorted), + novelty_by_group=func_novelty, + ) + block_section = _render_section( + ctx, + "blocks", + "Block clones", + list(ctx.block_sorted), + novelty_by_group=block_novelty, + ) + segment_section = _render_section( + ctx, + "segments", + "Segment clones", + list(ctx.segment_sorted), + ) + + sub_tabs: list[tuple[str, str, int, str]] = [] + if ctx.func_sorted: + sub_tabs.append(("functions", "Functions", len(ctx.func_sorted), func_section)) + if ctx.block_sorted: + sub_tabs.append(("blocks", "Blocks", len(ctx.block_sorted), block_section)) + if ctx.segment_sorted: + sub_tabs.append( + ("segments", "Segments", len(ctx.segment_sorted), segment_section) + ) + + panel = global_novelty_html + render_split_tabs( + group_id="clones", tabs=sub_tabs, emit_clone_counters=True + ) + + # Insight block + if novelty_enabled: + clones_answer = ( + f"{ctx.clone_groups_total} groups total; " + f"{total_new} new vs {total_known} known." + ) + else: + clones_answer = f"{ctx.clone_groups_total} groups and {ctx.clone_instances_total} instances." + clones_tone: Tone = "warn" if ctx.clone_groups_total > 0 else "ok" + panel = ( + insight_block( + question="Where is duplication concentrated right now?", + answer=clones_answer, + tone=clones_tone, + ) + + panel + ) + + return panel, novelty_enabled, total_new, total_known diff --git a/codeclone/_html_report/_sections/_coupling.py b/codeclone/_html_report/_sections/_coupling.py new file mode 100644 index 0000000..224e8cc --- /dev/null +++ b/codeclone/_html_report/_sections/_coupling.py @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Coupling + Cohesion panel renderer (unified Quality tab).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ... 
import _coerce +from ..._html_badges import _render_chain_flow +from .._components import Tone, insight_block +from .._tables import render_rows_table +from .._tabs import render_split_tabs + +if TYPE_CHECKING: + from collections.abc import Mapping + + from .._context import ReportContext + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _render_coupled_cell(row_data: Mapping[str, object]) -> str: + raw = _as_sequence(row_data.get("coupled_classes")) + names = sorted( + {str(v).strip() for v in raw if isinstance(v, str) and str(v).strip()} + ) + if not names: + return "-" + if len(names) <= 3: + return _render_chain_flow(names) + preview = _render_chain_flow(names[:3]) + full = _render_chain_flow(names) + rem = len(names) - 3 + return ( + '
' + '' + f'{preview}(+{rem} more)' + "" + f'
{full}
' + "
" + ) + + +def render_quality_panel(ctx: ReportContext) -> str: + """Build the unified Quality tab (Complexity + Coupling + Cohesion sub-tabs).""" + coupling_summary = _as_mapping(ctx.coupling_map.get("summary")) + cohesion_summary = _as_mapping(ctx.cohesion_map.get("summary")) + complexity_summary = _as_mapping(ctx.complexity_map.get("summary")) + + coupling_high_risk = _as_int(coupling_summary.get("high_risk")) + cohesion_low = _as_int(cohesion_summary.get("low_cohesion")) + complexity_high_risk = _as_int(complexity_summary.get("high_risk")) + cc_max = _as_int(complexity_summary.get("max")) + + # Insight + answer: str + tone: Tone + if not ctx.metrics_available: + answer = "Metrics are skipped for this run." + tone = "info" + else: + answer = ( + f"High-complexity: {complexity_high_risk}; " + f"high-coupling: {coupling_high_risk}; " + f"low-cohesion: {cohesion_low}; " + f"max CC {cc_max}; " + f"max CBO {coupling_summary.get('max', 'n/a')}; " + f"max LCOM4 {cohesion_summary.get('max', 'n/a')}." 
+ ) + if coupling_high_risk > 0 and cohesion_low > 0: + tone = "risk" + elif coupling_high_risk > 0 or cohesion_low > 0 or complexity_high_risk > 0: + tone = "warn" + else: + tone = "ok" + + # Complexity sub-tab + cx_rows_data = _as_sequence(ctx.complexity_map.get("functions")) + cx_rows = [ + ( + ctx.bare_qualname( + str(_as_mapping(r).get("qualname", "")), + str(_as_mapping(r).get("filepath", "")), + ), + str(_as_mapping(r).get("filepath", "")), + str(_as_mapping(r).get("cyclomatic_complexity", "")), + str(_as_mapping(r).get("nesting_depth", "")), + str(_as_mapping(r).get("risk", "")), + ) + for r in cx_rows_data[:50] + ] + cx_panel = render_rows_table( + headers=("Function", "File", "CC", "Nesting", "Risk"), + rows=cx_rows, + empty_message="Complexity metrics are not available.", + ctx=ctx, + ) + + # Coupling sub-tab + cp_rows_data = _as_sequence(ctx.coupling_map.get("classes")) + cp_rows = [ + ( + ctx.bare_qualname( + str(_as_mapping(r).get("qualname", "")), + str(_as_mapping(r).get("filepath", "")), + ), + str(_as_mapping(r).get("filepath", "")), + str(_as_mapping(r).get("cbo", "")), + str(_as_mapping(r).get("risk", "")), + _render_coupled_cell(_as_mapping(r)), + ) + for r in cp_rows_data[:50] + ] + cp_panel = render_rows_table( + headers=("Class", "File", "CBO", "Risk", "Coupled classes"), + rows=cp_rows, + empty_message="Coupling metrics are not available.", + raw_html_headers=("Coupled classes",), + ctx=ctx, + ) + + # Cohesion sub-tab + ch_rows_data = _as_sequence(ctx.cohesion_map.get("classes")) + ch_rows = [ + ( + ctx.bare_qualname( + str(_as_mapping(r).get("qualname", "")), + str(_as_mapping(r).get("filepath", "")), + ), + str(_as_mapping(r).get("filepath", "")), + str(_as_mapping(r).get("lcom4", "")), + str(_as_mapping(r).get("risk", "")), + str(_as_mapping(r).get("method_count", "")), + str(_as_mapping(r).get("instance_var_count", "")), + ) + for r in ch_rows_data[:50] + ] + ch_panel = render_rows_table( + headers=("Class", "File", "LCOM4", "Risk", 
"Methods", "Fields"), + rows=ch_rows, + empty_message="Cohesion metrics are not available.", + ctx=ctx, + ) + + sub_tabs: list[tuple[str, str, int, str]] = [ + ("complexity", "Complexity", complexity_high_risk, cx_panel), + ("coupling", "Coupling (CBO)", coupling_high_risk, cp_panel), + ("cohesion", "Cohesion (LCOM4)", cohesion_low, ch_panel), + ] + + return insight_block( + question="Are there quality hotspots in the codebase?", + answer=answer, + tone=tone, + ) + render_split_tabs(group_id="quality", tabs=sub_tabs) diff --git a/codeclone/_html_report/_sections/_dead_code.py b/codeclone/_html_report/_sections/_dead_code.py new file mode 100644 index 0000000..ca87f42 --- /dev/null +++ b/codeclone/_html_report/_sections/_dead_code.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Dead Code panel renderer.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ... import _coerce +from .._components import Tone, insight_block +from .._tables import render_rows_table +from .._tabs import render_split_tabs + +if TYPE_CHECKING: + from collections.abc import Mapping + + from .._context import ReportContext + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _dead_row( + item: Mapping[str, object], ctx: ReportContext +) -> tuple[str, str, str, str, str]: + return ( + ctx.bare_qualname(str(item.get("qualname", "")), str(item.get("filepath", ""))), + str(item.get("filepath", "")), + str(item.get("start_line", "")), + str(item.get("kind", "")), + str(item.get("confidence", "")), + ) + + +def render_dead_code_panel(ctx: ReportContext) -> str: + summary = _as_mapping(ctx.dead_code_map.get("summary")) + dead_total = _as_int(summary.get("total")) + dead_high_conf = _as_int(summary.get("high_confidence", summary.get("critical"))) + dead_suppressed_total = _as_int(summary.get("suppressed", 0)) + + # Count high confidence from items if summary is 0 but 
items have them + items_data = _as_sequence(ctx.dead_code_map.get("items")) + suppressed_data = _as_sequence(ctx.dead_code_map.get("suppressed_items")) + hi_conf_items = sum( + 1 + for it in items_data + if str(_as_mapping(it).get("confidence", "")).strip().lower() == "high" + ) + if dead_total > 0 and dead_high_conf == 0 and hi_conf_items > 0: + dead_high_conf = min(dead_total, hi_conf_items) + if dead_suppressed_total == 0: + dead_suppressed_total = len(suppressed_data) + + # Rows + active_rows = [_dead_row(_as_mapping(it), ctx) for it in items_data[:200]] + suppressed_rows: list[tuple[str, str, str, str, str, str, str]] = [] + for it in suppressed_data[:200]: + im = _as_mapping(it) + suppressed_by = _as_sequence(im.get("suppressed_by")) + first = _as_mapping(suppressed_by[0]) if suppressed_by else {} + suppressed_rows.append( + ( + *_dead_row(im, ctx), + str(first.get("rule", "")), + str(first.get("source", "")), + ) + ) + + # Insight + answer: str + tone: Tone + if not ctx.metrics_available: + answer, tone = "Metrics are skipped for this run.", "info" + else: + answer = ( + f"{dead_total} candidates total; " + f"{dead_high_conf} high-confidence items; " + f"{dead_suppressed_total} suppressed." 
+ ) + if dead_high_conf > 0: + tone = "risk" + elif dead_total > 0: + tone = "warn" + else: + tone = "ok" + + active_panel = render_rows_table( + headers=("Name", "File", "Line", "Kind", "Confidence"), + rows=active_rows, + empty_message="No dead code detected.", + ctx=ctx, + ) + suppressed_panel = render_rows_table( + headers=("Name", "File", "Line", "Kind", "Confidence", "Rule", "Source"), + rows=suppressed_rows, + empty_message="No suppressed dead-code candidates.", + ctx=ctx, + ) + + return insight_block( + question="Do we have actionable unused code?", + answer=answer, + tone=tone, + ) + render_split_tabs( + group_id="dead-code", + tabs=( + ("active", "Active", dead_total, active_panel), + ("suppressed", "Suppressed", dead_suppressed_total, suppressed_panel), + ), + ) diff --git a/codeclone/_html_report/_sections/_dependencies.py b/codeclone/_html_report/_sections/_dependencies.py new file mode 100644 index 0000000..67d5917 --- /dev/null +++ b/codeclone/_html_report/_sections/_dependencies.py @@ -0,0 +1,473 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Dependencies panel renderer (SVG graph + tables).""" + +from __future__ import annotations + +import math +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from ... 
import _coerce +from ..._html_badges import _render_chain_flow, _short_label, _stat_card, _tab_empty +from ..._html_escape import _escape_attr, _escape_html +from .._components import Tone, insight_block +from .._glossary import glossary_tip +from .._tables import render_rows_table + +if TYPE_CHECKING: + from .._context import ReportContext + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _select_dep_nodes( + edges: Sequence[tuple[str, str]], +) -> tuple[list[str], list[tuple[str, str]]]: + all_nodes = sorted({part for edge in edges for part in edge}) + if len(all_nodes) > 20: + degree_count: dict[str, int] = dict.fromkeys(all_nodes, 0) + for source, target in edges: + degree_count[source] = degree_count.get(source, 0) + 1 + degree_count[target] = degree_count.get(target, 0) + 1 + nodes = sorted(all_nodes, key=lambda node: -degree_count.get(node, 0))[:20] + nodes.sort() + else: + nodes = all_nodes + node_set = set(nodes) + filtered = [ + (source, target) + for source, target in edges + if source in node_set and target in node_set + ][:100] + return nodes, filtered + + +def _build_degree_maps( + nodes: Sequence[str], + edges: Sequence[tuple[str, str]], +) -> tuple[dict[str, int], dict[str, int]]: + in_degree: dict[str, int] = dict.fromkeys(nodes, 0) + out_degree: dict[str, int] = dict.fromkeys(nodes, 0) + for source, target in edges: + in_degree[target] += 1 + out_degree[source] += 1 + return in_degree, out_degree + + +def _build_layer_groups( + nodes: Sequence[str], + edges: Sequence[tuple[str, str]], + in_degree: Mapping[str, int], + out_degree: Mapping[str, int], +) -> dict[int, list[str]]: + children: dict[str, list[str]] = {node: [] for node in nodes} + for source, target in edges: + children[source].append(target) + + layers: dict[str, int] = {} + roots = sorted(node for node in nodes if in_degree[node] == 0) + if not roots: + roots = sorted(nodes, key=lambda node: -out_degree.get(node, 0))[:1] + queue 
= list(roots) + for node in queue: + layers.setdefault(node, 0) + while queue: + node = queue.pop(0) + for child in children.get(node, []): + if child in layers: + continue + layers[child] = layers[node] + 1 + queue.append(child) + + max_layer = max(layers.values(), default=0) + for node in nodes: + if node not in layers: + layers[node] = max_layer + 1 + + layer_groups: dict[int, list[str]] = {} + for node, layer in layers.items(): + layer_groups.setdefault(layer, []).append(node) + for layer in layer_groups: + layer_groups[layer].sort() + return layer_groups + + +def _layout_dep_graph( + layer_groups: Mapping[int, Sequence[str]], +) -> tuple[int, int, int, dict[str, tuple[float, float]]]: + num_layers = max(layer_groups.keys(), default=0) + 1 + max_per_layer = max((len(members) for members in layer_groups.values()), default=1) + width = max(600, min(1200, max_per_layer * 70 + 140)) + height = max(260, num_layers * 80 + 80) + pad_x, pad_y = 60.0, 40.0 + + positions: dict[str, tuple[float, float]] = {} + for layer_index in range(num_layers): + members = layer_groups.get(layer_index, []) + count = len(members) + y = pad_y + layer_index * ((height - 2 * pad_y) / max(1, num_layers - 1)) + for index, node in enumerate(members): + x = pad_x + (index + 0.5) * ((width - 2 * pad_x) / max(1, count)) + positions[node] = (x, y) + return width, height, max_per_layer, positions + + +def _hub_threshold( + nodes: Sequence[str], in_degree: Mapping[str, int], out_degree: Mapping[str, int] +) -> int: + degrees = [in_degree.get(node, 0) + out_degree.get(node, 0) for node in nodes] + if not degrees: + return 99 + degrees_sorted = sorted(degrees, reverse=True) + return int(degrees_sorted[max(0, len(degrees_sorted) // 5)]) + + +def _build_node_radii( + nodes: Sequence[str], + in_degree: Mapping[str, int], + out_degree: Mapping[str, int], + cycle_node_set: set[str], + hub_threshold: int, +) -> dict[str, float]: + node_radii: dict[str, float] = {} + for node in nodes: + degree = 
in_degree.get(node, 0) + out_degree.get(node, 0) + if node in cycle_node_set: + node_radii[node] = min(8.0, max(5.0, 3.5 + degree * 0.4)) + elif degree >= hub_threshold and degree > 2: + node_radii[node] = min(10.0, max(6.0, 4.0 + degree * 0.5)) + elif degree <= 1: + node_radii[node] = 3.0 + else: + node_radii[node] = min(6.0, max(3.5, 3.0 + degree * 0.3)) + return node_radii + + +def _build_svg_defs() -> str: + return ( + "" + '' + '' + '' + '' + '' + '' + "" + ) + + +def _build_cycle_edges(dep_cycles: Sequence[object]) -> set[tuple[str, str]]: + cycle_edges: set[tuple[str, str]] = set() + for cycle in dep_cycles: + parts = [str(part) for part in _as_sequence(cycle)] + for index in range(len(parts)): + cycle_edges.add((parts[index], parts[(index + 1) % len(parts)])) + return cycle_edges + + +def _render_dep_edges( + edges: Sequence[tuple[str, str]], + positions: Mapping[str, tuple[float, float]], + node_radii: Mapping[str, float], + cycle_edges: set[tuple[str, str]], +) -> list[str]: + rendered: list[str] = [] + for source, target in edges: + x1, y1 = positions[source] + x2, y2 = positions[target] + source_radius, target_radius = node_radii[source], node_radii[target] + dx, dy = x2 - x1, y2 - y1 + distance = math.sqrt(dx * dx + dy * dy) or 1.0 + ux, uy = dx / distance, dy / distance + x1a, y1a = x1 + ux * (source_radius + 2), y1 + uy * (source_radius + 2) + x2a, y2a = x2 - ux * (target_radius + 4), y2 - uy * (target_radius + 4) + mx = (x1a + x2a) / 2 - (y2a - y1a) * 0.06 + my = (y1a + y2a) / 2 + (x2a - x1a) * 0.06 + is_cycle = (source, target) in cycle_edges + stroke = "var(--danger)" if is_cycle else "var(--border-strong)" + opacity = "0.6" if is_cycle else "0.3" + marker = "dep-arrow-cycle" if is_cycle else "dep-arrow" + rendered.append( + f'' + ) + return rendered + + +def _render_dep_nodes_and_labels( + nodes: Sequence[str], + *, + positions: Mapping[str, tuple[float, float]], + node_radii: Mapping[str, float], + in_degree: Mapping[str, int], + out_degree: 
Mapping[str, int], + cycle_node_set: set[str], + hub_threshold: int, + max_per_layer: int, +) -> tuple[list[str], list[str]]: + nodes_svg: list[str] = [] + labels_svg: list[str] = [] + rotate_labels = max_per_layer > 6 + + for node in nodes: + x, y = positions[node] + radius = node_radii[node] + degree = in_degree.get(node, 0) + out_degree.get(node, 0) + label = _short_label(node) + is_cycle = node in cycle_node_set + is_hub = degree >= hub_threshold and degree > 2 + + if is_cycle: + fill, fill_opacity, extra = ( + "var(--danger)", + "0.85", + 'stroke="var(--danger)" stroke-width="1.5" stroke-dasharray="3,2"', + ) + elif is_hub: + fill, fill_opacity, extra = ( + "var(--accent-primary)", + "1", + 'filter="url(#glow)"', + ) + elif degree <= 1: + fill, fill_opacity, extra = "var(--text-muted)", "0.4", "" + else: + fill, fill_opacity, extra = "var(--accent-primary)", "0.7", "" + + nodes_svg.append( + f'' + ) + + font_size = "10" if is_hub else "9" + if rotate_labels: + labels_svg.append( + f'' + f"{_escape_html(node)}{_escape_html(label)}" + ) + continue + + labels_svg.append( + f'' + f"{_escape_html(node)}{_escape_html(label)}" + ) + + return nodes_svg, labels_svg + + +def _render_dep_svg( + edges: Sequence[tuple[str, str]], + cycle_node_set: set[str], + dep_cycles: Sequence[object], +) -> str: + if not edges: + return _tab_empty("Dependency graph is not available.") + + nodes, filtered_edges = _select_dep_nodes(edges) + in_degree, out_degree = _build_degree_maps(nodes, filtered_edges) + layer_groups = _build_layer_groups(nodes, filtered_edges, in_degree, out_degree) + width, height, max_per_layer, positions = _layout_dep_graph(layer_groups) + hub_threshold = _hub_threshold(nodes, in_degree, out_degree) + node_radii = _build_node_radii( + nodes, + in_degree, + out_degree, + cycle_node_set, + hub_threshold, + ) + cycle_edges = _build_cycle_edges(dep_cycles) + defs = _build_svg_defs() + edge_svg = _render_dep_edges(filtered_edges, positions, node_radii, cycle_edges) + 
node_svg, label_svg = _render_dep_nodes_and_labels( + nodes, + positions=positions, + node_radii=node_radii, + in_degree=in_degree, + out_degree=out_degree, + cycle_node_set=cycle_node_set, + hub_threshold=hub_threshold, + max_per_layer=max_per_layer, + ) + + label_pad = 50 if max_per_layer > 6 else 0 + vb_y = -label_pad + vb_h = height + label_pad + + return ( + '
' + f'' + f"{defs}{''.join(edge_svg)}{''.join(node_svg)}{''.join(label_svg)}" + "
" + ) + + +def render_dependencies_panel(ctx: ReportContext) -> str: + dep_cycles = _as_sequence(ctx.dependencies_map.get("cycles")) + dep_edge_data = _as_sequence(ctx.dependencies_map.get("edge_list")) + dep_edges = [ + (str(_as_mapping(r).get("source", "")), str(_as_mapping(r).get("target", ""))) + for r in dep_edge_data + if _as_mapping(r).get("source") and _as_mapping(r).get("target") + ] + + cycle_node_set: set[str] = set() + for cyc in dep_cycles: + for p in _as_sequence(cyc): + cycle_node_set.add(str(p)) + + dep_module_count = _as_int(ctx.dependencies_map.get("modules")) + dep_edge_count = _as_int(ctx.dependencies_map.get("edges")) + dep_max_depth = _as_int(ctx.dependencies_map.get("max_depth")) + cycle_count = len(dep_cycles) + + def _mb(*pairs: tuple[str, object]) -> str: + return "".join( + f'' + f'{_escape_html(str(v))}' + f'{_escape_html(lbl)}' + for lbl, v in pairs + if v is not None and str(v) != "n/a" + ) + + dep_avg = ( + f"{dep_edge_count / dep_module_count:.1f}" if dep_module_count > 0 else "n/a" + ) + + cards = [ + _stat_card( + "Modules", + dep_module_count, + detail=_mb(("imports", dep_edge_count)), + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Edges", + dep_edge_count, + detail=_mb(("avg/module", dep_avg)), + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Max depth", + dep_max_depth, + detail=_mb(("target", "< 8")), + value_tone="warn" if dep_max_depth > 8 else "good", + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Cycles", + cycle_count, + detail=( + _mb(("modules", len(cycle_node_set))) + if cycle_count > 0 + else _mb(("status", "clean")) + ), + value_tone="bad" if cycle_count > 0 else "good", + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + ] + + # SVG graph + graph_svg = _render_dep_svg(dep_edges, cycle_node_set, dep_cycles) + + # Hub bar + deg_map = dict.fromkeys(sorted({p for e in dep_edges for p in e}), 0) + for s, t in 
dep_edges: + deg_map[s] += 1 + deg_map[t] += 1 + top_nodes = sorted(deg_map, key=lambda n: (-deg_map[n], n))[:5] + hub_pills = "".join( + f'' + f'{_escape_html(_short_label(n))}' + f'{deg_map[n]}' + for n in top_nodes + ) + hub_bar = ( + f'
Top connected{hub_pills}
' + if top_nodes + else "" + ) + + # Legend + legend = ( + '
' + '' + ' Hub' + '' + ' Leaf' + '' + ' Cycle
' + ) + + # Tables + dep_cycle_rows = [ + (_render_chain_flow([str(p) for p in _as_sequence(c)], arrows=True),) + for c in dep_cycles + ] + dep_longest = _as_sequence(ctx.dependencies_map.get("longest_chains")) + dep_chain_rows = [ + ( + _render_chain_flow([str(p) for p in _as_sequence(ch)], arrows=True), + str(len(_as_sequence(ch))), + ) + for ch in dep_longest + ] + + # Insight + answer: str + tone: Tone + if not ctx.metrics_available: + answer, tone = "Metrics are skipped for this run.", "info" + else: + answer = f"Cycles: {cycle_count}; max dependency depth: {dep_max_depth}." + if cycle_count > 0: + tone = "risk" + elif dep_max_depth > 8: + tone = "warn" + else: + tone = "ok" + + return ( + insight_block( + question="Do module dependencies form cycles?", answer=answer, tone=tone + ) + + f'
{"".join(cards)}
' + + hub_bar + + graph_svg + + legend + + '

Longest chains

' + + render_rows_table( + headers=("Longest chain", "Length"), + rows=dep_chain_rows, + empty_message="No dependency chains detected.", + raw_html_headers=("Longest chain",), + ctx=ctx, + ) + + '

Detected cycles

' + + render_rows_table( + headers=("Cycle",), + rows=dep_cycle_rows, + empty_message="No dependency cycles detected.", + raw_html_headers=("Cycle",), + ctx=ctx, + ) + ) diff --git a/codeclone/_html_report/_sections/_meta.py b/codeclone/_html_report/_sections/_meta.py new file mode 100644 index 0000000..a843a1d --- /dev/null +++ b/codeclone/_html_report/_sections/_meta.py @@ -0,0 +1,383 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Report Provenance / metadata panel renderer.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ... import __version__, _coerce +from ..._html_data_attrs import _build_data_attrs +from ..._html_escape import _escape_html, _meta_display +from .._context import _meta_pick +from .._glossary import glossary_tip + +if TYPE_CHECKING: + from .._context import ReportContext + +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _path_basename(value: object) -> str | None: + if not isinstance(value, str): + return None + text = value.strip() + if not text: + return None + normalized = text.replace("\\", "/").rstrip("/") + if not normalized: + return "" + return normalized.rsplit("/", maxsplit=1)[-1] + + +def render_meta_panel(ctx: ReportContext) -> str: + """Build the collapsible Report Provenance panel.""" + meta = ctx.meta + baseline_meta = ctx.baseline_meta + cache_meta, metrics_baseline_meta, runtime_meta, integrity_map = ( + ctx.cache_meta, + ctx.metrics_baseline_meta, + ctx.runtime_meta, + ctx.integrity_map, + ) + + baseline_path_value = _meta_pick( + meta.get("baseline_path"), + baseline_meta.get("path"), + runtime_meta.get("baseline_path_absolute"), + ) + cache_path_value = _meta_pick( + meta.get("cache_path"), + cache_meta.get("path"), + runtime_meta.get("cache_path_absolute"), + ) + mbl_path_value = _meta_pick( + meta.get("metrics_baseline_path"), + metrics_baseline_meta.get("path"), + runtime_meta.get("metrics_baseline_path_absolute"), + ) + 
scan_root_value = _meta_pick( + meta.get("scan_root"), runtime_meta.get("scan_root_absolute") + ) + python_tag_value = _meta_pick(meta.get("python_tag")) + report_mode_value = _meta_pick(meta.get("report_mode"), "full") + metrics_computed_value = _meta_pick( + meta.get("metrics_computed"), + meta.get("computed_metric_families"), + ) + integrity_canon = _as_mapping(integrity_map.get("canonicalization")) + integrity_digest = _as_mapping(integrity_map.get("digest")) + canonical_sections = ", ".join( + str(i) for i in _as_sequence(integrity_canon.get("sections")) if str(i).strip() + ) + + general_rows: list[tuple[str, object]] = [ + ("CodeClone", _meta_pick(meta.get("codeclone_version"), __version__)), + ("Project", _meta_pick(meta.get("project_name"))), + ("Report schema", ctx.report_schema_version), + ("Scan root", scan_root_value), + ("Python", _meta_pick(meta.get("python_version"))), + ("Python tag", python_tag_value), + ("Analysis mode", _meta_pick(meta.get("analysis_mode"))), + ("Report mode", report_mode_value), + ("Report generated (UTC)", ctx.report_generated_at), + ( + "Metrics computed", + ", ".join(str(i) for i in _as_sequence(metrics_computed_value)), + ), + ("Health score", _meta_pick(meta.get("health_score"))), + ("Health grade", _meta_pick(meta.get("health_grade"))), + ("Source IO skipped", _meta_pick(meta.get("files_skipped_source_io"))), + ] + + _bl_status = _meta_pick(meta.get("baseline_status"), baseline_meta.get("status")) + _bl_loaded = _meta_pick(meta.get("baseline_loaded"), baseline_meta.get("loaded")) + _bl_fp_ver = _meta_pick( + meta.get("baseline_fingerprint_version"), + baseline_meta.get("fingerprint_version"), + ) + _bl_schema_ver = _meta_pick( + meta.get("baseline_schema_version"), baseline_meta.get("schema_version") + ) + _bl_py_tag = _meta_pick( + meta.get("baseline_python_tag"), baseline_meta.get("python_tag") + ) + _bl_gen_name = _meta_pick( + meta.get("baseline_generator_name"), baseline_meta.get("generator_name") + ) + _bl_gen_ver = 
_meta_pick( + meta.get("baseline_generator_version"), baseline_meta.get("generator_version") + ) + _bl_sha256 = _meta_pick( + meta.get("baseline_payload_sha256"), baseline_meta.get("payload_sha256") + ) + _bl_verified = _meta_pick( + meta.get("baseline_payload_sha256_verified"), + baseline_meta.get("payload_sha256_verified"), + ) + + bl_rows: list[tuple[str, object]] = [ + ("Baseline file", _path_basename(baseline_path_value)), + ("Baseline path", baseline_path_value), + ("Baseline status", _bl_status), + ("Baseline loaded", _bl_loaded), + ("Baseline fingerprint", _bl_fp_ver), + ("Baseline schema", _bl_schema_ver), + ("Baseline Python tag", _bl_py_tag), + ("Baseline generator name", _bl_gen_name), + ("Baseline generator version", _bl_gen_ver), + ("Baseline payload sha256", _bl_sha256), + ("Baseline payload verified", _bl_verified), + ] + + _mbl_loaded = _meta_pick( + meta.get("metrics_baseline_loaded"), metrics_baseline_meta.get("loaded") + ) + _mbl_status = _meta_pick( + meta.get("metrics_baseline_status"), metrics_baseline_meta.get("status") + ) + _mbl_schema_ver = _meta_pick( + meta.get("metrics_baseline_schema_version"), + metrics_baseline_meta.get("schema_version"), + ) + _mbl_sha256 = _meta_pick( + meta.get("metrics_baseline_payload_sha256"), + metrics_baseline_meta.get("payload_sha256"), + ) + _mbl_verified = _meta_pick( + meta.get("metrics_baseline_payload_sha256_verified"), + metrics_baseline_meta.get("payload_sha256_verified"), + ) + + mbl_rows: list[tuple[str, object]] = [ + ("Metrics baseline path", mbl_path_value), + ("Metrics baseline loaded", _mbl_loaded), + ("Metrics baseline status", _mbl_status), + ("Metrics baseline schema", _mbl_schema_ver), + ("Metrics baseline payload sha256", _mbl_sha256), + ("Metrics baseline payload verified", _mbl_verified), + ] + + _cache_schema_ver = _meta_pick( + meta.get("cache_schema_version"), cache_meta.get("schema_version") + ) + _cache_status = _meta_pick(meta.get("cache_status"), cache_meta.get("status")) + 
_cache_used = _meta_pick(meta.get("cache_used"), cache_meta.get("used")) + + cache_rows: list[tuple[str, object]] = [ + ("Cache path", cache_path_value), + ("Cache schema", _cache_schema_ver), + ("Cache status", _cache_status), + ("Cache used", _cache_used), + ] + + rt_rows = [ + r + for r in ( + ("Scan root absolute", runtime_meta.get("scan_root_absolute")), + ("Baseline path absolute", runtime_meta.get("baseline_path_absolute")), + ("Cache path absolute", runtime_meta.get("cache_path_absolute")), + ( + "Metrics baseline path absolute", + runtime_meta.get("metrics_baseline_path_absolute"), + ), + ) + if _meta_pick(r[1]) is not None + ] + + integ_rows = [ + r + for r in ( + ("Canonicalization version", integrity_canon.get("version")), + ("Canonicalization scope", integrity_canon.get("scope")), + ("Canonical sections", canonical_sections), + ("Digest algorithm", integrity_digest.get("algorithm")), + ("Digest value", integrity_digest.get("value")), + ("Digest verified", integrity_digest.get("verified")), + ) + if _meta_pick(r[1]) is not None + ] + + meta_sections = [ + ("General", general_rows), + ("Clone Baseline", bl_rows), + ("Metrics Baseline", mbl_rows), + ("Cache", cache_rows), + ("Runtime", rt_rows), + ("Integrity", integ_rows), + ] + + # Data attrs + metrics_csv = ",".join(str(i) for i in _as_sequence(metrics_computed_value)) + meta_attrs = _build_data_attrs( + { + "data-report-schema-version": ctx.report_schema_version, + "data-codeclone-version": meta.get("codeclone_version", __version__), + "data-project-name": meta.get("project_name"), + "data-scan-root": scan_root_value, + "data-python-version": meta.get("python_version"), + "data-python-tag": python_tag_value, + "data-analysis-mode": meta.get("analysis_mode"), + "data-report-mode": report_mode_value, + "data-report-generated-at-utc": ctx.report_generated_at, + "data-metrics-computed": metrics_csv, + "data-health-score": meta.get("health_score"), + "data-health-grade": meta.get("health_grade"), + 
"data-baseline-file": _path_basename(baseline_path_value), + "data-baseline-path": baseline_path_value, + "data-baseline-fingerprint-version": _bl_fp_ver, + "data-baseline-schema-version": _bl_schema_ver, + "data-baseline-python-tag": _bl_py_tag, + "data-baseline-generator-name": _bl_gen_name, + "data-baseline-generator-version": _bl_gen_ver, + "data-baseline-payload-sha256": _bl_sha256, + "data-baseline-payload-verified": _meta_display(_bl_verified), + "data-baseline-loaded": _meta_display(_bl_loaded), + "data-baseline-status": _bl_status, + "data-cache-path": cache_path_value, + "data-cache-schema-version": _cache_schema_ver, + "data-cache-status": _cache_status, + "data-cache-used": _meta_display(_cache_used), + "data-files-skipped-source-io": meta.get("files_skipped_source_io"), + "data-metrics-baseline-path": mbl_path_value, + "data-metrics-baseline-loaded": _meta_display(_mbl_loaded), + "data-metrics-baseline-status": _mbl_status, + "data-metrics-baseline-schema-version": _mbl_schema_ver, + "data-metrics-baseline-payload-sha256": _mbl_sha256, + "data-metrics-baseline-payload-verified": _meta_display(_mbl_verified), + "data-runtime-scan-root-absolute": runtime_meta.get("scan_root_absolute"), + "data-runtime-baseline-path-absolute": runtime_meta.get( + "baseline_path_absolute" + ), + "data-runtime-cache-path-absolute": runtime_meta.get("cache_path_absolute"), + "data-runtime-metrics-baseline-path-absolute": runtime_meta.get( + "metrics_baseline_path_absolute" + ), + "data-canonicalization-version": integrity_canon.get("version"), + "data-canonicalization-scope": integrity_canon.get("scope"), + "data-canonical-sections": canonical_sections, + "data-digest-algorithm": integrity_digest.get("algorithm"), + "data-digest-value": integrity_digest.get("value"), + "data-digest-verified": _meta_display(integrity_digest.get("verified")), + } + ) + + _BOOL = { + "Baseline payload verified", + "Baseline loaded", + "Cache used", + "Metrics baseline loaded", + "Metrics 
baseline payload verified", + "Digest verified", + } + + def _val_html(label: str, value: object) -> str: + if label in _BOOL and isinstance(value, bool): + icon = "\u2713" if value else "\u2717" + badge_cls = "meta-bool-true" if value else "meta-bool-false" + return f'{icon}' + return _escape_html(_meta_display(value)) + + _SECTION_ICONS: dict[str, str] = { + "General": ( + '' + '' + ), + "Clone Baseline": ( + '' + '' + ), + "Metrics Baseline": ( + '' + '' + ), + "Cache": ( + '' + '' + '' + ), + "Runtime": ( + '' + '' + ), + "Integrity": ( + '' + '' + ), + } + + def _section_html(title: str, rows: list[tuple[str, object]]) -> str: + icon = _SECTION_ICONS.get(title, "") + visible_rows = [ + (label_name, value) + for label_name, value in rows + if _meta_pick(value) is not None + ] + if not visible_rows: + return "" + row_html = "".join( + f'{_escape_html(label)}' + f"{glossary_tip(label)}" + f'{_val_html(label, value)}' + for label, value in visible_rows + ) + return ( + '
' + f'

{icon}{_escape_html(title)}

' + f'{row_html}
' + ) + + meta_rows_html = "".join( + _section_html(st, rows) for st, rows in meta_sections if rows + ) + + def _prov_badge(label: str, color: str) -> str: + return f'{_escape_html(label)}' + + badges: list[str] = [] + if _bl_verified is True: + badges.append(_prov_badge("Baseline verified", "green")) + elif _bl_loaded is True and _bl_verified is not True: + badges.append(_prov_badge("Baseline untrusted", "red")) + elif _bl_loaded is False or _bl_loaded is None: + badges.append(_prov_badge("Baseline missing", "amber")) + if ctx.report_schema_version: + badges.append(_prov_badge(f"Schema {ctx.report_schema_version}", "neutral")) + if _bl_fp_ver is not None: + badges.append(_prov_badge(f"Fingerprint {_bl_fp_ver}", "neutral")) + gen_name = str(_bl_gen_name or "") + if gen_name and gen_name != "codeclone": + badges.append(_prov_badge(f"Generator mismatch: {gen_name}", "red")) + if _cache_used is True: + badges.append(_prov_badge("Cache hit", "green")) + elif _cache_used is False: + badges.append(_prov_badge("Cache miss", "amber")) + else: + badges.append(_prov_badge("Cache N/A", "neutral")) + analysis_mode = str(_meta_pick(meta.get("analysis_mode")) or "") + if analysis_mode: + badges.append(_prov_badge(f"Mode: {analysis_mode}", "neutral")) + if _mbl_verified is True: + badges.append(_prov_badge("Metrics baseline verified", "green")) + elif _mbl_loaded is True and _mbl_verified is not True: + badges.append(_prov_badge("Metrics baseline untrusted", "red")) + + prov_summary = ( + f'
{"".join(badges)}' + 'Baseline-aware \u00b7 contract-verified
' + if badges + else "" + ) + + return ( + f'' + '
' + "

Report Provenance

" + '
' + f"{prov_summary}" + f'
{meta_rows_html}
' + "
" + ) diff --git a/codeclone/_html_report/_sections/_overview.py b/codeclone/_html_report/_sections/_overview.py new file mode 100644 index 0000000..be3b811 --- /dev/null +++ b/codeclone/_html_report/_sections/_overview.py @@ -0,0 +1,644 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Overview panel renderer.""" + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +from ... import _coerce +from ..._html_badges import _stat_card +from ..._html_escape import _escape_html +from .._components import ( + Tone, + insight_block, + overview_cluster_header, + overview_source_breakdown_html, + overview_summary_item_html, +) +from .._glossary import glossary_tip + +if TYPE_CHECKING: + from .._context import ReportContext + +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _health_gauge_html( + score: float, grade: str, *, health_delta: int | None = None +) -> str: + """Render an SVG ring gauge for health score with optional baseline arc.""" + if score < 0: + return _stat_card( + "Health", + "n/a", + css_class="meta-item overview-health-card", + glossary_tip_fn=glossary_tip, + ) + _R = 42.0 + circumference = 2.0 * math.pi * _R + offset = circumference * (1.0 - score / 100.0) + if score >= 75: + color = "var(--success)" + elif score >= 60: + color = "var(--warning)" + else: + color = "var(--error)" + + # Baseline comparison arc: show where baseline was relative to current. + # SVG circle with rotate(-90deg) starts at 12 o'clock, goes clockwise. + # Negative stroke-dashoffset shifts the arc forward (clockwise). + # To place an arc at P% from 12 o'clock: offset = -(C * P / 100). 
+ baseline_arc = "" + if health_delta is not None and health_delta != 0: + baseline_score = max(0.0, min(100.0, score - health_delta)) + arc_len = circumference * abs(health_delta) / 100.0 + if health_delta > 0: + # Improvement: ghost arc from baseline to score (gained segment) + arc_offset = -circumference * baseline_score / 100.0 + baseline_arc = ( + f'' + ) + else: + # Degradation: red arc from score to baseline (lost segment) + arc_offset = -circumference * score / 100.0 + baseline_arc = ( + f'' + ) + + delta_html = "" + if health_delta is not None and health_delta != 0: + if health_delta > 0: + cls = "health-ring-delta--up" + sign = "+" + else: + cls = "health-ring-delta--down" + sign = "" + delta_html = f'
{sign}{health_delta}
' + + # "Get Badge" button — shown for grades A, B, C + badge_btn_html = "" + if grade.upper() in ("A", "B", "C"): + badge_btn_html = ( + '" + ) + + return ( + '
' + '
' + '
' + '' + '' + f"{baseline_arc}" + f'' + "" + '
' + f'
{score:.0f}
' + f'
Grade {_escape_html(grade)}
' + f"{delta_html}" + "
" + f"{badge_btn_html}" + "
" + ) + + +# --------------------------------------------------------------------------- +# Analytics: Health Radar (pure SVG) +# --------------------------------------------------------------------------- + +_RADAR_DIMENSIONS = ( + "clones", + "complexity", + "coupling", + "cohesion", + "dead_code", + "dependencies", + "coverage", +) + +_RADAR_LABELS = { + "clones": "Clones", + "complexity": "Complexity", + "coupling": "Coupling", + "cohesion": "Cohesion", + "dead_code": "Dead Code", + "dependencies": "Deps", + "coverage": "Coverage", +} + +_RADAR_CX, _RADAR_CY, _RADAR_R = 200.0, 200.0, 130.0 +_RADAR_LABEL_R = 155.0 + + +def _radar_point(index: int, total: int, radius: float) -> tuple[float, float]: + angle = 2.0 * math.pi * index / total - math.pi / 2.0 + return ( + round(_RADAR_CX + radius * math.cos(angle), 2), + round(_RADAR_CY + radius * math.sin(angle), 2), + ) + + +def _radar_polygon(total: int, radius: float) -> str: + return " ".join( + f"{x},{y}" for x, y in (_radar_point(i, total, radius) for i in range(total)) + ) + + +def _health_radar_svg(dimensions: dict[str, int]) -> str: + n = len(_RADAR_DIMENSIONS) + scores = [max(0, min(100, dimensions.get(d, 0))) for d in _RADAR_DIMENSIONS] + + # Concentric grid rings + rings: list[str] = [] + for pct in (0.33, 0.66, 1.0): + pts = _radar_polygon(n, _RADAR_R * pct) + rings.append( + f'' + ) + + # Axis lines + axes: list[str] = [] + for i in range(n): + x, y = _radar_point(i, n, _RADAR_R) + axes.append( + f'' + ) + + # Score polygon + score_pts = " ".join( + f"{x},{y}" + for x, y in ( + _radar_point(i, n, _RADAR_R * s / 100.0) for i, s in enumerate(scores) + ) + ) + score_poly = ( + f'' + ) + + # Score dots + dots: list[str] = [] + for i, s in enumerate(scores): + x, y = _radar_point(i, n, _RADAR_R * s / 100.0) + color = "var(--error)" if s < 60 else "var(--accent-primary)" + dots.append(f'') + + # Labels — two lines: name + score + labels: list[str] = [] + for i, dim in enumerate(_RADAR_DIMENSIONS): + lx, ly = 
_radar_point(i, n, _RADAR_LABEL_R) + anchor = "middle" + dx = lx - _RADAR_CX + if dx < -5: + anchor = "end" + elif dx > 5: + anchor = "start" + # Nudge labels outward from center for breathing room + nudge = 18.0 + angle = math.atan2(ly - _RADAR_CY, lx - _RADAR_CX) + lx = round(lx + nudge * math.cos(angle), 2) + ly = round(ly + nudge * math.sin(angle), 2) + s = scores[i] + cls = ' class="radar-label--weak"' if s < 60 else "" + labels.append( + f'' + f"{_RADAR_LABELS.get(dim, dim)}" + f'{s}' + f"" + ) + + return ( + '
' + '' + + "".join(rings) + + "".join(axes) + + score_poly + + "".join(dots) + + "".join(labels) + + "
" + ) + + +# --------------------------------------------------------------------------- +# Analytics: Findings by Family (horizontal bars) +# --------------------------------------------------------------------------- + + +def _issue_breakdown_html( + ctx: ReportContext, + *, + deltas: dict[str, int | None], +) -> str: + """Horizontal bar chart of real issue counts with baseline awareness. + + *deltas* maps row key → new-items count (None = no baseline loaded). + When delta == 0 the row is fully baselined and rendered muted. + When delta > 0 the bar is split: baselined segment (muted) + new segment. + """ + complexity_high = _as_int( + _as_mapping(ctx.complexity_map.get("summary")).get("high_risk") + ) + coupling_high = _as_int( + _as_mapping(ctx.coupling_map.get("summary")).get("high_risk") + ) + cohesion_low = _as_int( + _as_mapping(ctx.cohesion_map.get("summary")).get("low_cohesion") + ) + dead_total = _as_int(_as_mapping(ctx.dead_code_map.get("summary")).get("total")) + dep_cycles = len(_as_sequence(ctx.dependencies_map.get("cycles"))) + structural = len(ctx.structural_findings) + + # (key, label, count, color) + raw_rows: list[tuple[str, str, int, str]] = [ + ("clones", "Clone Groups", ctx.clone_groups_total, "var(--error)"), + ("structural", "Structural", structural, "var(--warning)"), + ("complexity", "Complexity", complexity_high, "var(--warning)"), + ("cohesion", "Cohesion", cohesion_low, "var(--info)"), + ("coupling", "Coupling", coupling_high, "var(--info)"), + ("dead_code", "Dead Code", dead_total, "var(--text-muted)"), + ("dep_cycles", "Dep. Cycles", dep_cycles, "var(--text-muted)"), + ] + # Filter out zeros — show only actual issues + rows = [ + (key, label, count, color) for key, label, count, color in raw_rows if count > 0 + ] + if not rows: + return '
No issues detected.
' + + max_count = max(c for _, _, c, _ in rows) + parts: list[str] = [] + for key, label, count, color in rows: + pct = round(count / max_count * 100) if max_count else 0 + delta = deltas.get(key) + + # Determine row state + is_muted = delta is not None and delta == 0 + has_split = delta is not None and delta > 0 and count > delta + + row_cls = "families-row families-row--muted" if is_muted else "families-row" + + # Build bar: split (baselined + new) or single fill + if has_split: + assert delta is not None # for type checker + baselined_pct = round((count - delta) / max_count * 100) + new_pct = pct - baselined_pct + bar_html = ( + f'' + f'' + f'' + f"" + ) + else: + bar_cls = " breakdown-bar-fill--baselined" if is_muted else "" + bar_html = ( + f'' + f'' + ) + + # Delta indicator + delta_html = "" + if is_muted: + delta_html = '\u2713' + elif delta is not None and delta > 0: + delta_html = ( + f'+{delta}' + ) + + parts.append( + f'
' + f'{_escape_html(label)}' + f'{count}' + f"{bar_html}{delta_html}
" + ) + return '
' + "".join(parts) + "
" + + +def render_overview_panel(ctx: ReportContext) -> str: + """Build the Overview tab panel HTML.""" + complexity_summary = _as_mapping(ctx.complexity_map.get("summary")) + coupling_summary = _as_mapping(ctx.coupling_map.get("summary")) + cohesion_summary = _as_mapping(ctx.cohesion_map.get("summary")) + dead_code_summary = _as_mapping(ctx.dead_code_map.get("summary")) + dep_cycles = _as_sequence(ctx.dependencies_map.get("cycles")) + + complexity_high_risk = _as_int(complexity_summary.get("high_risk")) + coupling_high_risk = _as_int(coupling_summary.get("high_risk")) + cohesion_low = _as_int(cohesion_summary.get("low_cohesion")) + dependency_cycle_count = len(dep_cycles) + dependency_max_depth = _as_int(ctx.dependencies_map.get("max_depth")) + dead_total = _as_int(dead_code_summary.get("total")) + dead_high_conf = _as_int( + dead_code_summary.get("high_confidence", dead_code_summary.get("critical")) + ) + dead_suppressed = _as_int(dead_code_summary.get("suppressed", 0)) + + health_score_raw = ctx.health_map.get("score") + health_score_known = ( + health_score_raw is not None and str(health_score_raw).strip() != "" + ) + health_score = _as_float(health_score_raw) if health_score_known else -1.0 + health_grade = str(ctx.health_map.get("grade", "n/a")) + + # Overview answer + def _answer_and_tone() -> tuple[str, Tone]: + if ctx.metrics_available and health_score_known: + ans = ( + f"Health {health_score:.0f}/100 ({health_grade}); " + f"{ctx.clone_groups_total} clone groups; " + f"{dead_total} dead-code items ({dead_suppressed} suppressed); " + f"{dependency_cycle_count} dependency cycles." + ) + if health_score >= 80.0: + return ans, "ok" + if health_score >= 60.0: + return ans, "warn" + return ans, "risk" + if ctx.metrics_available: + ans = ( + f"{ctx.clone_groups_total} clone groups; " + f"{dead_total} dead-code items ({dead_suppressed} suppressed); " + f"{dependency_cycle_count} dependency cycles." 
+ ) + return ans, "info" + return ( + f"{ctx.clone_groups_total} clone groups; metrics were skipped for this run.", + "info", + ) + + overview_answer, overview_tone = _answer_and_tone() + + # -- MetricsDiff deltas -- + md = ctx.metrics_diff + _new_complexity = len(md.new_high_risk_functions) if md else None + _new_coupling = len(md.new_high_coupling_classes) if md else None + _new_dead = len(md.new_dead_code) if md else None + _new_cycles = len(md.new_cycles) if md else None + _health_delta = md.health_delta if md else None + structural_count = len(ctx.structural_findings) + structural_kind_count = len({g.finding_kind for g in ctx.structural_findings}) + clone_suggestion_count = sum( + 1 for suggestion in ctx.suggestions if suggestion.finding_family == "clones" + ) + structural_suggestion_count = sum( + 1 for suggestion in ctx.suggestions if suggestion.finding_family == "structural" + ) + metrics_suggestion_count = sum( + 1 for suggestion in ctx.suggestions if suggestion.finding_family == "metrics" + ) + + # Clone group novelty — show delta only when baseline comparison is active. + # MetricsDiff presence is the reliable indicator of a loaded baseline. + _new_clones: int | None = None + if md is not None: + _new_clones = sum( + 1 for gk, _ in ctx.func_sorted if gk in ctx.new_func_keys + ) + sum(1 for gk, _ in ctx.block_sorted if gk in ctx.new_block_keys) + + def _mb(*pairs: tuple[str, object]) -> str: + """Render micro-badges: [label value] [label value] ...""" + return "".join( + f'' + f'{_escape_html(str(v))}' + f'{_escape_html(label)}' + for label, v in pairs + if v is not None and str(v) != "n/a" + ) + + _baseline_ok = ( + '\u2713 baselined' + ) + + def _baselined_detail( + total: int, + delta: int | None, + detail: str, + ) -> tuple[str, str]: + """Return (detail_html, value_tone) accounting for baseline state. + + When baseline is loaded and all items are accepted debt, tone + becomes 'muted' and a '✓ baselined' pill is appended. 
+ When baseline is loaded but new regressions exist, the accepted + count is shown alongside the existing detail. + """ + if delta is None or total == 0: + return detail, "good" if total == 0 else "bad" + if delta == 0: + return detail + _baseline_ok, "muted" + baselined = total - delta + extra = "" + if baselined > 0: + extra = _mb(("baselined", baselined)) + return detail + extra, "bad" + + # KPI cards — compute detail + tone with baseline awareness + _clone_detail, _clone_tone = _baselined_detail( + ctx.clone_groups_total, + _new_clones, + _mb( + ("func", len(ctx.func_sorted)), + ("block", len(ctx.block_sorted)), + ("seg", len(ctx.segment_sorted)), + ), + ) + _cx_detail, _cx_tone = _baselined_detail( + complexity_high_risk, + _new_complexity, + _mb( + ("avg", complexity_summary.get("average", "n/a")), + ("max", complexity_summary.get("max", "n/a")), + ), + ) + _cp_detail, _cp_tone = _baselined_detail( + coupling_high_risk, + _new_coupling, + _mb( + ("avg", coupling_summary.get("average", "n/a")), + ("max", coupling_summary.get("max", "n/a")), + ), + ) + _cy_detail, _cy_tone = _baselined_detail( + dependency_cycle_count, + _new_cycles, + _mb(("depth", dependency_max_depth)), + ) + _dc_detail, _dc_tone = _baselined_detail( + dead_total, + _new_dead, + _mb(("high-conf", dead_high_conf)), + ) + + kpis = [ + _stat_card( + "Clone Groups", + ctx.clone_groups_total, + detail=_clone_detail, + tip="Detected code clone groups by detection level", + delta_new=_new_clones, + value_tone=_clone_tone, + ), + _stat_card( + "High Complexity", + complexity_high_risk, + detail=_cx_detail, + tip="Functions with cyclomatic complexity above threshold", + value_tone=_cx_tone, + delta_new=_new_complexity, + ), + _stat_card( + "High Coupling", + coupling_high_risk, + detail=_cp_detail, + tip="Classes with high coupling between objects (CBO)", + value_tone=_cp_tone, + delta_new=_new_coupling, + ), + _stat_card( + "Low Cohesion", + cohesion_low, + detail=_mb( + ("avg", 
cohesion_summary.get("average", "n/a")), + ("max", cohesion_summary.get("max", "n/a")), + ), + tip="Classes with low internal cohesion (high LCOM4)", + value_tone="good" if cohesion_low == 0 else "warn", + ), + _stat_card( + "Dep. Cycles", + dependency_cycle_count, + detail=_cy_detail, + tip="Circular dependencies between project modules", + value_tone=_cy_tone, + delta_new=_new_cycles, + ), + _stat_card( + "Dead Code", + dead_total, + detail=_dc_detail, + tip="Potentially unused functions, classes, or imports", + value_tone=_dc_tone, + delta_new=_new_dead, + ), + _stat_card( + "Findings", + structural_count, + detail=_mb(("kinds", structural_kind_count)), + tip="Active structural findings reported in production code", + value_tone="good" if structural_count == 0 else "warn", + ), + _stat_card( + "Suggestions", + len(ctx.suggestions), + detail=_mb( + ("clone", clone_suggestion_count), + ("struct", structural_suggestion_count), + ("metric", metrics_suggestion_count), + ), + tip="Actionable recommendations derived from clones, findings, and metrics", + value_tone="good" if not ctx.suggestions else "warn", + ), + ] + + # Build deltas map for issue breakdown baseline awareness + _issue_deltas: dict[str, int | None] = { + "clones": _new_clones, + "complexity": _new_complexity, + "coupling": _new_coupling, + "dead_code": _new_dead, + "dep_cycles": _new_cycles, + # No baseline tracking for these families + "structural": None, + "cohesion": None, + } + + # Executive summary: issue breakdown (sorted) + source breakdown + executive = ( + '
' + + overview_cluster_header( + "Executive Summary", + "Project-wide context derived from the full scanned root.", + ) + + '
' + + overview_summary_item_html( + label="Issue breakdown", + body_html=_issue_breakdown_html(ctx, deltas=_issue_deltas), + ) + + overview_summary_item_html( + label="Source breakdown", + body_html=overview_source_breakdown_html( + _as_mapping(ctx.overview_data.get("source_breakdown")) + ), + ) + + "
" + ) + + health_gauge = _health_gauge_html( + health_score, health_grade, health_delta=_health_delta + ) + + return ( + insight_block( + question="What is the current code-health snapshot?", + answer=overview_answer, + tone=overview_tone, + ) + + '
' + + health_gauge + + '
' + + "".join(kpis) + + "
" + + "
" + + executive + + _analytics_section(ctx) + ) + + +def _analytics_section(ctx: ReportContext) -> str: + """Build the Analytics cluster with full-width radar chart.""" + raw_dims = _as_mapping(ctx.health_map.get("dimensions")) + dimensions = {str(k): _as_int(v) for k, v in raw_dims.items()} if raw_dims else {} + if not dimensions: + return "" + + radar_html = _health_radar_svg(dimensions) + + return ( + '
' + + overview_cluster_header( + "Health Profile", + "Dimension scores across all quality axes.", + ) + + '
' + + overview_summary_item_html(label="Health profile", body_html=radar_html) + + "
" + ) diff --git a/codeclone/_html_report/_sections/_structural.py b/codeclone/_html_report/_sections/_structural.py new file mode 100644 index 0000000..4f09a52 --- /dev/null +++ b/codeclone/_html_report/_sections/_structural.py @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Structural Findings panel — thin wrapper delegating to report/findings.py.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ...report.findings import build_structural_findings_html_panel +from ...structural_findings import normalize_structural_findings + +if TYPE_CHECKING: + from .._context import ReportContext + + +def render_structural_panel(ctx: ReportContext) -> str: + sf_groups = list(normalize_structural_findings(ctx.structural_findings)) + sf_files: list[str] = sorted( + {occ.file_path for group in sf_groups for occ in group.items} + ) + return build_structural_findings_html_panel( + sf_groups, + sf_files, + scan_root=ctx.scan_root, + file_cache=ctx.file_cache, + context_lines=ctx.context_lines, + max_snippet_lines=ctx.max_snippet_lines, + ) diff --git a/codeclone/_html_report/_sections/_suggestions.py b/codeclone/_html_report/_sections/_suggestions.py new file mode 100644 index 0000000..a643229 --- /dev/null +++ b/codeclone/_html_report/_sections/_suggestions.py @@ -0,0 +1,275 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Suggestions panel renderer.""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from ... 
import _coerce +from ..._html_badges import _tab_empty +from ..._html_data_attrs import _build_data_attrs +from ..._html_escape import _escape_html +from ..._html_filters import SPREAD_OPTIONS, _render_select +from ...domain.findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + FAMILY_CLONES, + FAMILY_METRICS, + FAMILY_STRUCTURAL, +) +from ...domain.quality import SEVERITY_CRITICAL, SEVERITY_INFO, SEVERITY_WARNING +from ...report._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label +from .._components import insight_block + +if TYPE_CHECKING: + from ...models import Suggestion + from .._context import ReportContext + +_as_int = _coerce.as_int + + +def _render_fact_summary(raw: str) -> str: + """Render fact_summary as a styled inline chip.""" + if not raw: + return "" + # Humanize key=value pairs: "cyclomatic_complexity=15" → "cyclomatic complexity: 15" + segments = [s.strip() for s in raw.split(",")] + parts: list[str] = [] + for seg in segments: + if "=" in seg: + key, _, val = seg.partition("=") + parts.append(f"{key.strip().replace('_', ' ')}: {val.strip()}") + else: + parts.append(seg) + text = ", ".join(parts) + return f'
{_escape_html(text)}
' + + +def _format_source_breakdown( + source_breakdown: Mapping[str, object] | Sequence[object], +) -> str: + rows: list[tuple[str, int]] = [] + if isinstance(source_breakdown, Mapping): + rows = [ + (str(k), _as_int(v)) for k, v in source_breakdown.items() if _as_int(v) > 0 + ] + else: + rows = [ + (str(pair[0]), _as_int(pair[1])) + for pair in source_breakdown + if isinstance(pair, Sequence) and len(pair) == 2 and _as_int(pair[1]) > 0 + ] + rows.sort(key=lambda item: (item[0], item[1])) + return " \u00b7 ".join(f"{source_kind_label(k)} {c}" for k, c in rows if c > 0) + + +def _render_card(s: Suggestion, ctx: ReportContext) -> str: + actionable = "true" if s.severity != "info" else "false" + spread_bucket = "high" if s.spread_files > 1 or s.spread_functions > 1 else "low" + breakdown_text = _format_source_breakdown(s.source_breakdown) + facts_source = _escape_html(breakdown_text or source_kind_label(s.source_kind)) + facts_location = _escape_html(s.location_label or s.location) + + # Context chips — more visible than a single muted line + ctx_chips: list[str] = [] + sk = source_kind_label(s.source_kind) + if sk: + ctx_chips.append(f'{_escape_html(sk)}') + cat = s.category.replace("_", " ") + if cat: + ctx_chips.append(f'{_escape_html(cat)}') + if s.clone_type: + ctx_chips.append( + f'{_escape_html(s.clone_type)}' + ) + ctx_html = f'
{"".join(ctx_chips)}
' + + # Next step — primary actionable CTA + next_step = _escape_html(s.steps[0]) if s.steps else "" + next_step_html = ( + '
' + '' + '' + f"{next_step}
" + if next_step + else "" + ) + + # Effort badge — color-coded + effort_cls = f" suggestion-effort--{_escape_html(s.effort)}" + + # Priority — clean display (drop trailing zeros) + priority_str = f"{s.priority:g}" + + # Locations inside details + locs_html = "" + if s.representative_locations: + locs_items = "".join( + '
  • ' + f"{_escape_html(loc.relative_path)}" + f':{loc.start_line}\u2013{loc.end_line}' + "" + f'{_escape_html(ctx.bare_qualname(loc.qualname, loc.filepath))}' + "
  • " + for loc in s.representative_locations + ) + locs_html = ( + f'
    Locations ({len(s.representative_locations)})
    ' + f'
      {locs_items}
    ' + ) + + # Steps inside details + steps_html = "" + if s.steps: + steps_items = "".join(f"
  • {_escape_html(step)}
  • " for step in s.steps) + steps_html = ( + '
    Refactoring steps
    ' + f'
      {steps_items}
    ' + ) + + # Severity dd — colored to match header badge + sev_dd = ( + f'' + f"{_escape_html(s.severity)}" + ) + + return ( + f'
    " + # -- header row -- + '
    ' + f'{_escape_html(s.severity)}' + f'{_escape_html(s.title)}' + '' + f'{_escape_html(s.effort)}' + f'P{priority_str}' + f'{s.spread_functions} fn / {s.spread_files} files' + "
    " + # -- body -- + '
    ' + f"{ctx_html}" + f"{_render_fact_summary(s.fact_summary)}" + f"{next_step_html}" + "
    " + # -- expandable details -- + '
    ' + "Details" + '
    ' + '
    ' + '
    ' + '
    Facts
    ' + '
    ' + f"
    Finding
    {_escape_html(s.fact_kind or s.category)}
    " + f"
    Spread
    {s.spread_functions} fn / {s.spread_files} files
    " + f"
    Source
    {facts_source}
    " + f"
    Scope
    {facts_location}
    " + "
    " + '
    ' + '
    Assessment
    ' + '
    ' + f"
    Severity
    {sev_dd}
    " + f"
    Confidence
    {_escape_html(s.confidence)}
    " + f"
    Priority
    {priority_str}
    " + f"
    Family
    {_escape_html(s.finding_family)}
    " + "
    " + "
    " + f"{locs_html}" + f"{steps_html}" + "
    " + "
    " + ) + + +def render_suggestions_panel(ctx: ReportContext) -> str: + rows = list(ctx.suggestions) + if not rows: + return insight_block( + question="What should be prioritized next?", + answer="No suggestions were generated for this run.", + tone="ok", + ) + _tab_empty("No suggestions generated.") + + critical = sum(1 for s in rows if s.severity == "critical") + warning = sum(1 for s in rows if s.severity == "warning") + info = sum(1 for s in rows if s.severity == "info") + intro = insight_block( + question="What should be prioritized next?", + answer=f"{len(rows)} suggestions: {critical} critical, {warning} warning, {info} info.", + tone="risk" if critical > 0 else "warn", + ) + + cards_html = "".join(_render_card(s, ctx) for s in rows) + sev_opts = tuple( + (s, s) for s in (SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO) + ) + cat_opts = tuple( + (c, c) + for c in ( + CATEGORY_CLONE, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COHESION, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + ) + ) + fam_opts = tuple((f, f) for f in (FAMILY_CLONES, FAMILY_STRUCTURAL, FAMILY_METRICS)) + sk_opts = tuple((k, k) for k in SOURCE_KIND_FILTER_VALUES) + + return ( + intro + + '" + f'
    {cards_html}
    ' + ) diff --git a/codeclone/_html_report/_tables.py b/codeclone/_html_report/_tables.py new file mode 100644 index 0000000..8d8a1fd --- /dev/null +++ b/codeclone/_html_report/_tables.py @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Generic table renderer for metric/finding tables.""" + +from __future__ import annotations + +from collections.abc import Collection, Sequence +from typing import TYPE_CHECKING + +from .._html_badges import _quality_badge_html, _tab_empty +from .._html_escape import _escape_attr, _escape_html +from ._glossary import glossary_tip + +if TYPE_CHECKING: + from ._context import ReportContext + +_RISK_HEADERS = {"risk", "confidence", "severity", "effort"} +_PATH_HEADERS = {"file", "location"} + +_COL_WIDTHS: dict[str, str] = { + "cc": "62px", + "cbo": "62px", + "lcom4": "70px", + "nesting": "76px", + "line": "60px", + "length": "68px", + "methods": "80px", + "fields": "68px", + "priority": "74px", + "risk": "78px", + "confidence": "94px", + "severity": "82px", + "effort": "78px", + "category": "100px", + "kind": "76px", + "steps": "120px", + "coupled classes": "360px", +} + +_COL_CLS: dict[str, str] = {} +for _h in ("function", "class", "name"): + _COL_CLS[_h] = "col-name" +for _h in ("file", "location"): + _COL_CLS[_h] = "col-path" +for _h in ( + "cc", + "cbo", + "lcom4", + "nesting", + "line", + "length", + "methods", + "fields", + "priority", +): + _COL_CLS[_h] = "col-num" +for _h in ("risk", "confidence", "severity", "effort"): + _COL_CLS[_h] = "col-badge" +for _h in ("category", "kind"): + _COL_CLS[_h] = "col-cat" +for _h in ("cycle", "longest chain", "title", "coupled classes"): + _COL_CLS[_h] = "col-wide" +_COL_CLS["steps"] = "col-steps" + + +def render_rows_table( + *, + headers: Sequence[str], + rows: Sequence[Sequence[str]], + empty_message: str, + raw_html_headers: Collection[str] = (), + ctx: ReportContext | None = None, +) -> str: + """Render a data table with badges, tooltips, 
and col sizing.""" + if not rows: + return _tab_empty(empty_message) + + lower_headers = [h.lower() for h in headers] + raw_html_set = {h.lower() for h in raw_html_headers} + + # colgroup + cg = [""] + for h in lower_headers: + w = _COL_WIDTHS.get(h) + cg.append(f'' if w else "") + cg.append("") + + # thead + th_parts = [ + f"{_escape_html(header)}{glossary_tip(header)}" for header in headers + ] + + # tbody + def _td(col_idx: int, cell: str) -> str: + h = lower_headers[col_idx] if col_idx < len(lower_headers) else "" + cls = _COL_CLS.get(h, "") + cls_attr = f' class="{cls}"' if cls else "" + if h in raw_html_set: + return f"{cell}" + if h in _RISK_HEADERS: + return f"{_quality_badge_html(cell)}" + if h in _PATH_HEADERS and ctx is not None: + short = ctx.relative_path(cell) + return ( + f'{_escape_html(short)}' + ) + return f"{_escape_html(cell)}" + + body_html = "".join( + "" + "".join(_td(i, cell) for i, cell in enumerate(row)) + "" + for row in rows + ) + + return ( + '
    ' + f"{''.join(cg)}" + f"{''.join(th_parts)}" + f"{body_html}" + "
    " + ) diff --git a/codeclone/_html_report/_tabs.py b/codeclone/_html_report/_tabs.py new file mode 100644 index 0000000..54870ca --- /dev/null +++ b/codeclone/_html_report/_tabs.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Tab/subtab rendering helpers.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from .._html_escape import _escape_attr, _escape_html + + +def render_split_tabs( + *, + group_id: str, + tabs: Sequence[tuple[str, str, int, str]], + emit_clone_counters: bool = False, +) -> str: + """Render sub-tab navigation + panels. + + Each tab tuple: ``(tab_id, label, count, panel_html)``. + """ + if not tabs: + return "" + + nav: list[str] = [ + '") + + panels: list[str] = [] + for idx, (tab_id, _, _, panel_html) in enumerate(tabs): + active = " active" if idx == 0 else "" + panels.append( + f'
    ' + f"{panel_html}
    " + ) + + return f"{''.join(nav)}{''.join(panels)}" diff --git a/codeclone/_html_snippets.py b/codeclone/_html_snippets.py index 8205b17..9ae7e40 100644 --- a/codeclone/_html_snippets.py +++ b/codeclone/_html_snippets.py @@ -1,29 +1,18 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import html import importlib -import itertools -from collections.abc import Iterable from dataclasses import dataclass from functools import lru_cache -from types import ModuleType -from typing import NamedTuple, cast +from typing import TYPE_CHECKING, NamedTuple, cast from .errors import FileProcessingError - -def pairwise(iterable: Iterable[object]) -> Iterable[tuple[object, object]]: - a, b = itertools.tee(iterable) - next(b, None) - return zip(a, b, strict=False) +if TYPE_CHECKING: + from types import ModuleType @dataclass(slots=True) @@ -77,7 +66,7 @@ class _CacheInfo(NamedTuple): currsize: int def cache_info(self) -> _CacheInfo: - return cast(_FileCache._CacheInfo, self._get_file_lines_impl.cache_info()) + return cast("_FileCache._CacheInfo", self._get_file_lines_impl.cache_info()) _PYGMENTS_IMPORTER_ID: int | None = None @@ -151,32 +140,6 @@ def _pygments_css(style_name: str) -> str: return "" -def _prefix_css(css: str, prefix: str) -> str: - """ - Prefix every selector block with `prefix `. - Safe enough for pygments CSS which is mostly selector blocks and comments. 
- """ - out_lines: list[str] = [] - for line in css.splitlines(): - stripped = line.strip() - if not stripped: - out_lines.append(line) - continue - if stripped.startswith(("/*", "*", "*/")): - out_lines.append(line) - continue - if "{" in line: - before, after = line.split("{", 1) - sel = before.strip() - if sel: - out_lines.append(f"{prefix} {sel} {{ {after}".rstrip()) - else: - out_lines.append(line) - else: - out_lines.append(line) - return "\n".join(out_lines) - - def _render_code_block( *, filepath: str, diff --git a/codeclone/_report_blocks.py b/codeclone/_report_blocks.py deleted file mode 100644 index a6369d6..0000000 --- a/codeclone/_report_blocks.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" - -from __future__ import annotations - -from typing import Any - -from ._report_types import GroupItem, GroupMap - - -# Any: values come from report item dictionaries populated from JSON-like data. -def _coerce_positive_int(value: Any) -> int | None: - try: - integer = int(value) - except (TypeError, ValueError): - return None - return integer if integer > 0 else None - - -def _block_item_sort_key(item: GroupItem) -> tuple[str, str, int, int]: - start_line = _coerce_positive_int(item.get("start_line")) or 0 - end_line = _coerce_positive_int(item.get("end_line")) or 0 - return ( - str(item.get("filepath", "")), - str(item.get("qualname", "")), - start_line, - end_line, - ) - - -def _merge_block_items(items: list[GroupItem]) -> list[GroupItem]: - """ - Merge overlapping/adjacent block windows into maximal ranges per function. 
- """ - if not items: - return [] - - sorted_items = sorted(items, key=_block_item_sort_key) - merged: list[GroupItem] = [] - current: GroupItem | None = None - - for item in sorted_items: - start_line = _coerce_positive_int(item.get("start_line")) - end_line = _coerce_positive_int(item.get("end_line")) - if start_line is None or end_line is None or end_line < start_line: - continue - - if current is None: - current = dict(item) - current["start_line"] = start_line - current["end_line"] = end_line - current["size"] = max(1, end_line - start_line + 1) - continue - - same_owner = str(current.get("filepath", "")) == str( - item.get("filepath", "") - ) and str(current.get("qualname", "")) == str(item.get("qualname", "")) - if same_owner and start_line <= int(current["end_line"]) + 1: - current["end_line"] = max(int(current["end_line"]), end_line) - current["size"] = max( - 1, int(current["end_line"]) - int(current["start_line"]) + 1 - ) - continue - - merged.append(current) - current = dict(item) - current["start_line"] = start_line - current["end_line"] = end_line - current["size"] = max(1, end_line - start_line + 1) - - if current is not None: - merged.append(current) - - return merged - - -def prepare_block_report_groups(block_groups: GroupMap) -> GroupMap: - """ - Convert sliding block windows into maximal merged regions for reporting. - Block hash keys remain unchanged. - """ - prepared: GroupMap = {} - for key, items in block_groups.items(): - merged = _merge_block_items(items) - if merged: - prepared[key] = merged - else: - prepared[key] = sorted(items, key=_block_item_sort_key) - return prepared diff --git a/codeclone/_report_grouping.py b/codeclone/_report_grouping.py deleted file mode 100644 index 3ad44ab..0000000 --- a/codeclone/_report_grouping.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" - -from __future__ import annotations - -from ._report_types import GroupItem, GroupMap - - -def build_groups(units: list[GroupItem]) -> GroupMap: - groups: GroupMap = {} - for u in units: - key = f"{u['fingerprint']}|{u['loc_bucket']}" - groups.setdefault(key, []).append(u) - return {k: v for k, v in groups.items() if len(v) > 1} - - -def build_block_groups(blocks: list[GroupItem], min_functions: int = 2) -> GroupMap: - groups: GroupMap = {} - for b in blocks: - groups.setdefault(b["block_hash"], []).append(b) - - filtered: GroupMap = {} - for h, items in groups.items(): - functions = {i["qualname"] for i in items} - if len(functions) >= min_functions: - filtered[h] = items - - return filtered - - -def build_segment_groups( - segments: list[GroupItem], min_occurrences: int = 2 -) -> GroupMap: - sig_groups: GroupMap = {} - for s in segments: - sig_groups.setdefault(s["segment_sig"], []).append(s) - - confirmed: GroupMap = {} - for items in sig_groups.values(): - if len(items) < min_occurrences: - continue - - hash_groups: GroupMap = {} - for item in items: - hash_groups.setdefault(item["segment_hash"], []).append(item) - - for segment_hash, hash_items in hash_groups.items(): - if len(hash_items) < min_occurrences: - continue - - by_func: GroupMap = {} - for it in hash_items: - by_func.setdefault(it["qualname"], []).append(it) - - for qualname, q_items in by_func.items(): - if len(q_items) >= min_occurrences: - confirmed[f"{segment_hash}|{qualname}"] = q_items - - return confirmed diff --git a/codeclone/_report_segments.py b/codeclone/_report_segments.py deleted file mode 100644 index bd985cb..0000000 --- a/codeclone/_report_segments.py +++ /dev/null @@ -1,247 +0,0 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" - -from __future__ import annotations - -import ast -from dataclasses import dataclass -from pathlib import Path - -from ._report_types import GroupItem, GroupMap - -SEGMENT_MIN_UNIQUE_STMT_TYPES = 2 - -_CONTROL_FLOW_STMTS = ( - ast.If, - ast.For, - ast.While, - ast.Try, - ast.With, - ast.Match, - ast.AsyncFor, - ast.AsyncWith, -) -_FORBIDDEN_STMTS = (ast.Return, ast.Raise, ast.Assert) - - -@dataclass(frozen=True, slots=True) -class _SegmentAnalysis: - unique_stmt_types: int - has_control_flow: bool - is_boilerplate: bool - - -class _QualnameCollector(ast.NodeVisitor): - __slots__ = ("funcs", "stack") - - def __init__(self) -> None: - self.stack: list[str] = [] - self.funcs: dict[str, ast.FunctionDef | ast.AsyncFunctionDef] = {} - - def visit_ClassDef(self, node: ast.ClassDef) -> None: - self.stack.append(node.name) - self.generic_visit(node) - self.stack.pop() - - def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - name = ".".join([*self.stack, node.name]) if self.stack else node.name - self.funcs[name] = node - - def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: - name = ".".join([*self.stack, node.name]) if self.stack else node.name - self.funcs[name] = node - - -def _merge_segment_items(items: list[GroupItem]) -> list[GroupItem]: - if not items: - return [] - - items_sorted = sorted( - items, - key=lambda i: ( - i.get("filepath", ""), - i.get("qualname", ""), - int(i.get("start_line", 0)), - int(i.get("end_line", 0)), - ), - ) - - merged: list[GroupItem] = [] - current: GroupItem | None = None - - for item in items_sorted: - start = int(item.get("start_line", 0)) - end = int(item.get("end_line", 0)) - if start <= 0 or end <= 0: - continue - - if current is None: - current = dict(item) - current["start_line"] = start - current["end_line"] = end - current["size"] = max(1, end - start + 1) - continue - - same_owner = current.get("filepath") == item.get("filepath") and current.get( - "qualname" - ) == item.get("qualname") - if 
same_owner and start <= int(current["end_line"]) + 1: - current["end_line"] = max(int(current["end_line"]), end) - current["size"] = max( - 1, int(current["end_line"]) - int(current["start_line"]) + 1 - ) - continue - - merged.append(current) - current = dict(item) - current["start_line"] = start - current["end_line"] = end - current["size"] = max(1, end - start + 1) - - if current is not None: - merged.append(current) - - return merged - - -def _collect_file_functions( - filepath: str, -) -> dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None: - try: - source = Path(filepath).read_text("utf-8") - except OSError: - return None - try: - tree = ast.parse(source) - except SyntaxError: - return None - - collector = _QualnameCollector() - collector.visit(tree) - return collector.funcs - - -def _segment_statements( - func_node: ast.FunctionDef | ast.AsyncFunctionDef, start_line: int, end_line: int -) -> list[ast.stmt]: - body = getattr(func_node, "body", None) - if not isinstance(body, list): - return [] - stmts: list[ast.stmt] = [] - for stmt in body: - lineno = getattr(stmt, "lineno", None) - end = getattr(stmt, "end_lineno", None) - if lineno is None or end is None: - continue - if lineno >= start_line and end <= end_line: - stmts.append(stmt) - return stmts - - -def _assign_targets_attribute_only(stmt: ast.stmt) -> bool: - if isinstance(stmt, ast.Assign): - return all(isinstance(t, ast.Attribute) for t in stmt.targets) - if isinstance(stmt, ast.AnnAssign): - return isinstance(stmt.target, ast.Attribute) - return False - - -def _analyze_segment_statements(stmts: list[ast.stmt]) -> _SegmentAnalysis | None: - if not stmts: - return None - - unique_types = {type(s) for s in stmts} - has_control_flow = any(isinstance(s, _CONTROL_FLOW_STMTS) for s in stmts) - has_forbidden = any(isinstance(s, _FORBIDDEN_STMTS) for s in stmts) - has_call_stmt = any( - isinstance(s, ast.Expr) and isinstance(s.value, ast.Call) for s in stmts - ) - - assign_stmts = [s for s in stmts if 
isinstance(s, (ast.Assign, ast.AnnAssign))] - assign_ratio = len(assign_stmts) / len(stmts) - assign_attr_only = all(_assign_targets_attribute_only(s) for s in assign_stmts) - - is_boilerplate = ( - assign_ratio >= 0.8 - and assign_attr_only - and not has_control_flow - and not has_forbidden - and not has_call_stmt - ) - - return _SegmentAnalysis( - unique_stmt_types=len(unique_types), - has_control_flow=has_control_flow, - is_boilerplate=is_boilerplate, - ) - - -def prepare_segment_report_groups( - segment_groups: GroupMap, -) -> tuple[GroupMap, int]: - """ - Merge overlapping segment windows and suppress low-value boilerplate groups - for reporting. Detection hashes remain unchanged. - """ - suppressed = 0 - filtered: GroupMap = {} - file_cache: dict[str, dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None] = {} - - for key, items in segment_groups.items(): - merged_items = _merge_segment_items(items) - if not merged_items: - continue - - analyses: list[_SegmentAnalysis] = [] - unknown = False - for item in merged_items: - filepath = str(item.get("filepath", "")) - qualname = str(item.get("qualname", "")) - start_line = int(item.get("start_line", 0)) - end_line = int(item.get("end_line", 0)) - if not filepath or not qualname or start_line <= 0 or end_line <= 0: - unknown = True - break - - if filepath not in file_cache: - file_cache[filepath] = _collect_file_functions(filepath) - funcs = file_cache[filepath] - if not funcs: - unknown = True - break - - local_name = qualname.split(":", 1)[1] if ":" in qualname else qualname - func_node = funcs.get(local_name) - if func_node is None: - unknown = True - break - - stmts = _segment_statements(func_node, start_line, end_line) - analysis = _analyze_segment_statements(stmts) - if analysis is None: - unknown = True - break - analyses.append(analysis) - - if unknown: - filtered[key] = merged_items - continue - - all_boilerplate = all(a.is_boilerplate for a in analyses) - all_too_simple = all( - (not 
a.has_control_flow) - and (a.unique_stmt_types < SEGMENT_MIN_UNIQUE_STMT_TYPES) - for a in analyses - ) - if all_boilerplate or all_too_simple: - suppressed += 1 - continue - - filtered[key] = merged_items - - return filtered, suppressed diff --git a/codeclone/_report_serialize.py b/codeclone/_report_serialize.py deleted file mode 100644 index 9c1a576..0000000 --- a/codeclone/_report_serialize.py +++ /dev/null @@ -1,418 +0,0 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" - -from __future__ import annotations - -import json -from collections.abc import Collection, Mapping - -from ._report_types import GroupItem, GroupMap -from .contracts import REPORT_SCHEMA_VERSION - -FunctionRecord = tuple[int, str, int, int, int, int, str, str] -BlockRecord = tuple[int, str, int, int, int] -SegmentRecord = tuple[int, str, int, int, int, str, str] -SplitLists = dict[str, list[str]] -GroupsSplit = dict[str, SplitLists] - -GROUP_ITEM_LAYOUT: dict[str, list[str]] = { - "functions": [ - "file_i", - "qualname", - "start", - "end", - "loc", - "stmt_count", - "fingerprint", - "loc_bucket", - ], - "blocks": ["file_i", "qualname", "start", "end", "size"], - "segments": [ - "file_i", - "qualname", - "start", - "end", - "size", - "segment_hash", - "segment_sig", - ], -} - - -def _item_sort_key(item: GroupItem) -> tuple[str, int, int, str]: - return ( - str(item.get("filepath", "")), - int(item.get("start_line", 0)), - int(item.get("end_line", 0)), - str(item.get("qualname", "")), - ) - - -def _collect_files( - *, - func_groups: GroupMap, - block_groups: GroupMap, - segment_groups: GroupMap, -) -> list[str]: - files: set[str] = set() - for groups in (func_groups, block_groups, segment_groups): - for items in groups.values(): - for item in items: - files.add(str(item.get("filepath", ""))) - return sorted(files) - - -def _encode_function_item(item: GroupItem, 
file_id: int) -> FunctionRecord: - return ( - file_id, - str(item.get("qualname", "")), - int(item.get("start_line", 0)), - int(item.get("end_line", 0)), - int(item.get("loc", 0)), - int(item.get("stmt_count", 0)), - str(item.get("fingerprint", "")), - str(item.get("loc_bucket", "")), - ) - - -def _encode_block_item(item: GroupItem, file_id: int) -> BlockRecord: - return ( - file_id, - str(item.get("qualname", "")), - int(item.get("start_line", 0)), - int(item.get("end_line", 0)), - int(item.get("size", 0)), - ) - - -def _encode_segment_item(item: GroupItem, file_id: int) -> SegmentRecord: - return ( - file_id, - str(item.get("qualname", "")), - int(item.get("start_line", 0)), - int(item.get("end_line", 0)), - int(item.get("size", 0)), - str(item.get("segment_hash", "")), - str(item.get("segment_sig", "")), - ) - - -def _function_record_sort_key(record: FunctionRecord) -> tuple[int, str, int, int]: - return record[0], record[1], record[2], record[3] - - -def _block_record_sort_key(record: BlockRecord) -> tuple[int, str, int, int]: - return record[0], record[1], record[2], record[3] - - -def _segment_record_sort_key(record: SegmentRecord) -> tuple[int, str, int, int]: - return record[0], record[1], record[2], record[3] - - -def _resolve_metric_value(item: GroupItem, metric_name: str) -> int: - raw_value = item.get(metric_name) - if raw_value is None: - fallback_metric = "size" if metric_name == "loc" else "loc" - raw_value = item.get(fallback_metric, 0) - return int(raw_value) - - -def _baseline_is_trusted(meta: Mapping[str, object]) -> bool: - return ( - meta.get("baseline_loaded") is True - and str(meta.get("baseline_status", "")).strip().lower() == "ok" - ) - - -def to_json(groups: GroupMap) -> str: - def _sorted_items(items: list[GroupItem]) -> list[GroupItem]: - return sorted(items, key=_item_sort_key) - - return json.dumps( - { - "group_count": len(groups), - "groups": [ - {"key": k, "count": len(v), "items": _sorted_items(v)} - for k, v in sorted( - 
groups.items(), - key=lambda kv: (-len(kv[1]), kv[0]), - ) - ], - }, - ensure_ascii=False, - indent=2, - ) - - -def to_json_report( - func_groups: GroupMap, - block_groups: GroupMap, - segment_groups: GroupMap, - meta: Mapping[str, object] | None = None, - block_facts: Mapping[str, Mapping[str, str]] | None = None, - new_function_group_keys: Collection[str] | None = None, - new_block_group_keys: Collection[str] | None = None, - new_segment_group_keys: Collection[str] | None = None, -) -> str: - """ - Serialize report JSON schema v1.1. - - NEW/KNOWN split contract: - - if baseline is not trusted, all groups are NEW and KNOWN is empty - - if baseline is trusted, callers must pass `new_*_group_keys` computed by - the core baseline diff pipeline; keys absent from `new_*` are treated as KNOWN - """ - meta_payload = dict(meta or {}) - meta_payload["report_schema_version"] = REPORT_SCHEMA_VERSION - - files = _collect_files( - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - ) - file_ids = {filepath: idx for idx, filepath in enumerate(files)} - - function_groups: dict[str, list[FunctionRecord]] = {} - for group_key in sorted(func_groups): - function_records = [ - _encode_function_item(item, file_ids[str(item.get("filepath", ""))]) - for item in func_groups[group_key] - ] - function_groups[group_key] = sorted( - function_records, key=_function_record_sort_key - ) - - block_groups_out: dict[str, list[BlockRecord]] = {} - for group_key in sorted(block_groups): - block_records = [ - _encode_block_item(item, file_ids[str(item.get("filepath", ""))]) - for item in block_groups[group_key] - ] - block_groups_out[group_key] = sorted(block_records, key=_block_record_sort_key) - - segment_groups_out: dict[str, list[SegmentRecord]] = {} - for group_key in sorted(segment_groups): - segment_records = [ - _encode_segment_item(item, file_ids[str(item.get("filepath", ""))]) - for item in segment_groups[group_key] - ] - segment_groups_out[group_key] = 
sorted( - segment_records, key=_segment_record_sort_key - ) - - baseline_trusted = _baseline_is_trusted(meta_payload) - - def _split_for( - *, - keys: Collection[str], - new_keys: Collection[str] | None, - ) -> SplitLists: - sorted_keys = sorted(keys) - if not baseline_trusted: - return {"new": sorted_keys, "known": []} - if new_keys is None: - return {"new": sorted_keys, "known": []} - new_key_set = set(new_keys) - new_list = [group_key for group_key in sorted_keys if group_key in new_key_set] - known_list = [ - group_key for group_key in sorted_keys if group_key not in new_key_set - ] - return {"new": new_list, "known": known_list} - - groups_split: GroupsSplit = { - "functions": _split_for( - keys=function_groups.keys(), - new_keys=new_function_group_keys, - ), - "blocks": _split_for( - keys=block_groups_out.keys(), - new_keys=new_block_group_keys, - ), - "segments": _split_for( - keys=segment_groups_out.keys(), - new_keys=new_segment_group_keys, - ), - } - meta_payload["groups_counts"] = { - section_name: { - "total": len(section_split["new"]) + len(section_split["known"]), - "new": len(section_split["new"]), - "known": len(section_split["known"]), - } - for section_name, section_split in groups_split.items() - } - - payload: dict[str, object] = { - "meta": meta_payload, - "files": files, - "groups": { - "functions": function_groups, - "blocks": block_groups_out, - "segments": segment_groups_out, - }, - "groups_split": groups_split, - "group_item_layout": GROUP_ITEM_LAYOUT, - } - - if block_facts: - sorted_block_facts: dict[str, dict[str, str]] = {} - for group_key in sorted(block_facts): - sorted_block_facts[group_key] = { - fact_key: str(block_facts[group_key][fact_key]) - for fact_key in sorted(block_facts[group_key]) - } - payload["facts"] = {"blocks": sorted_block_facts} - - return json.dumps( - payload, - ensure_ascii=False, - indent=2, - ) - - -def to_text(groups: GroupMap, *, metric_name: str = "loc") -> str: - lines: list[str] = [] - for i, (_, v) in 
enumerate( - sorted(groups.items(), key=lambda kv: (-len(kv[1]), kv[0])) - ): - items = sorted( - v, - key=lambda item: ( - str(item.get("filepath", "")), - int(item.get("start_line", 0)), - int(item.get("end_line", 0)), - str(item.get("qualname", "")), - ), - ) - lines.append(f"\n=== Clone group #{i + 1} (count={len(v)}) ===") - lines.extend( - [ - f"- {item['qualname']} " - f"{item['filepath']}:{item['start_line']}-{item['end_line']} " - f"{metric_name}={_resolve_metric_value(item, metric_name)}" - for item in items - ] - ) - return "\n".join(lines).strip() + "\n" - - -def _format_meta_text_value(value: object) -> str: - if isinstance(value, bool): - return "true" if value else "false" - if value is None: - return "(none)" - text = str(value).strip() - return text if text else "(none)" - - -def to_text_report( - *, - meta: Mapping[str, object], - func_groups: GroupMap, - block_groups: GroupMap, - segment_groups: GroupMap, - new_function_group_keys: Collection[str] | None = None, - new_block_group_keys: Collection[str] | None = None, - new_segment_group_keys: Collection[str] | None = None, -) -> str: - """ - Serialize deterministic TXT report. - - NEW/KNOWN split follows the same contract as JSON v1.1. 
- """ - - baseline_trusted = _baseline_is_trusted(meta) - - def _split_for( - *, - groups: GroupMap, - new_keys: Collection[str] | None, - ) -> SplitLists: - sorted_keys = sorted(groups.keys()) - if not baseline_trusted: - return {"new": sorted_keys, "known": []} - if new_keys is None: - return {"new": sorted_keys, "known": []} - new_key_set = set(new_keys) - new_list = [group_key for group_key in sorted_keys if group_key in new_key_set] - known_list = [ - group_key for group_key in sorted_keys if group_key not in new_key_set - ] - return {"new": new_list, "known": known_list} - - groups_split: GroupsSplit = { - "functions": _split_for(groups=func_groups, new_keys=new_function_group_keys), - "blocks": _split_for(groups=block_groups, new_keys=new_block_group_keys), - "segments": _split_for(groups=segment_groups, new_keys=new_segment_group_keys), - } - - lines = [ - "REPORT METADATA", - "Report schema version: " - f"{_format_meta_text_value(meta.get('report_schema_version'))}", - f"CodeClone version: {_format_meta_text_value(meta.get('codeclone_version'))}", - f"Python version: {_format_meta_text_value(meta.get('python_version'))}", - f"Python tag: {_format_meta_text_value(meta.get('python_tag'))}", - f"Baseline path: {_format_meta_text_value(meta.get('baseline_path'))}", - "Baseline fingerprint version: " - f"{_format_meta_text_value(meta.get('baseline_fingerprint_version'))}", - "Baseline schema version: " - f"{_format_meta_text_value(meta.get('baseline_schema_version'))}", - "Baseline Python tag: " - f"{_format_meta_text_value(meta.get('baseline_python_tag'))}", - "Baseline generator name: " - f"{_format_meta_text_value(meta.get('baseline_generator_name'))}", - "Baseline generator version: " - f"{_format_meta_text_value(meta.get('baseline_generator_version'))}", - "Baseline payload sha256: " - f"{_format_meta_text_value(meta.get('baseline_payload_sha256'))}", - "Baseline payload verified: " - 
f"{_format_meta_text_value(meta.get('baseline_payload_sha256_verified'))}", - f"Baseline loaded: {_format_meta_text_value(meta.get('baseline_loaded'))}", - f"Baseline status: {_format_meta_text_value(meta.get('baseline_status'))}", - f"Cache path: {_format_meta_text_value(meta.get('cache_path'))}", - "Cache schema version: " - f"{_format_meta_text_value(meta.get('cache_schema_version'))}", - f"Cache status: {_format_meta_text_value(meta.get('cache_status'))}", - f"Cache used: {_format_meta_text_value(meta.get('cache_used'))}", - "Source IO skipped: " - f"{_format_meta_text_value(meta.get('files_skipped_source_io'))}", - ] - - if not baseline_trusted: - lines.append("Note: baseline is untrusted; all groups are treated as NEW.") - - sections = ( - ("FUNCTION CLONES", "functions", func_groups, "loc"), - ("BLOCK CLONES", "blocks", block_groups, "size"), - ("SEGMENT CLONES", "segments", segment_groups, "size"), - ) - for title, section_key, groups, metric_name in sections: - split = groups_split[section_key] - new_groups: GroupMap = { - group_key: groups[group_key] - for group_key in split["new"] - if group_key in groups - } - known_groups: GroupMap = { - group_key: groups[group_key] - for group_key in split["known"] - if group_key in groups - } - - lines.append("") - lines.append(f"{title} (NEW) (groups={len(split['new'])})") - new_block = to_text(new_groups, metric_name=metric_name).rstrip() - lines.append(new_block if new_block else "(none)") - - lines.append("") - lines.append(f"{title} (KNOWN) (groups={len(split['known'])})") - known_block = to_text(known_groups, metric_name=metric_name).rstrip() - lines.append(known_block if known_block else "(none)") - - return "\n".join(lines).rstrip() + "\n" diff --git a/codeclone/_report_types.py b/codeclone/_report_types.py deleted file mode 100644 index 79a732f..0000000 --- a/codeclone/_report_types.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural 
duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" - -from __future__ import annotations - -from typing import Any - -# Any: report items aggregate heterogeneous JSON-like payloads from multiple -# pipelines (function/block/segment) and are narrowed at access sites. -GroupItem = dict[str, Any] - - -GroupMap = dict[str, list[GroupItem]] diff --git a/codeclone/_schema_validation.py b/codeclone/_schema_validation.py new file mode 100644 index 0000000..43280c0 --- /dev/null +++ b/codeclone/_schema_validation.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .errors import BaselineValidationError + +if TYPE_CHECKING: + from collections.abc import Mapping, Set + from pathlib import Path + +__all__ = ["validate_top_level_structure"] + + +def validate_top_level_structure( + payload: Mapping[str, object], + *, + path: Path, + required_keys: Set[str], + allowed_keys: Set[str], + schema_label: str, + missing_status: str, + extra_status: str, +) -> None: + keys = set(payload.keys()) + missing = required_keys - keys + if missing: + raise BaselineValidationError( + f"Invalid {schema_label} schema at {path}: missing top-level keys: " + f"{', '.join(sorted(missing))}", + status=missing_status, + ) + extra = keys - allowed_keys + if extra: + raise BaselineValidationError( + f"Invalid {schema_label} schema at {path}: unexpected top-level keys: " + f"{', '.join(sorted(extra))}", + status=extra_status, + ) diff --git a/codeclone/baseline.py b/codeclone/baseline.py index b63f88c..c249539 100644 --- a/codeclone/baseline.py +++ b/codeclone/baseline.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -14,25 +9,27 @@ import os import re import sys -from collections.abc import Mapping from datetime import datetime, timezone from enum import Enum from pathlib import Path -from typing import Any, Final +from typing import TYPE_CHECKING, Any, Final from . import __version__ +from ._schema_validation import validate_top_level_structure from .contracts import ( BASELINE_FINGERPRINT_VERSION, BASELINE_SCHEMA_VERSION, ) from .errors import BaselineValidationError +if TYPE_CHECKING: + from collections.abc import Mapping + # Any: baseline JSON parsing/serialization boundary. Values are validated # and narrowed before entering compatibility/integrity checks. BASELINE_GENERATOR = "codeclone" -BASELINE_SCHEMA_MAJOR = 1 -BASELINE_SCHEMA_MAX_MINOR = 0 +_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR = {1: 0, 2: 0} MAX_BASELINE_SIZE_BYTES = 5 * 1024 * 1024 @@ -81,7 +78,9 @@ def coerce_baseline_status( return BaselineStatus.INVALID_TYPE -_TOP_LEVEL_KEYS = {"meta", "clones"} +_TOP_LEVEL_REQUIRED_KEYS = {"meta", "clones"} +_TOP_LEVEL_OPTIONAL_KEYS = {"metrics"} +_TOP_LEVEL_ALLOWED_KEYS = _TOP_LEVEL_REQUIRED_KEYS | _TOP_LEVEL_OPTIONAL_KEYS _META_REQUIRED_KEYS = { "generator", "schema_version", @@ -93,6 +92,7 @@ def coerce_baseline_status( _CLONES_REQUIRED_KEYS = {"functions", "blocks"} _FUNCTION_ID_RE = re.compile(r"^[0-9a-f]{40}\|(?:\d+-\d+|\d+\+)$") _BLOCK_ID_RE = re.compile(r"^[0-9a-f]{40}\|[0-9a-f]{40}\|[0-9a-f]{40}\|[0-9a-f]{40}$") +_UTC_ISO8601_Z_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$") class Baseline: @@ -121,7 +121,12 @@ def __init__(self, path: str | Path): self.payload_sha256: str | None = None self.generator_version: str | None = None - def load(self, *, max_size_bytes: int | None = None) -> None: + def load( + self, + *, + max_size_bytes: int | None = None, + preloaded_payload: dict[str, Any] | None = None, + ) -> None: try: exists = self.path.exists() 
except OSError as e: @@ -144,7 +149,15 @@ def load(self, *, max_size_bytes: int | None = None) -> None: status=BaselineStatus.TOO_LARGE, ) - payload = _load_json_object(self.path) + if preloaded_payload is None: + payload = _load_json_object(self.path) + else: + if not isinstance(preloaded_payload, dict): + raise BaselineValidationError( + f"Baseline payload must be an object at {self.path}", + status=BaselineStatus.INVALID_TYPE, + ) + payload = preloaded_payload if _is_legacy_baseline_payload(payload): raise BaselineValidationError( "Baseline format is legacy (<=1.3.x) and must be regenerated. " @@ -173,6 +186,17 @@ def load(self, *, max_size_bytes: int | None = None) -> None: generator, generator_version = _parse_generator_meta(meta_obj, path=self.path) schema_version = _require_semver_str(meta_obj, "schema_version", path=self.path) + schema_major, _, _ = _parse_semver( + schema_version, + key="schema_version", + path=self.path, + ) + if schema_major < 2 and "metrics" in payload: + raise BaselineValidationError( + f"Invalid baseline schema at {self.path}: " + "top-level 'metrics' requires baseline schema >= 2.0.", + status=BaselineStatus.MISMATCH_SCHEMA_VERSION, + ) fingerprint_version = _require_str( meta_obj, "fingerprint_version", path=self.path ) @@ -215,8 +239,49 @@ def save(self) -> None: generator_version=self.generator_version, created_at=self.created_at, ) + preserved_metrics, preserved_metrics_hash = _preserve_embedded_metrics( + self.path + ) + if preserved_metrics is not None: + payload["metrics"] = preserved_metrics + if preserved_metrics_hash is not None: + meta_obj = payload.get("meta") + if isinstance(meta_obj, dict): + meta_obj["metrics_payload_sha256"] = preserved_metrics_hash _atomic_write_json(self.path, payload) + meta_obj = payload.get("meta") + if not isinstance(meta_obj, dict): + return + + generator_obj = meta_obj.get("generator") + if isinstance(generator_obj, dict): + generator_name = generator_obj.get("name") + generator_version = 
generator_obj.get("version") + if isinstance(generator_name, str): + self.generator = generator_name + if isinstance(generator_version, str): + self.generator_version = generator_version + elif isinstance(generator_obj, str): + self.generator = generator_obj + + schema_version = meta_obj.get("schema_version") + fingerprint_version = meta_obj.get("fingerprint_version") + python_tag = meta_obj.get("python_tag") + created_at = meta_obj.get("created_at") + payload_sha256 = meta_obj.get("payload_sha256") + + if isinstance(schema_version, str): + self.schema_version = schema_version + if isinstance(fingerprint_version, str): + self.fingerprint_version = fingerprint_version + if isinstance(python_tag, str): + self.python_tag = python_tag + if isinstance(created_at, str): + self.created_at = created_at + if isinstance(payload_sha256, str): + self.payload_sha256 = payload_sha256 + def verify_compatibility(self, *, current_python_tag: str) -> None: if self.generator != BASELINE_GENERATOR: raise BaselineValidationError( @@ -242,18 +307,22 @@ def verify_compatibility(self, *, current_python_tag: str) -> None: schema_major, schema_minor, _ = _parse_semver( self.schema_version, key="schema_version", path=self.path ) - if schema_major != BASELINE_SCHEMA_MAJOR: + max_minor = _BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR.get(schema_major) + if max_minor is None: + supported = ",".join( + str(major) for major in sorted(_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR) + ) raise BaselineValidationError( "Baseline schema version mismatch: " f"baseline={self.schema_version}, " - f"supported_major={BASELINE_SCHEMA_MAJOR}.", + f"supported_majors={supported}.", status=BaselineStatus.MISMATCH_SCHEMA_VERSION, ) - if schema_minor > BASELINE_SCHEMA_MAX_MINOR: + if schema_minor > max_minor: raise BaselineValidationError( "Baseline schema version is newer than supported: " f"baseline={self.schema_version}, " - f"max=1.{BASELINE_SCHEMA_MAX_MINOR}.", + f"max={schema_major}.{max_minor}.", 
status=BaselineStatus.MISMATCH_SCHEMA_VERSION, ) if self.fingerprint_version != BASELINE_FINGERPRINT_VERSION: @@ -390,21 +459,15 @@ def _load_json_object(path: Path) -> dict[str, Any]: def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: - keys = set(payload.keys()) - missing = _TOP_LEVEL_KEYS - keys - extra = keys - _TOP_LEVEL_KEYS - if missing: - raise BaselineValidationError( - f"Invalid baseline schema at {path}: missing top-level keys: " - f"{', '.join(sorted(missing))}", - status=BaselineStatus.MISSING_FIELDS, - ) - if extra: - raise BaselineValidationError( - f"Invalid baseline schema at {path}: unexpected top-level keys: " - f"{', '.join(sorted(extra))}", - status=BaselineStatus.INVALID_TYPE, - ) + validate_top_level_structure( + payload, + path=path, + required_keys=_TOP_LEVEL_REQUIRED_KEYS, + allowed_keys=_TOP_LEVEL_ALLOWED_KEYS, + schema_label="baseline", + missing_status=BaselineStatus.MISSING_FIELDS, + extra_status=BaselineStatus.INVALID_TYPE, + ) def _validate_required_keys( @@ -434,6 +497,23 @@ def _is_legacy_baseline_payload(payload: dict[str, Any]) -> bool: return "functions" in payload and "blocks" in payload +def _preserve_embedded_metrics(path: Path) -> tuple[dict[str, Any] | None, str | None]: + try: + payload = _load_json_object(path) + except BaselineValidationError: + return None, None + metrics_obj = payload.get("metrics") + if not isinstance(metrics_obj, dict): + return None, None + meta_obj = payload.get("meta") + if not isinstance(meta_obj, dict): + return dict(metrics_obj), None + metrics_hash = meta_obj.get("metrics_payload_sha256") + if not isinstance(metrics_hash, str): + return dict(metrics_obj), None + return dict(metrics_obj), metrics_hash + + def _parse_generator_meta( meta_obj: dict[str, Any], *, path: Path ) -> tuple[str, str | None]: @@ -610,8 +690,21 @@ def _require_python_tag(obj: dict[str, Any], key: str, *, path: Path) -> str: def _require_utc_iso8601_z(obj: dict[str, Any], key: str, *, path: 
Path) -> str: value = _require_str(obj, key, path=path) + if not _UTC_ISO8601_Z_RE.fullmatch(value): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be UTC ISO-8601 with Z", + status=BaselineStatus.INVALID_TYPE, + ) try: - datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ") + datetime( + int(value[0:4]), + int(value[5:7]), + int(value[8:10]), + int(value[11:13]), + int(value[14:16]), + int(value[17:19]), + tzinfo=timezone.utc, + ) except ValueError as e: raise BaselineValidationError( f"Invalid baseline schema at {path}: '{key}' must be UTC ISO-8601 with Z", diff --git a/codeclone/blockhash.py b/codeclone/blockhash.py index bd213ee..5eb8bcc 100644 --- a/codeclone/blockhash.py +++ b/codeclone/blockhash.py @@ -1,21 +1,29 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import ast import hashlib +from typing import TYPE_CHECKING from .normalize import AstNormalizer, NormalizationConfig +if TYPE_CHECKING: + from collections.abc import Sequence + + +def _normalized_stmt_dump(stmt: ast.stmt, normalizer: AstNormalizer) -> str: + normalized = normalizer.visit(stmt) + assert isinstance(normalized, ast.AST) + return ast.dump(normalized, annotate_fields=True, include_attributes=False) + -def stmt_hash(stmt: ast.stmt, cfg: NormalizationConfig) -> str: +def stmt_hashes(statements: Sequence[ast.stmt], cfg: NormalizationConfig) -> list[str]: normalizer = AstNormalizer(cfg) - stmt = ast.fix_missing_locations(normalizer.visit(stmt)) - dump = ast.dump(stmt, annotate_fields=True, include_attributes=False) - return hashlib.sha1(dump.encode("utf-8")).hexdigest() + return [ + hashlib.sha1( + _normalized_stmt_dump(stmt, normalizer).encode("utf-8") + ).hexdigest() + for stmt in statements + ] diff --git 
a/codeclone/blocks.py b/codeclone/blocks.py index 12a5526..2ccad47 100644 --- a/codeclone/blocks.py +++ b/codeclone/blocks.py @@ -1,41 +1,21 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations -import ast -from collections.abc import Sequence -from dataclasses import dataclass +from typing import TYPE_CHECKING -from .blockhash import stmt_hash +from .blockhash import stmt_hashes from .fingerprint import sha1 -from .normalize import NormalizationConfig - +from .models import BlockUnit, SegmentUnit -@dataclass(frozen=True, slots=True) -class BlockUnit: - block_hash: str - filepath: str - qualname: str - start_line: int - end_line: int - size: int +if TYPE_CHECKING: + import ast + from collections.abc import Sequence + from .normalize import NormalizationConfig -@dataclass(frozen=True, slots=True) -class SegmentUnit: - segment_hash: str - segment_sig: str - filepath: str - qualname: str - start_line: int - end_line: int - size: int +__all__ = ["BlockUnit", "SegmentUnit", "extract_blocks", "extract_segments"] def extract_blocks( @@ -57,16 +37,16 @@ def extract_blocks( f"precomputed_hashes length {len(precomputed_hashes)} " f"!= body length {len(body)}" ) - stmt_hashes = precomputed_hashes + stmt_hash_rows = precomputed_hashes else: - stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body] + stmt_hash_rows = stmt_hashes(body, cfg) blocks: list[BlockUnit] = [] last_start: int | None = None # Allow some overlap (50%), but at least 3 lines apart min_line_distance = max(block_size // 2, 3) - for i in range(len(stmt_hashes) - block_size + 1): + for i in range(len(stmt_hash_rows) - block_size + 1): start = getattr(body[i], "lineno", None) end = getattr(body[i + block_size - 1], "end_lineno", None) if not start or not end: @@ -75,7 +55,7 @@ def 
extract_blocks( if last_start is not None and start - last_start < min_line_distance: continue - bh = "|".join(stmt_hashes[i : i + block_size]) + bh = "|".join(stmt_hash_rows[i : i + block_size]) blocks.append( BlockUnit( @@ -114,19 +94,19 @@ def extract_segments( f"precomputed_hashes length {len(precomputed_hashes)} " f"!= body length {len(body)}" ) - stmt_hashes = precomputed_hashes + stmt_hash_rows = precomputed_hashes else: - stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body] + stmt_hash_rows = stmt_hashes(body, cfg) segments: list[SegmentUnit] = [] - for i in range(len(stmt_hashes) - window_size + 1): + for i in range(len(stmt_hash_rows) - window_size + 1): start = getattr(body[i], "lineno", None) end = getattr(body[i + window_size - 1], "end_lineno", None) if not start or not end: continue - window = stmt_hashes[i : i + window_size] + window = stmt_hash_rows[i : i + window_size] segment_hash = sha1("|".join(window)) segment_sig = sha1("|".join(sorted(window))) diff --git a/codeclone/cache.py b/codeclone/cache.py index 88131e2..18b9b44 100644 --- a/codeclone/cache.py +++ b/codeclone/cache.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -12,21 +7,43 @@ import hmac import json import os -from collections.abc import Mapping, Sequence +from collections.abc import Collection from enum import Enum from pathlib import Path -from typing import TYPE_CHECKING, TypedDict - -if TYPE_CHECKING: - from .blocks import BlockUnit, SegmentUnit - from .extractor import Unit +from typing import TYPE_CHECKING, Literal, TypedDict, TypeGuard, TypeVar, cast from .baseline import current_python_tag from .contracts import BASELINE_FINGERPRINT_VERSION, CACHE_VERSION from .errors import CacheError +from .models import ( + BlockGroupItem, + BlockUnit, + ClassMetrics, + DeadCandidate, + FileMetrics, + FunctionGroupItem, + ModuleDep, + SegmentGroupItem, + SegmentUnit, + StructuralFindingGroup, + StructuralFindingOccurrence, + Unit, +) +from .structural_findings import normalize_structural_finding_group + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Sequence MAX_CACHE_SIZE_BYTES = 50 * 1024 * 1024 LEGACY_CACHE_SECRET_FILENAME = ".cache_secret" +_DEFAULT_WIRE_UNIT_FLOW_PROFILES = ( + 0, + "none", + False, + "fallthrough", + "none", + "none", +) class CacheStatus(str, Enum): @@ -48,46 +65,94 @@ class FileStat(TypedDict): size: int -class UnitDict(TypedDict): +class SourceStatsDict(TypedDict): + lines: int + functions: int + methods: int + classes: int + + +UnitDict = FunctionGroupItem +BlockDict = BlockGroupItem +SegmentDict = SegmentGroupItem + + +class ClassMetricsDictBase(TypedDict): qualname: str filepath: str start_line: int end_line: int - loc: int - stmt_count: int - fingerprint: str - loc_bucket: str + cbo: int + lcom4: int + method_count: int + instance_var_count: int + risk_coupling: str + risk_cohesion: str -class BlockDict(TypedDict): - block_hash: str - filepath: str +class ClassMetricsDict(ClassMetricsDictBase, total=False): + coupled_classes: list[str] + + +class 
ModuleDepDict(TypedDict): + source: str + target: str + import_type: str + line: int + + +class DeadCandidateDictBase(TypedDict): qualname: str + local_name: str + filepath: str start_line: int end_line: int - size: int + kind: str -class SegmentDict(TypedDict): - segment_hash: str - segment_sig: str - filepath: str +class DeadCandidateDict(DeadCandidateDictBase, total=False): + suppressed_rules: list[str] + + +class StructuralFindingOccurrenceDict(TypedDict): qualname: str - start_line: int - end_line: int - size: int + start: int + end: int + +class StructuralFindingGroupDict(TypedDict): + finding_kind: str + finding_key: str + signature: dict[str, str] + items: list[StructuralFindingOccurrenceDict] -class CacheEntry(TypedDict): + +class CacheEntryBase(TypedDict): stat: FileStat units: list[UnitDict] blocks: list[BlockDict] segments: list[SegmentDict] +class CacheEntry(CacheEntryBase, total=False): + source_stats: SourceStatsDict + class_metrics: list[ClassMetricsDict] + module_deps: list[ModuleDepDict] + dead_candidates: list[DeadCandidateDict] + referenced_names: list[str] + referenced_qualnames: list[str] + import_names: list[str] + class_names: list[str] + structural_findings: list[StructuralFindingGroupDict] + + class AnalysisProfile(TypedDict): min_loc: int min_stmt: int + block_min_loc: int + block_min_stmt: int + segment_min_loc: int + segment_min_stmt: int class CacheData(TypedDict): @@ -98,8 +163,136 @@ class CacheData(TypedDict): files: dict[str, CacheEntry] +class SegmentReportProjection(TypedDict): + digest: str + suppressed: int + groups: dict[str, list[SegmentDict]] + + +def build_segment_report_projection( + *, + digest: str, + suppressed: int, + groups: Mapping[str, Sequence[Mapping[str, object]]], +) -> SegmentReportProjection: + normalized_groups: dict[str, list[SegmentDict]] = {} + for group_key in sorted(groups): + normalized_items: list[SegmentDict] = [] + for raw_item in sorted( + groups[group_key], + key=lambda item: ( + 
str(item.get("filepath", "")), + str(item.get("qualname", "")), + _as_int(item.get("start_line")) or 0, + _as_int(item.get("end_line")) or 0, + ), + ): + segment_hash = _as_str(raw_item.get("segment_hash")) + segment_sig = _as_str(raw_item.get("segment_sig")) + filepath = _as_str(raw_item.get("filepath")) + qualname = _as_str(raw_item.get("qualname")) + start_line = _as_int(raw_item.get("start_line")) + end_line = _as_int(raw_item.get("end_line")) + size = _as_int(raw_item.get("size")) + if ( + segment_hash is None + or segment_sig is None + or filepath is None + or qualname is None + or start_line is None + or end_line is None + or size is None + ): + continue + normalized_items.append( + SegmentGroupItem( + segment_hash=segment_hash, + segment_sig=segment_sig, + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + ) + if normalized_items: + normalized_groups[group_key] = normalized_items + return { + "digest": digest, + "suppressed": max(0, int(suppressed)), + "groups": normalized_groups, + } + + +def _normalize_cached_structural_group( + group: StructuralFindingGroupDict, + *, + filepath: str, +) -> StructuralFindingGroupDict | None: + signature = dict(group["signature"]) + finding_kind = group["finding_kind"] + finding_key = group["finding_key"] + normalized = normalize_structural_finding_group( + StructuralFindingGroup( + finding_kind=finding_kind, + finding_key=finding_key, + signature=signature, + items=tuple( + StructuralFindingOccurrence( + finding_kind=finding_kind, + finding_key=finding_key, + file_path=filepath, + qualname=item["qualname"], + start=item["start"], + end=item["end"], + signature=signature, + ) + for item in group["items"] + ), + ) + ) + if normalized is None: + return None + return StructuralFindingGroupDict( + finding_kind=normalized.finding_kind, + finding_key=normalized.finding_key, + signature=dict(normalized.signature), + items=[ + StructuralFindingOccurrenceDict( + 
qualname=item.qualname, + start=item.start, + end=item.end, + ) + for item in normalized.items + ], + ) + + +def _normalize_cached_structural_groups( + groups: Sequence[StructuralFindingGroupDict], + *, + filepath: str, +) -> list[StructuralFindingGroupDict]: + normalized = [ + candidate + for candidate in ( + _normalize_cached_structural_group(group, filepath=filepath) + for group in groups + ) + if candidate is not None + ] + normalized.sort(key=lambda group: (-len(group["items"]), group["finding_key"])) + return normalized + + +_DecodedItemT = TypeVar("_DecodedItemT") +_ValidatedItemT = TypeVar("_ValidatedItemT") + + class Cache: __slots__ = ( + "_canonical_runtime_paths", + "_dirty", "analysis_profile", "cache_schema_version", "data", @@ -110,6 +303,7 @@ class Cache: "max_size_bytes", "path", "root", + "segment_report_projection", ) _CACHE_VERSION = CACHE_VERSION @@ -120,8 +314,12 @@ def __init__( *, root: str | Path | None = None, max_size_bytes: int | None = None, - min_loc: int = 15, + min_loc: int = 10, min_stmt: int = 6, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, ): self.path = Path(path) self.root = _resolve_root(root) @@ -129,6 +327,10 @@ def __init__( self.analysis_profile: AnalysisProfile = { "min_loc": min_loc, "min_stmt": min_stmt, + "block_min_loc": block_min_loc, + "block_min_stmt": block_min_stmt, + "segment_min_loc": segment_min_loc, + "segment_min_stmt": segment_min_stmt, } self.data: CacheData = _empty_cache_data( version=self._CACHE_VERSION, @@ -136,6 +338,7 @@ def __init__( fingerprint_version=self.fingerprint_version, analysis_profile=self.analysis_profile, ) + self._canonical_runtime_paths: set[str] = set() self.legacy_secret_warning = self._detect_legacy_secret_warning() self.cache_schema_version: str | None = None self.load_status = CacheStatus.MISSING @@ -143,6 +346,8 @@ def __init__( self.max_size_bytes = ( MAX_CACHE_SIZE_BYTES if max_size_bytes is None else 
max_size_bytes ) + self.segment_report_projection: SegmentReportProjection | None = None + self._dirty: bool = True # new cache is dirty until loaded from disk def _detect_legacy_secret_warning(self) -> str | None: secret_path = self.path.parent / LEGACY_CACHE_SECRET_FILENAME @@ -181,8 +386,43 @@ def _ignore_cache( fingerprint_version=self.fingerprint_version, analysis_profile=self.analysis_profile, ) + self._canonical_runtime_paths = set() + self.segment_report_projection = None + + def _reject_cache_load( + self, + message: str, + *, + status: CacheStatus, + schema_version: str | None = None, + ) -> CacheData | None: + self._ignore_cache( + message, + status=status, + schema_version=schema_version, + ) + return None - def _sign_data(self, data: Mapping[str, object]) -> str: + def _reject_invalid_cache_format( + self, + *, + schema_version: str | None = None, + ) -> CacheData | None: + return self._reject_cache_load( + "Cache format invalid; ignoring cache.", + status=CacheStatus.INVALID_TYPE, + schema_version=schema_version, + ) + + def _reject_version_mismatch(self, version: str) -> CacheData | None: + return self._reject_cache_load( + f"Cache version mismatch (found {version}); ignoring cache.", + status=CacheStatus.VERSION_MISMATCH, + schema_version=version, + ) + + @staticmethod + def _sign_data(data: Mapping[str, object]) -> str: """Create deterministic SHA-256 signature for canonical payload data.""" canonical = _canonical_json(data) return hashlib.sha256(canonical.encode("utf-8")).hexdigest() @@ -201,6 +441,8 @@ def load(self) -> None: self._set_load_warning(None) self.load_status = CacheStatus.MISSING self.cache_schema_version = None + self._canonical_runtime_paths = set() + self.segment_report_projection = None return try: @@ -214,12 +456,14 @@ def load(self) -> None: return raw_obj: object = json.loads(self.path.read_text("utf-8")) - parsed = self._parse_cache_document(raw_obj) + parsed = self._load_and_validate(raw_obj) if parsed is None: return 
self.data = parsed + self._canonical_runtime_paths = set(parsed["files"].keys()) self.load_status = CacheStatus.OK self._set_load_warning(None) + self._dirty = False # freshly loaded — nothing to persist except OSError as e: self._ignore_cache( @@ -232,110 +476,69 @@ def load(self) -> None: status=CacheStatus.INVALID_JSON, ) - def _parse_cache_document(self, raw_obj: object) -> CacheData | None: + def _load_and_validate(self, raw_obj: object) -> CacheData | None: raw = _as_str_dict(raw_obj) if raw is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - ) - return None + return self._reject_invalid_cache_format() # Legacy cache format: top-level {version, files, _signature}. legacy_version = _as_str(raw.get("version")) if legacy_version is not None: - self._ignore_cache( - f"Cache version mismatch (found {legacy_version}); ignoring cache.", - status=CacheStatus.VERSION_MISMATCH, - schema_version=legacy_version, - ) - return None + return self._reject_version_mismatch(legacy_version) version = _as_str(raw.get("v")) if version is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - ) - return None + return self._reject_invalid_cache_format() if version != self._CACHE_VERSION: - self._ignore_cache( - f"Cache version mismatch (found {version}); ignoring cache.", - status=CacheStatus.VERSION_MISMATCH, - schema_version=version, - ) - return None + return self._reject_version_mismatch(version) sig = _as_str(raw.get("sig")) payload_obj = raw.get("payload") payload = _as_str_dict(payload_obj) if sig is None or payload is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=version, - ) - return None + return self._reject_invalid_cache_format(schema_version=version) expected_sig = self._sign_data(payload) if not hmac.compare_digest(sig, expected_sig): - self._ignore_cache( + return 
self._reject_cache_load( "Cache signature mismatch; ignoring cache.", status=CacheStatus.INTEGRITY_FAILED, schema_version=version, ) - return None runtime_tag = current_python_tag() py_tag = _as_str(payload.get("py")) if py_tag is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=version, - ) - return None + return self._reject_invalid_cache_format(schema_version=version) if py_tag != runtime_tag: - self._ignore_cache( + return self._reject_cache_load( "Cache python tag mismatch " f"(found {py_tag}, expected {runtime_tag}); ignoring cache.", status=CacheStatus.PYTHON_TAG_MISMATCH, schema_version=version, ) - return None fp_version = _as_str(payload.get("fp")) if fp_version is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=version, - ) - return None + return self._reject_invalid_cache_format(schema_version=version) if fp_version != self.fingerprint_version: - self._ignore_cache( + return self._reject_cache_load( "Cache fingerprint version mismatch " f"(found {fp_version}, expected {self.fingerprint_version}); " "ignoring cache.", status=CacheStatus.FINGERPRINT_MISMATCH, schema_version=version, ) - return None analysis_profile = _as_analysis_profile(payload.get("ap")) if analysis_profile is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=version, - ) - return None + return self._reject_invalid_cache_format(schema_version=version) if analysis_profile != self.analysis_profile: - self._ignore_cache( + return self._reject_cache_load( "Cache analysis profile mismatch " f"(found min_loc={analysis_profile['min_loc']}, " f"min_stmt={analysis_profile['min_stmt']}; " @@ -345,41 +548,35 @@ def _parse_cache_document(self, raw_obj: object) -> CacheData | None: status=CacheStatus.ANALYSIS_PROFILE_MISMATCH, schema_version=version, ) - return None files_obj = 
payload.get("files") files_dict = _as_str_dict(files_obj) if files_dict is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=version, - ) - return None + return self._reject_invalid_cache_format(schema_version=version) parsed_files: dict[str, CacheEntry] = {} for wire_path, file_entry_obj in files_dict.items(): runtime_path = self._runtime_filepath_from_wire(wire_path) - parsed_entry = _decode_wire_file_entry(file_entry_obj, runtime_path) + parsed_entry = self._decode_entry(file_entry_obj, runtime_path) if parsed_entry is None: - self._ignore_cache( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=version, - ) - return None - parsed_files[runtime_path] = parsed_entry + return self._reject_invalid_cache_format(schema_version=version) + parsed_files[runtime_path] = _canonicalize_cache_entry(parsed_entry) + self.segment_report_projection = self._decode_segment_report_projection( + payload.get("sr") + ) self.cache_schema_version = version - return { - "version": self._CACHE_VERSION, - "python_tag": runtime_tag, - "fingerprint_version": self.fingerprint_version, - "analysis_profile": self.analysis_profile, - "files": parsed_files, - } + return CacheData( + version=self._CACHE_VERSION, + python_tag=runtime_tag, + fingerprint_version=self.fingerprint_version, + analysis_profile=self.analysis_profile, + files=parsed_files, + ) def save(self) -> None: + if not self._dirty: + return try: self.path.parent.mkdir(parents=True, exist_ok=True) wire_files: dict[str, object] = {} @@ -390,7 +587,7 @@ def save(self) -> None: entry = self.get_file_entry(runtime_path) if entry is None: continue - wire_files[wire_map[runtime_path]] = _encode_wire_file_entry(entry) + wire_files[wire_map[runtime_path]] = self._encode_entry(entry) payload: dict[str, object] = { "py": current_python_tag(), @@ -398,6 +595,9 @@ def save(self) -> None: "ap": self.analysis_profile, "files": 
wire_files, } + segment_projection = self._encode_segment_report_projection() + if segment_projection is not None: + payload["sr"] = segment_projection signed_doc = { "v": self._CACHE_VERSION, "payload": payload, @@ -405,8 +605,13 @@ def save(self) -> None: } tmp_path = self.path.with_name(f"{self.path.name}.tmp") - tmp_path.write_text(_canonical_json(signed_doc), "utf-8") + data = _canonical_json(signed_doc).encode("utf-8") + with tmp_path.open("wb") as tmp_file: + tmp_file.write(data) + tmp_file.flush() + os.fsync(tmp_file.fileno()) os.replace(tmp_path, self.path) + self._dirty = False self.data["version"] = self._CACHE_VERSION self.data["python_tag"] = current_python_tag() @@ -416,6 +621,14 @@ def save(self) -> None: except OSError as e: raise CacheError(f"Failed to save cache: {e}") from e + @staticmethod + def _decode_entry(value: object, filepath: str) -> CacheEntry | None: + return _decode_wire_file_entry(value, filepath) + + @staticmethod + def _encode_entry(entry: CacheEntry) -> dict[str, object]: + return _encode_wire_file_entry(entry) + def _wire_filepath_from_runtime(self, runtime_filepath: str) -> str: runtime_path = Path(runtime_filepath) if self.root is None: @@ -446,36 +659,192 @@ def _runtime_filepath_from_wire(self, wire_filepath: str) -> str: except OSError: return str(combined) + def _decode_segment_report_projection( + self, + value: object, + ) -> SegmentReportProjection | None: + obj = _as_str_dict(value) + if obj is None: + return None + digest = _as_str(obj.get("d")) + suppressed = _as_int(obj.get("s")) + groups_raw = _as_list(obj.get("g")) + if digest is None or suppressed is None or groups_raw is None: + return None + groups: dict[str, list[SegmentDict]] = {} + for group_row in groups_raw: + group_list = _as_list(group_row) + if group_list is None or len(group_list) != 2: + return None + group_key = _as_str(group_list[0]) + items_raw = _as_list(group_list[1]) + if group_key is None or items_raw is None: + return None + items: 
list[SegmentDict] = [] + for item_raw in items_raw: + item_list = _as_list(item_raw) + if item_list is None or len(item_list) != 7: + return None + wire_filepath = _as_str(item_list[0]) + qualname = _as_str(item_list[1]) + start_line = _as_int(item_list[2]) + end_line = _as_int(item_list[3]) + size = _as_int(item_list[4]) + segment_hash = _as_str(item_list[5]) + segment_sig = _as_str(item_list[6]) + if ( + wire_filepath is None + or qualname is None + or start_line is None + or end_line is None + or size is None + or segment_hash is None + or segment_sig is None + ): + return None + items.append( + SegmentGroupItem( + segment_hash=segment_hash, + segment_sig=segment_sig, + filepath=self._runtime_filepath_from_wire(wire_filepath), + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + ) + groups[group_key] = items + return { + "digest": digest, + "suppressed": max(0, suppressed), + "groups": groups, + } + + def _encode_segment_report_projection(self) -> dict[str, object] | None: + projection = self.segment_report_projection + if projection is None: + return None + groups_rows: list[list[object]] = [] + for group_key in sorted(projection["groups"]): + items = sorted( + projection["groups"][group_key], + key=lambda item: ( + item["filepath"], + item["qualname"], + item["start_line"], + item["end_line"], + ), + ) + encoded_items = [ + [ + self._wire_filepath_from_runtime(item["filepath"]), + item["qualname"], + item["start_line"], + item["end_line"], + item["size"], + item["segment_hash"], + item["segment_sig"], + ] + for item in items + ] + groups_rows.append([group_key, encoded_items]) + return { + "d": projection["digest"], + "s": max(0, int(projection["suppressed"])), + "g": groups_rows, + } + + def _store_canonical_file_entry( + self, + *, + runtime_path: str, + canonical_entry: CacheEntry, + ) -> CacheEntry: + previous_entry = self.data["files"].get(runtime_path) + was_canonical = runtime_path in self._canonical_runtime_paths + 
self.data["files"][runtime_path] = canonical_entry + self._canonical_runtime_paths.add(runtime_path) + if not was_canonical or previous_entry != canonical_entry: + self._dirty = True + return canonical_entry + def get_file_entry(self, filepath: str) -> CacheEntry | None: - entry = self.data["files"].get(filepath) - if entry is None: + runtime_lookup_key = filepath + entry_obj = self.data["files"].get(runtime_lookup_key) + if entry_obj is None: wire_key = self._wire_filepath_from_runtime(filepath) - runtime_key = self._runtime_filepath_from_wire(wire_key) - entry = self.data["files"].get(runtime_key) + runtime_lookup_key = self._runtime_filepath_from_wire(wire_key) + entry_obj = self.data["files"].get(runtime_lookup_key) - if entry is None: + if entry_obj is None: return None - if not isinstance(entry, dict): + if runtime_lookup_key in self._canonical_runtime_paths: + if _is_canonical_cache_entry(entry_obj): + return entry_obj + self._canonical_runtime_paths.discard(runtime_lookup_key) + + if not isinstance(entry_obj, dict): return None + entry = entry_obj required = {"stat", "units", "blocks", "segments"} if not required.issubset(entry.keys()): return None - stat = entry.get("stat") - units = entry.get("units") - blocks = entry.get("blocks") - segments = entry.get("segments") - if not ( - _is_file_stat_dict(stat) - and _is_unit_list(units) - and _is_block_list(blocks) - and _is_segment_list(segments) + stat = _as_file_stat_dict(entry.get("stat")) + units = _as_typed_unit_list(entry.get("units")) + blocks = _as_typed_block_list(entry.get("blocks")) + segments = _as_typed_segment_list(entry.get("segments")) + if stat is None or units is None or blocks is None or segments is None: + return None + + class_metrics_raw = _as_typed_class_metrics_list(entry.get("class_metrics", [])) + module_deps_raw = _as_typed_module_deps_list(entry.get("module_deps", [])) + dead_candidates_raw = _as_typed_dead_candidates_list( + entry.get("dead_candidates", []) + ) + 
referenced_names_raw = _as_typed_string_list(entry.get("referenced_names", [])) + referenced_qualnames_raw = _as_typed_string_list( + entry.get("referenced_qualnames", []) + ) + import_names_raw = _as_typed_string_list(entry.get("import_names", [])) + class_names_raw = _as_typed_string_list(entry.get("class_names", [])) + if ( + class_metrics_raw is None + or module_deps_raw is None + or dead_candidates_raw is None + or referenced_names_raw is None + or referenced_qualnames_raw is None + or import_names_raw is None + or class_names_raw is None ): return None - return entry + entry_to_canonicalize: CacheEntry = CacheEntry( + stat=stat, + units=units, + blocks=blocks, + segments=segments, + class_metrics=class_metrics_raw, + module_deps=module_deps_raw, + dead_candidates=dead_candidates_raw, + referenced_names=referenced_names_raw, + referenced_qualnames=referenced_qualnames_raw, + import_names=import_names_raw, + class_names=class_names_raw, + ) + source_stats = _as_source_stats_dict(entry.get("source_stats")) + if source_stats is not None: + entry_to_canonicalize["source_stats"] = source_stats + sf_raw = entry.get("structural_findings") + if isinstance(sf_raw, list): + entry_to_canonicalize["structural_findings"] = sf_raw + canonical_entry = _canonicalize_cache_entry(entry_to_canonicalize) + return self._store_canonical_file_entry( + runtime_path=runtime_lookup_key, + canonical_entry=canonical_entry, + ) def put_file_entry( self, @@ -484,64 +853,88 @@ def put_file_entry( units: list[Unit], blocks: list[BlockUnit], segments: list[SegmentUnit], + *, + source_stats: SourceStatsDict | None = None, + file_metrics: FileMetrics | None = None, + structural_findings: list[StructuralFindingGroup] | None = None, ) -> None: runtime_path = self._runtime_filepath_from_wire( self._wire_filepath_from_runtime(filepath) ) - unit_rows: list[UnitDict] = [ - { - "qualname": unit.qualname, - "filepath": runtime_path, - "start_line": unit.start_line, - "end_line": unit.end_line, - "loc": 
unit.loc, - "stmt_count": unit.stmt_count, - "fingerprint": unit.fingerprint, - "loc_bucket": unit.loc_bucket, - } - for unit in units - ] - - block_rows: list[BlockDict] = [ - { - "block_hash": block.block_hash, - "filepath": runtime_path, - "qualname": block.qualname, - "start_line": block.start_line, - "end_line": block.end_line, - "size": block.size, - } - for block in blocks + unit_rows = [_unit_dict_from_model(unit, runtime_path) for unit in units] + block_rows = [_block_dict_from_model(block, runtime_path) for block in blocks] + segment_rows = [ + _segment_dict_from_model(segment, runtime_path) for segment in segments ] - segment_rows: list[SegmentDict] = [ - { - "segment_hash": segment.segment_hash, - "segment_sig": segment.segment_sig, - "filepath": runtime_path, - "qualname": segment.qualname, - "start_line": segment.start_line, - "end_line": segment.end_line, - "size": segment.size, - } - for segment in segments - ] - - self.data["files"][runtime_path] = { - "stat": stat_sig, - "units": unit_rows, - "blocks": block_rows, - "segments": segment_rows, - } + ( + class_metrics_rows, + module_dep_rows, + dead_candidate_rows, + referenced_names, + referenced_qualnames, + import_names, + class_names, + ) = _new_optional_metrics_payload() + if file_metrics is not None: + class_metrics_rows = [ + _class_metrics_dict_from_model(metric, runtime_path) + for metric in file_metrics.class_metrics + ] + module_dep_rows = [ + _module_dep_dict_from_model(dep) for dep in file_metrics.module_deps + ] + dead_candidate_rows = [ + _dead_candidate_dict_from_model(candidate, runtime_path) + for candidate in file_metrics.dead_candidates + ] + referenced_names = sorted(set(file_metrics.referenced_names)) + referenced_qualnames = sorted(set(file_metrics.referenced_qualnames)) + import_names = sorted(set(file_metrics.import_names)) + class_names = sorted(set(file_metrics.class_names)) + + source_stats_payload = source_stats or SourceStatsDict( + lines=0, + functions=0, + methods=0, + 
classes=0, + ) + entry_dict = CacheEntry( + stat=stat_sig, + source_stats=source_stats_payload, + units=unit_rows, + blocks=block_rows, + segments=segment_rows, + class_metrics=class_metrics_rows, + module_deps=module_dep_rows, + dead_candidates=dead_candidate_rows, + referenced_names=referenced_names, + referenced_qualnames=referenced_qualnames, + import_names=import_names, + class_names=class_names, + ) + if structural_findings is not None: + entry_dict["structural_findings"] = _normalize_cached_structural_groups( + [ + _structural_group_dict_from_model(group) + for group in structural_findings + ], + filepath=runtime_path, + ) + canonical_entry = _canonicalize_cache_entry(entry_dict) + self._store_canonical_file_entry( + runtime_path=runtime_path, + canonical_entry=canonical_entry, + ) def file_stat_signature(path: str) -> FileStat: st = os.stat(path) - return { - "mtime_ns": st.st_mtime_ns, - "size": st.st_size, - } + return FileStat( + mtime_ns=st.st_mtime_ns, + size=st.st_size, + ) def _empty_cache_data( @@ -551,13 +944,13 @@ def _empty_cache_data( fingerprint_version: str, analysis_profile: AnalysisProfile, ) -> CacheData: - return { - "version": version, - "python_tag": python_tag, - "fingerprint_version": fingerprint_version, - "analysis_profile": analysis_profile, - "files": {}, - } + return CacheData( + version=version, + python_tag=python_tag, + fingerprint_version=fingerprint_version, + analysis_profile=analysis_profile, + files={}, + ) def _canonical_json(data: object) -> str: @@ -576,6 +969,348 @@ def _as_list(value: object) -> list[object] | None: return value if isinstance(value, list) else None +def _as_risk_literal(value: object) -> Literal["low", "medium", "high"] | None: + match value: + case "low": + return "low" + case "medium": + return "medium" + case "high": + return "high" + case _: + return None + + +def _new_optional_metrics_payload() -> tuple[ + list[ClassMetricsDict], + list[ModuleDepDict], + list[DeadCandidateDict], + list[str], + 
list[str], + list[str], + list[str], +]: + return [], [], [], [], [], [], [] + + +def _unit_dict_from_model(unit: Unit, filepath: str) -> UnitDict: + return FunctionGroupItem( + qualname=unit.qualname, + filepath=filepath, + start_line=unit.start_line, + end_line=unit.end_line, + loc=unit.loc, + stmt_count=unit.stmt_count, + fingerprint=unit.fingerprint, + loc_bucket=unit.loc_bucket, + cyclomatic_complexity=unit.cyclomatic_complexity, + nesting_depth=unit.nesting_depth, + risk=unit.risk, + raw_hash=unit.raw_hash, + entry_guard_count=unit.entry_guard_count, + entry_guard_terminal_profile=unit.entry_guard_terminal_profile, + entry_guard_has_side_effect_before=unit.entry_guard_has_side_effect_before, + terminal_kind=unit.terminal_kind, + try_finally_profile=unit.try_finally_profile, + side_effect_order_profile=unit.side_effect_order_profile, + ) + + +def _block_dict_from_model(block: BlockUnit, filepath: str) -> BlockDict: + return BlockGroupItem( + block_hash=block.block_hash, + filepath=filepath, + qualname=block.qualname, + start_line=block.start_line, + end_line=block.end_line, + size=block.size, + ) + + +def _segment_dict_from_model(segment: SegmentUnit, filepath: str) -> SegmentDict: + return SegmentGroupItem( + segment_hash=segment.segment_hash, + segment_sig=segment.segment_sig, + filepath=filepath, + qualname=segment.qualname, + start_line=segment.start_line, + end_line=segment.end_line, + size=segment.size, + ) + + +def _class_metrics_dict_from_model( + metric: ClassMetrics, + filepath: str, +) -> ClassMetricsDict: + return ClassMetricsDict( + qualname=metric.qualname, + filepath=filepath, + start_line=metric.start_line, + end_line=metric.end_line, + cbo=metric.cbo, + lcom4=metric.lcom4, + method_count=metric.method_count, + instance_var_count=metric.instance_var_count, + risk_coupling=metric.risk_coupling, + risk_cohesion=metric.risk_cohesion, + coupled_classes=sorted(set(metric.coupled_classes)), + ) + + +def _module_dep_dict_from_model(dep: ModuleDep) -> 
ModuleDepDict: + return ModuleDepDict( + source=dep.source, + target=dep.target, + import_type=dep.import_type, + line=dep.line, + ) + + +def _dead_candidate_dict_from_model( + candidate: DeadCandidate, + filepath: str, +) -> DeadCandidateDict: + result = DeadCandidateDict( + qualname=candidate.qualname, + local_name=candidate.local_name, + filepath=filepath, + start_line=candidate.start_line, + end_line=candidate.end_line, + kind=candidate.kind, + ) + if candidate.suppressed_rules: + result["suppressed_rules"] = sorted(set(candidate.suppressed_rules)) + return result + + +def _structural_occurrence_dict_from_model( + occurrence: StructuralFindingOccurrence, +) -> StructuralFindingOccurrenceDict: + return StructuralFindingOccurrenceDict( + qualname=occurrence.qualname, + start=occurrence.start, + end=occurrence.end, + ) + + +def _structural_group_dict_from_model( + group: StructuralFindingGroup, +) -> StructuralFindingGroupDict: + return StructuralFindingGroupDict( + finding_kind=group.finding_kind, + finding_key=group.finding_key, + signature=dict(group.signature), + items=[ + _structural_occurrence_dict_from_model(occurrence) + for occurrence in group.items + ], + ) + + +def _as_file_stat_dict(value: object) -> FileStat | None: + if not _is_file_stat_dict(value): + return None + obj = cast("Mapping[str, object]", value) + mtime_ns = obj.get("mtime_ns") + size = obj.get("size") + if not isinstance(mtime_ns, int) or not isinstance(size, int): + return None + return FileStat(mtime_ns=mtime_ns, size=size) + + +def _as_source_stats_dict(value: object) -> SourceStatsDict | None: + if not _is_source_stats_dict(value): + return None + obj = cast("Mapping[str, object]", value) + lines = obj.get("lines") + functions = obj.get("functions") + methods = obj.get("methods") + classes = obj.get("classes") + assert isinstance(lines, int) + assert isinstance(functions, int) + assert isinstance(methods, int) + assert isinstance(classes, int) + return SourceStatsDict( + lines=lines, 
+ functions=functions, + methods=methods, + classes=classes, + ) + + +def _as_typed_list( + value: object, + *, + predicate: Callable[[object], bool], +) -> list[_ValidatedItemT] | None: + if not isinstance(value, list): + return None + if not all(predicate(item) for item in value): + return None + return cast("list[_ValidatedItemT]", value) + + +def _as_typed_unit_list(value: object) -> list[UnitDict] | None: + return _as_typed_list(value, predicate=_is_unit_dict) + + +def _as_typed_block_list(value: object) -> list[BlockDict] | None: + return _as_typed_list(value, predicate=_is_block_dict) + + +def _as_typed_segment_list(value: object) -> list[SegmentDict] | None: + return _as_typed_list(value, predicate=_is_segment_dict) + + +def _as_typed_class_metrics_list(value: object) -> list[ClassMetricsDict] | None: + return _as_typed_list(value, predicate=_is_class_metrics_dict) + + +def _as_typed_dead_candidates_list( + value: object, +) -> list[DeadCandidateDict] | None: + return _as_typed_list(value, predicate=_is_dead_candidate_dict) + + +def _as_typed_module_deps_list(value: object) -> list[ModuleDepDict] | None: + return _as_typed_list(value, predicate=_is_module_dep_dict) + + +def _as_typed_string_list(value: object) -> list[str] | None: + return _as_typed_list(value, predicate=lambda item: isinstance(item, str)) + + +def _is_canonical_cache_entry(value: object) -> TypeGuard[CacheEntry]: + return isinstance(value, dict) and _has_cache_entry_container_shape(value) + + +def _has_cache_entry_container_shape(entry: Mapping[str, object]) -> bool: + required = {"stat", "units", "blocks", "segments"} + if not required.issubset(entry.keys()): + return False + if not isinstance(entry.get("stat"), dict): + return False + if not isinstance(entry.get("units"), list): + return False + if not isinstance(entry.get("blocks"), list): + return False + if not isinstance(entry.get("segments"), list): + return False + source_stats = entry.get("source_stats") + if source_stats is not 
None and not _is_source_stats_dict(source_stats): + return False + optional_list_keys = ( + "class_metrics", + "module_deps", + "dead_candidates", + "referenced_names", + "referenced_qualnames", + "import_names", + "class_names", + "structural_findings", + ) + return all(isinstance(entry.get(key, []), list) for key in optional_list_keys) + + +def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry: + class_metrics_sorted = sorted( + entry["class_metrics"], + key=lambda item: ( + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + for metric in class_metrics_sorted: + coupled_classes = metric.get("coupled_classes", []) + if coupled_classes: + metric["coupled_classes"] = sorted(set(coupled_classes)) + + module_deps_sorted = sorted( + entry["module_deps"], + key=lambda item: ( + item["source"], + item["target"], + item["import_type"], + item["line"], + ), + ) + dead_candidates_normalized: list[DeadCandidateDict] = [] + for candidate in entry["dead_candidates"]: + suppressed_rules = candidate.get("suppressed_rules", []) + normalized_candidate = DeadCandidateDict( + qualname=candidate["qualname"], + local_name=candidate["local_name"], + filepath=candidate["filepath"], + start_line=candidate["start_line"], + end_line=candidate["end_line"], + kind=candidate["kind"], + ) + if _is_string_list(suppressed_rules): + normalized_rules = sorted(set(suppressed_rules)) + if normalized_rules: + normalized_candidate["suppressed_rules"] = normalized_rules + dead_candidates_normalized.append(normalized_candidate) + + dead_candidates_sorted = sorted( + dead_candidates_normalized, + key=lambda item: ( + item["start_line"], + item["end_line"], + item["qualname"], + item["local_name"], + item["kind"], + tuple(item.get("suppressed_rules", [])), + ), + ) + + result: CacheEntry = { + "stat": entry["stat"], + "units": entry["units"], + "blocks": entry["blocks"], + "segments": entry["segments"], + "class_metrics": class_metrics_sorted, + "module_deps": 
module_deps_sorted, + "dead_candidates": dead_candidates_sorted, + "referenced_names": sorted(set(entry["referenced_names"])), + "referenced_qualnames": sorted(set(entry.get("referenced_qualnames", []))), + "import_names": sorted(set(entry["import_names"])), + "class_names": sorted(set(entry["class_names"])), + } + sf = entry.get("structural_findings") + if sf is not None: + result["structural_findings"] = sf + source_stats = entry.get("source_stats") + if source_stats is not None: + result["source_stats"] = source_stats + return result + + +def _decode_wire_qualname_span( + row: list[object], +) -> tuple[str, int, int] | None: + qualname = _as_str(row[0]) + start_line = _as_int(row[1]) + end_line = _as_int(row[2]) + if qualname is None or start_line is None or end_line is None: + return None + return qualname, start_line, end_line + + +def _decode_wire_qualname_span_size( + row: list[object], +) -> tuple[str, int, int, int] | None: + qualname_span = _decode_wire_qualname_span(row) + if qualname_span is None: + return None + size = _as_int(row[3]) + if size is None: + return None + qualname, start_line, end_line = qualname_span + return qualname, start_line, end_line, size + + def _as_str_dict(value: object) -> dict[str, object] | None: if not isinstance(value, dict): return None @@ -590,179 +1325,730 @@ def _as_analysis_profile(value: object) -> AnalysisProfile | None: if obj is None: return None - if set(obj.keys()) != {"min_loc", "min_stmt"}: + _REQUIRED = { + "min_loc", + "min_stmt", + "block_min_loc", + "block_min_stmt", + "segment_min_loc", + "segment_min_stmt", + } + if set(obj.keys()) < _REQUIRED: return None min_loc = _as_int(obj.get("min_loc")) min_stmt = _as_int(obj.get("min_stmt")) - if min_loc is None or min_stmt is None: + block_min_loc = _as_int(obj.get("block_min_loc")) + block_min_stmt = _as_int(obj.get("block_min_stmt")) + segment_min_loc = _as_int(obj.get("segment_min_loc")) + segment_min_stmt = _as_int(obj.get("segment_min_stmt")) + if ( + 
min_loc is None + or min_stmt is None + or block_min_loc is None + or block_min_stmt is None + or segment_min_loc is None + or segment_min_stmt is None + ): return None - return {"min_loc": min_loc, "min_stmt": min_stmt} - + return AnalysisProfile( + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + ) -def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None: - obj = _as_str_dict(value) - if obj is None: - return None - stat_obj = obj.get("st") - stat_list = _as_list(stat_obj) +def _decode_wire_stat(obj: dict[str, object]) -> FileStat | None: + stat_list = _as_list(obj.get("st")) if stat_list is None or len(stat_list) != 2: return None mtime_ns = _as_int(stat_list[0]) size = _as_int(stat_list[1]) if mtime_ns is None or size is None: return None + return FileStat(mtime_ns=mtime_ns, size=size) - units: list[UnitDict] = [] - blocks: list[BlockDict] = [] - segments: list[SegmentDict] = [] - units_obj = obj.get("u") - if units_obj is not None: - units_list = _as_list(units_obj) - if units_list is None: +def _decode_optional_wire_source_stats( + *, + obj: dict[str, object], +) -> SourceStatsDict | None: + raw = obj.get("ss") + if raw is None: + return None + row = _as_list(raw) + if row is None or len(row) != 4: + return None + counts = _decode_wire_int_fields(row, 0, 1, 2, 3) + if counts is None: + return None + lines, functions, methods, classes = counts + if any(value < 0 for value in counts): + return None + return SourceStatsDict( + lines=lines, + functions=functions, + methods=methods, + classes=classes, + ) + + +def _decode_optional_wire_items( + *, + obj: dict[str, object], + key: str, + decode_item: Callable[[object], _DecodedItemT | None], +) -> list[_DecodedItemT] | None: + raw_items = obj.get(key) + if raw_items is None: + return [] + wire_items = _as_list(raw_items) + if wire_items is None: + return None + 
decoded_items: list[_DecodedItemT] = [] + for wire_item in wire_items: + decoded = decode_item(wire_item) + if decoded is None: return None - for unit_obj in units_list: - decoded_unit = _decode_wire_unit(unit_obj, filepath) - if decoded_unit is None: - return None - units.append(decoded_unit) + decoded_items.append(decoded) + return decoded_items + - blocks_obj = obj.get("b") - if blocks_obj is not None: - blocks_list = _as_list(blocks_obj) - if blocks_list is None: +def _decode_optional_wire_items_for_filepath( + *, + obj: dict[str, object], + key: str, + filepath: str, + decode_item: Callable[[object, str], _DecodedItemT | None], +) -> list[_DecodedItemT] | None: + raw_items = obj.get(key) + if raw_items is None: + return [] + wire_items = _as_list(raw_items) + if wire_items is None: + return None + decoded_items: list[_DecodedItemT] = [] + for wire_item in wire_items: + decoded = decode_item(wire_item, filepath) + if decoded is None: return None - for block_obj in blocks_list: - decoded_block = _decode_wire_block(block_obj, filepath) - if decoded_block is None: - return None - blocks.append(decoded_block) + decoded_items.append(decoded) + return decoded_items + - segments_obj = obj.get("s") - if segments_obj is not None: - segments_list = _as_list(segments_obj) - if segments_list is None: +def _decode_optional_wire_names( + *, + obj: dict[str, object], + key: str, +) -> list[str] | None: + raw_names = obj.get(key) + if raw_names is None: + return [] + names = _as_list(raw_names) + if names is None or not all(isinstance(name, str) for name in names): + return None + return [str(name) for name in names] + + +def _decode_optional_wire_coupled_classes( + *, + obj: dict[str, object], + key: str, +) -> dict[str, list[str]] | None: + raw = obj.get(key) + if raw is None: + return {} + + rows = _as_list(raw) + if rows is None: + return None + + decoded: dict[str, list[str]] = {} + for wire_row in rows: + row = _as_list(wire_row) + if row is None or len(row) != 2: return 
None - for segment_obj in segments_list: - decoded_segment = _decode_wire_segment(segment_obj, filepath) - if decoded_segment is None: - return None - segments.append(decoded_segment) + qualname = _as_str(row[0]) + names = _as_list(row[1]) + if qualname is None or names is None: + return None + if not all(isinstance(name, str) for name in names): + return None + decoded[qualname] = sorted({str(name) for name in names if str(name)}) - return { - "stat": {"mtime_ns": mtime_ns, "size": size}, - "units": units, - "blocks": blocks, - "segments": segments, - } + return decoded -def _decode_wire_unit(value: object, filepath: str) -> UnitDict | None: - row = _as_list(value) - if row is None or len(row) != 7: +def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None: + obj = _as_str_dict(value) + if obj is None: return None - qualname = _as_str(row[0]) - start_line = _as_int(row[1]) - end_line = _as_int(row[2]) - loc = _as_int(row[3]) - stmt_count = _as_int(row[4]) - fingerprint = _as_str(row[5]) - loc_bucket = _as_str(row[6]) + stat = _decode_wire_stat(obj) + if stat is None: + return None + source_stats = _decode_optional_wire_source_stats(obj=obj) + file_sections = _decode_wire_file_sections(obj=obj, filepath=filepath) + if file_sections is None: + return None + ( + units, + blocks, + segments, + class_metrics, + module_deps, + dead_candidates, + ) = file_sections + name_sections = _decode_wire_name_sections(obj=obj) + if name_sections is None: + return None + ( + referenced_names, + referenced_qualnames, + import_names, + class_names, + ) = name_sections + coupled_classes_map = _decode_optional_wire_coupled_classes(obj=obj, key="cc") + if coupled_classes_map is None: + return None + + for metric in class_metrics: + names = coupled_classes_map.get(metric["qualname"], []) + if names: + metric["coupled_classes"] = names + + has_structural_findings = "sf" in obj + structural_findings = _decode_wire_structural_findings_optional(obj) + if 
structural_findings is None: + return None + + result = CacheEntry( + stat=stat, + units=units, + blocks=blocks, + segments=segments, + class_metrics=class_metrics, + module_deps=module_deps, + dead_candidates=dead_candidates, + referenced_names=referenced_names, + referenced_qualnames=referenced_qualnames, + import_names=import_names, + class_names=class_names, + ) + if source_stats is not None: + result["source_stats"] = source_stats + if has_structural_findings: + result["structural_findings"] = _normalize_cached_structural_groups( + structural_findings, + filepath=filepath, + ) + return result + +def _decode_wire_file_sections( + *, + obj: dict[str, object], + filepath: str, +) -> ( + tuple[ + list[UnitDict], + list[BlockDict], + list[SegmentDict], + list[ClassMetricsDict], + list[ModuleDepDict], + list[DeadCandidateDict], + ] + | None +): + units = _decode_optional_wire_items_for_filepath( + obj=obj, + key="u", + filepath=filepath, + decode_item=_decode_wire_unit, + ) + blocks = _decode_optional_wire_items_for_filepath( + obj=obj, + key="b", + filepath=filepath, + decode_item=_decode_wire_block, + ) + segments = _decode_optional_wire_items_for_filepath( + obj=obj, + key="s", + filepath=filepath, + decode_item=_decode_wire_segment, + ) + class_metrics = _decode_optional_wire_items_for_filepath( + obj=obj, + key="cm", + filepath=filepath, + decode_item=_decode_wire_class_metric, + ) + module_deps = _decode_optional_wire_items( + obj=obj, + key="md", + decode_item=_decode_wire_module_dep, + ) + dead_candidates = _decode_optional_wire_items_for_filepath( + obj=obj, + key="dc", + filepath=filepath, + decode_item=_decode_wire_dead_candidate, + ) if ( - qualname is None - or start_line is None - or end_line is None - or loc is None - or stmt_count is None - or fingerprint is None - or loc_bucket is None + units is None + or blocks is None + or segments is None + or class_metrics is None + or module_deps is None + or dead_candidates is None ): return None + return ( + 
units, + blocks, + segments, + class_metrics, + module_deps, + dead_candidates, + ) - return { - "qualname": qualname, - "filepath": filepath, - "start_line": start_line, - "end_line": end_line, - "loc": loc, - "stmt_count": stmt_count, - "fingerprint": fingerprint, - "loc_bucket": loc_bucket, - } +def _decode_wire_name_sections( + *, + obj: dict[str, object], +) -> tuple[list[str], list[str], list[str], list[str]] | None: + referenced_names = _decode_optional_wire_names(obj=obj, key="rn") + referenced_qualnames = _decode_optional_wire_names(obj=obj, key="rq") + import_names = _decode_optional_wire_names(obj=obj, key="in") + class_names = _decode_optional_wire_names(obj=obj, key="cn") + if ( + referenced_names is None + or referenced_qualnames is None + or import_names is None + or class_names is None + ): + return None + return ( + referenced_names, + referenced_qualnames, + import_names, + class_names, + ) -def _decode_wire_block(value: object, filepath: str) -> BlockDict | None: + +def _decode_wire_structural_findings_optional( + obj: dict[str, object], +) -> list[StructuralFindingGroupDict] | None: + """Decode optional 'sf' wire key. 
Returns [] if absent, None on invalid format.""" + raw = obj.get("sf") + if raw is None: + return [] + groups_raw = _as_list(raw) + if groups_raw is None: + return None + groups: list[StructuralFindingGroupDict] = [] + for group_raw in groups_raw: + group = _decode_wire_structural_group(group_raw) + if group is None: + return None + groups.append(group) + return groups + + +def _decode_wire_row( + value: object, + *, + valid_lengths: Collection[int], +) -> list[object] | None: row = _as_list(value) - if row is None or len(row) != 5: + if row is None or len(row) not in valid_lengths: return None + return row + + +def _decode_wire_named_span( + value: object, + *, + valid_lengths: Collection[int], +) -> tuple[list[object], str, int, int] | None: + row = _decode_wire_row(value, valid_lengths=valid_lengths) + if row is None: + return None + span = _decode_wire_qualname_span(row) + if span is None: + return None + qualname, start_line, end_line = span + return row, qualname, start_line, end_line + + +def _decode_wire_named_sized_span( + value: object, + *, + valid_lengths: Collection[int], +) -> tuple[list[object], str, int, int, int] | None: + row = _decode_wire_row(value, valid_lengths=valid_lengths) + if row is None: + return None + span = _decode_wire_qualname_span_size(row) + if span is None: + return None + qualname, start_line, end_line, size = span + return row, qualname, start_line, end_line, size + + +def _decode_wire_int_fields( + row: list[object], + *indexes: int, +) -> tuple[int, ...] | None: + values: list[int] = [] + for index in indexes: + value = _as_int(row[index]) + if value is None: + return None + values.append(value) + return tuple(values) + + +def _decode_wire_str_fields( + row: list[object], + *indexes: int, +) -> tuple[str, ...] 
| None: + values: list[str] = [] + for index in indexes: + value = _as_str(row[index]) + if value is None: + return None + values.append(value) + return tuple(values) - qualname = _as_str(row[0]) - start_line = _as_int(row[1]) - end_line = _as_int(row[2]) - size = _as_int(row[3]) - block_hash = _as_str(row[4]) +def _decode_wire_unit_core_fields( + row: list[object], +) -> tuple[int, int, str, str, int, int, Literal["low", "medium", "high"], str] | None: + int_fields = _decode_wire_int_fields(row, 3, 4, 7, 8) + str_fields = _decode_wire_str_fields(row, 5, 6, 10) + risk = _as_risk_literal(row[9]) + if int_fields is None or str_fields is None or risk is None: + return None + loc, stmt_count, cyclomatic_complexity, nesting_depth = int_fields + fingerprint, loc_bucket, raw_hash = str_fields + return ( + loc, + stmt_count, + fingerprint, + loc_bucket, + cyclomatic_complexity, + nesting_depth, + risk, + raw_hash, + ) + + +def _decode_wire_unit_flow_profiles( + row: list[object], +) -> tuple[int, str, bool, str, str, str] | None: + if len(row) != 17: + return _DEFAULT_WIRE_UNIT_FLOW_PROFILES + + parsed_entry_guard_count = _as_int(row[11]) + parsed_entry_guard_terminal_profile = _as_str(row[12]) + parsed_entry_guard_has_side_effect_before = _as_int(row[13]) + parsed_terminal_kind = _as_str(row[14]) + parsed_try_finally_profile = _as_str(row[15]) + parsed_side_effect_order_profile = _as_str(row[16]) if ( - qualname is None - or start_line is None - or end_line is None - or size is None - or block_hash is None + parsed_entry_guard_count is None + or parsed_entry_guard_terminal_profile is None + or parsed_entry_guard_has_side_effect_before is None + or parsed_terminal_kind is None + or parsed_try_finally_profile is None + or parsed_side_effect_order_profile is None ): return None + return ( + max(0, parsed_entry_guard_count), + parsed_entry_guard_terminal_profile or "none", + parsed_entry_guard_has_side_effect_before != 0, + parsed_terminal_kind or "fallthrough", + 
parsed_try_finally_profile or "none", + parsed_side_effect_order_profile or "none", + ) - return { - "block_hash": block_hash, - "filepath": filepath, - "qualname": qualname, - "start_line": start_line, - "end_line": end_line, - "size": size, - } +def _decode_wire_class_metric_fields( + row: list[object], +) -> tuple[int, int, int, int, str, str] | None: + int_fields = _decode_wire_int_fields(row, 3, 4, 5, 6) + str_fields = _decode_wire_str_fields(row, 7, 8) + if int_fields is None or str_fields is None: + return None + cbo, lcom4, method_count, instance_var_count = int_fields + risk_coupling, risk_cohesion = str_fields + return ( + cbo, + lcom4, + method_count, + instance_var_count, + risk_coupling, + risk_cohesion, + ) -def _decode_wire_segment(value: object, filepath: str) -> SegmentDict | None: - row = _as_list(value) - if row is None or len(row) != 6: + +def _decode_wire_structural_group(value: object) -> StructuralFindingGroupDict | None: + group_row = _as_list(value) + if group_row is None or len(group_row) != 4: + return None + finding_kind = _as_str(group_row[0]) + finding_key = _as_str(group_row[1]) + items_raw = _as_list(group_row[3]) + signature = _decode_wire_structural_signature(group_row[2]) + if ( + finding_kind is None + or finding_key is None + or items_raw is None + or signature is None + ): return None + items: list[StructuralFindingOccurrenceDict] = [] + for item_raw in items_raw: + item = _decode_wire_structural_occurrence(item_raw) + if item is None: + return None + items.append(item) + return StructuralFindingGroupDict( + finding_kind=finding_kind, + finding_key=finding_key, + signature=signature, + items=items, + ) - qualname = _as_str(row[0]) - start_line = _as_int(row[1]) - end_line = _as_int(row[2]) - size = _as_int(row[3]) + +def _decode_wire_structural_signature(value: object) -> dict[str, str] | None: + sig_raw = _as_list(value) + if sig_raw is None: + return None + signature: dict[str, str] = {} + for pair in sig_raw: + pair_list = 
_as_list(pair) + if pair_list is None or len(pair_list) != 2: + return None + key = _as_str(pair_list[0]) + val = _as_str(pair_list[1]) + if key is None or val is None: + return None + signature[key] = val + return signature + + +def _decode_wire_structural_occurrence( + value: object, +) -> StructuralFindingOccurrenceDict | None: + item_list = _as_list(value) + if item_list is None or len(item_list) != 3: + return None + qualname = _as_str(item_list[0]) + start = _as_int(item_list[1]) + end = _as_int(item_list[2]) + if qualname is None or start is None or end is None: + return None + return StructuralFindingOccurrenceDict( + qualname=qualname, + start=start, + end=end, + ) + + +def _decode_wire_unit(value: object, filepath: str) -> UnitDict | None: + decoded = _decode_wire_named_span(value, valid_lengths={11, 17}) + if decoded is None: + return None + row, qualname, start_line, end_line = decoded + core_fields = _decode_wire_unit_core_fields(row) + flow_profiles = _decode_wire_unit_flow_profiles(row) + if core_fields is None or flow_profiles is None: + return None + ( + loc, + stmt_count, + fingerprint, + loc_bucket, + cyclomatic_complexity, + nesting_depth, + risk, + raw_hash, + ) = core_fields + ( + entry_guard_count, + entry_guard_terminal_profile, + entry_guard_has_side_effect_before, + terminal_kind, + try_finally_profile, + side_effect_order_profile, + ) = flow_profiles + return FunctionGroupItem( + qualname=qualname, + filepath=filepath, + start_line=start_line, + end_line=end_line, + loc=loc, + stmt_count=stmt_count, + fingerprint=fingerprint, + loc_bucket=loc_bucket, + cyclomatic_complexity=cyclomatic_complexity, + nesting_depth=nesting_depth, + risk=risk, + raw_hash=raw_hash, + entry_guard_count=entry_guard_count, + entry_guard_terminal_profile=entry_guard_terminal_profile, + entry_guard_has_side_effect_before=entry_guard_has_side_effect_before, + terminal_kind=terminal_kind, + try_finally_profile=try_finally_profile, + 
side_effect_order_profile=side_effect_order_profile, + ) + + +def _decode_wire_block(value: object, filepath: str) -> BlockDict | None: + decoded = _decode_wire_named_sized_span(value, valid_lengths={5}) + if decoded is None: + return None + row, qualname, start_line, end_line, size = decoded + block_hash = _as_str(row[4]) + if block_hash is None: + return None + + return BlockGroupItem( + block_hash=block_hash, + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + + +def _decode_wire_segment(value: object, filepath: str) -> SegmentDict | None: + decoded = _decode_wire_named_sized_span(value, valid_lengths={6}) + if decoded is None: + return None + row, qualname, start_line, end_line, size = decoded segment_hash = _as_str(row[4]) segment_sig = _as_str(row[5]) + if segment_hash is None or segment_sig is None: + return None + + return SegmentGroupItem( + segment_hash=segment_hash, + segment_sig=segment_sig, + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + +def _decode_wire_class_metric( + value: object, + filepath: str, +) -> ClassMetricsDict | None: + decoded = _decode_wire_named_span(value, valid_lengths={9}) + if decoded is None: + return None + row, qualname, start_line, end_line = decoded + metric_fields = _decode_wire_class_metric_fields(row) + if metric_fields is None: + return None + cbo, lcom4, method_count, instance_var_count, risk_coupling, risk_cohesion = ( + metric_fields + ) + return ClassMetricsDict( + qualname=qualname, + filepath=filepath, + start_line=start_line, + end_line=end_line, + cbo=cbo, + lcom4=lcom4, + method_count=method_count, + instance_var_count=instance_var_count, + risk_coupling=risk_coupling, + risk_cohesion=risk_cohesion, + ) + + +def _decode_wire_module_dep(value: object) -> ModuleDepDict | None: + row = _as_list(value) + if row is None or len(row) != 4: + return None + source = _as_str(row[0]) + target = _as_str(row[1]) + 
import_type = _as_str(row[2]) + line = _as_int(row[3]) + if source is None or target is None or import_type is None or line is None: + return None + return ModuleDepDict( + source=source, + target=target, + import_type=import_type, + line=line, + ) + + +def _decode_wire_dead_candidate( + value: object, + filepath: str, +) -> DeadCandidateDict | None: + row = _decode_wire_row(value, valid_lengths={5, 6}) + if row is None: + return None + qualname = _as_str(row[0]) + local_name = _as_str(row[1]) + start_line = _as_int(row[2]) + end_line = _as_int(row[3]) + kind = _as_str(row[4]) + suppressed_rules: list[str] | None = [] + if len(row) == 6: + raw_rules = _as_list(row[5]) + if raw_rules is None or not all(isinstance(rule, str) for rule in raw_rules): + return None + suppressed_rules = sorted({str(rule) for rule in raw_rules if str(rule)}) if ( qualname is None + or local_name is None or start_line is None or end_line is None - or size is None - or segment_hash is None - or segment_sig is None + or kind is None ): return None - - return { - "segment_hash": segment_hash, - "segment_sig": segment_sig, - "filepath": filepath, - "qualname": qualname, - "start_line": start_line, - "end_line": end_line, - "size": size, - } + decoded = DeadCandidateDict( + qualname=qualname, + local_name=local_name, + filepath=filepath, + start_line=start_line, + end_line=end_line, + kind=kind, + ) + if suppressed_rules: + decoded["suppressed_rules"] = suppressed_rules + return decoded def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: wire: dict[str, object] = { "st": [entry["stat"]["mtime_ns"], entry["stat"]["size"]], } + source_stats = entry.get("source_stats") + if source_stats is not None: + wire["ss"] = [ + source_stats["lines"], + source_stats["functions"], + source_stats["methods"], + source_stats["classes"], + ] units = sorted( entry["units"], @@ -783,6 +2069,16 @@ def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: unit["stmt_count"], 
unit["fingerprint"], unit["loc_bucket"], + unit.get("cyclomatic_complexity", 1), + unit.get("nesting_depth", 0), + unit.get("risk", "low"), + unit.get("raw_hash", ""), + unit.get("entry_guard_count", 0), + unit.get("entry_guard_terminal_profile", "none"), + 1 if unit.get("entry_guard_has_side_effect_before", False) else 0, + unit.get("terminal_kind", "fallthrough"), + unit.get("try_finally_profile", "none"), + unit.get("side_effect_order_profile", "none"), ] for unit in units ] @@ -830,6 +2126,110 @@ def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: for segment in segments ] + class_metrics = sorted( + entry["class_metrics"], + key=lambda metric: ( + metric["start_line"], + metric["end_line"], + metric["qualname"], + ), + ) + if class_metrics: + wire["cm"] = [ + [ + metric["qualname"], + metric["start_line"], + metric["end_line"], + metric["cbo"], + metric["lcom4"], + metric["method_count"], + metric["instance_var_count"], + metric["risk_coupling"], + metric["risk_cohesion"], + ] + for metric in class_metrics + ] + coupled_classes_rows = [] + for metric in class_metrics: + coupled_classes_raw = metric.get("coupled_classes", []) + if not _is_string_list(coupled_classes_raw): + continue + coupled_classes = sorted(set(coupled_classes_raw)) + if not coupled_classes: + continue + coupled_classes_rows.append([metric["qualname"], coupled_classes]) + if coupled_classes_rows: + wire["cc"] = coupled_classes_rows + + module_deps = sorted( + entry["module_deps"], + key=lambda dep: (dep["source"], dep["target"], dep["import_type"], dep["line"]), + ) + if module_deps: + wire["md"] = [ + [ + dep["source"], + dep["target"], + dep["import_type"], + dep["line"], + ] + for dep in module_deps + ] + + dead_candidates = sorted( + entry["dead_candidates"], + key=lambda candidate: ( + candidate["start_line"], + candidate["end_line"], + candidate["qualname"], + candidate["local_name"], + candidate["kind"], + ), + ) + if dead_candidates: + # Dead candidates are stored 
inside a per-file cache entry, so the + # filepath is implicit and does not need to be repeated in every row. + encoded_dead_candidates: list[list[object]] = [] + for candidate in dead_candidates: + encoded = [ + candidate["qualname"], + candidate["local_name"], + candidate["start_line"], + candidate["end_line"], + candidate["kind"], + ] + suppressed_rules = candidate.get("suppressed_rules", []) + if _is_string_list(suppressed_rules): + normalized_rules = sorted(set(suppressed_rules)) + if normalized_rules: + encoded.append(normalized_rules) + encoded_dead_candidates.append(encoded) + wire["dc"] = encoded_dead_candidates + + if entry["referenced_names"]: + wire["rn"] = sorted(set(entry["referenced_names"])) + if entry.get("referenced_qualnames"): + wire["rq"] = sorted(set(entry["referenced_qualnames"])) + if entry["import_names"]: + wire["in"] = sorted(set(entry["import_names"])) + if entry["class_names"]: + wire["cn"] = sorted(set(entry["class_names"])) + + if "structural_findings" in entry: + sf = entry.get("structural_findings", []) + wire["sf"] = [ + [ + group["finding_kind"], + group["finding_key"], + sorted(group["signature"].items()), + [ + [item["qualname"], item["start"], item["end"]] + for item in group["items"] + ], + ] + for group in sf + ] + return wire @@ -848,12 +2248,49 @@ def _is_file_stat_dict(value: object) -> bool: return isinstance(value.get("mtime_ns"), int) and isinstance(value.get("size"), int) +def _is_source_stats_dict(value: object) -> bool: + if not isinstance(value, dict): + return False + lines = value.get("lines") + functions = value.get("functions") + methods = value.get("methods") + classes = value.get("classes") + return ( + isinstance(lines, int) + and lines >= 0 + and isinstance(functions, int) + and functions >= 0 + and isinstance(methods, int) + and methods >= 0 + and isinstance(classes, int) + and classes >= 0 + ) + + def _is_unit_dict(value: object) -> bool: if not isinstance(value, dict): return False string_keys = 
("qualname", "filepath", "fingerprint", "loc_bucket") int_keys = ("start_line", "end_line", "loc", "stmt_count") - return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) + if not _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys): + return False + cyclomatic_complexity = value.get("cyclomatic_complexity", 1) + nesting_depth = value.get("nesting_depth", 0) + risk = value.get("risk", "low") + raw_hash = value.get("raw_hash", "") + return ( + isinstance(cyclomatic_complexity, int) + and isinstance(nesting_depth, int) + and isinstance(risk, str) + and risk in {"low", "medium", "high"} + and isinstance(raw_hash, str) + and isinstance(value.get("entry_guard_count", 0), int) + and isinstance(value.get("entry_guard_terminal_profile", "none"), str) + and isinstance(value.get("entry_guard_has_side_effect_before", False), bool) + and isinstance(value.get("terminal_kind", "fallthrough"), str) + and isinstance(value.get("try_finally_profile", "none"), str) + and isinstance(value.get("side_effect_order_profile", "none"), str) + ) def _is_block_dict(value: object) -> bool: @@ -872,16 +2309,61 @@ def _is_segment_dict(value: object) -> bool: return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) -def _is_unit_list(value: object) -> bool: - return isinstance(value, list) and all(_is_unit_dict(item) for item in value) +def _is_class_metrics_dict(value: object) -> bool: + if not isinstance(value, dict): + return False + if not _has_typed_fields( + value, + string_keys=( + "qualname", + "filepath", + "risk_coupling", + "risk_cohesion", + ), + int_keys=( + "start_line", + "end_line", + "cbo", + "lcom4", + "method_count", + "instance_var_count", + ), + ): + return False + + coupled_classes = value.get("coupled_classes") + if coupled_classes is None: + return True + return _is_string_list(coupled_classes) + + +def _is_module_dep_dict(value: object) -> bool: + if not isinstance(value, dict): + return False + return 
_has_typed_fields( + value, + string_keys=("source", "target", "import_type"), + int_keys=("line",), + ) -def _is_block_list(value: object) -> bool: - return isinstance(value, list) and all(_is_block_dict(item) for item in value) +def _is_dead_candidate_dict(value: object) -> bool: + if not isinstance(value, dict): + return False + if not _has_typed_fields( + value, + string_keys=("qualname", "local_name", "filepath", "kind"), + int_keys=("start_line", "end_line"), + ): + return False + suppressed_rules = value.get("suppressed_rules") + if suppressed_rules is None: + return True + return _is_string_list(suppressed_rules) -def _is_segment_list(value: object) -> bool: - return isinstance(value, list) and all(_is_segment_dict(item) for item in value) +def _is_string_list(value: object) -> bool: + return isinstance(value, list) and all(isinstance(item, str) for item in value) def _has_typed_fields( diff --git a/codeclone/cfg.py b/codeclone/cfg.py index 625f1f8..097a216 100644 --- a/codeclone/cfg.py +++ b/codeclone/cfg.py @@ -1,21 +1,18 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import ast -from collections.abc import Iterable from dataclasses import dataclass -from typing import Protocol, cast +from typing import TYPE_CHECKING, Protocol, cast from .cfg_model import CFG, Block from .meta_markers import CFG_META_PREFIX +if TYPE_CHECKING: + from collections.abc import Iterable + __all__ = ["CFG", "CFGBuilder"] TryStar = getattr(ast, "TryStar", ast.Try) @@ -105,9 +102,9 @@ def _visit(self, stmt: ast.stmt) -> None: self._visit_for(stmt) # Structure is identical to For case ast.Try(): - self._visit_try(cast(_TryLike, stmt)) + self._visit_try(cast("_TryLike", stmt)) case _ if TryStar is not None and isinstance(stmt, TryStar): - self._visit_try(cast(_TryLike, cast(object, stmt))) + self._visit_try(cast("_TryLike", cast("object", stmt))) case ast.With() | ast.AsyncWith(): self._visit_with(stmt) @@ -139,64 +136,98 @@ def _visit_if(self, stmt: ast.If) -> None: self.current = after_block - def _visit_while(self, stmt: ast.While) -> None: - cond_block = self.cfg.create_block() - body_block = self.cfg.create_block() - else_block = self.cfg.create_block() if stmt.orelse else None - after_block = self.cfg.create_block() - - self.current.add_successor(cond_block) - - self.current = cond_block - false_target = else_block if else_block is not None else after_block - self._emit_condition(stmt.test, body_block, false_target) - + def _visit_loop_body( + self, + *, + body_block: Block, + continue_target: Block, + break_target: Block, + body: Iterable[ast.stmt], + ) -> None: self._loop_stack.append( - _LoopContext(continue_target=cond_block, break_target=after_block) + _LoopContext(continue_target=continue_target, break_target=break_target) ) self.current = body_block - self._visit_statements(stmt.body) + self._visit_statements(body) if not self.current.is_terminated: - self.current.add_successor(cond_block) + self.current.add_successor(continue_target) 
self._loop_stack.pop() - if else_block is not None: - self.current = else_block - self._visit_statements(stmt.orelse) - if not self.current.is_terminated: - self.current.add_successor(after_block) - - self.current = after_block + def _visit_loop_else( + self, + *, + else_block: Block | None, + orelse: Iterable[ast.stmt], + after_block: Block, + ) -> None: + if else_block is None: + return + self.current = else_block + self._visit_statements(orelse) + if not self.current.is_terminated: + self.current.add_successor(after_block) - def _visit_for(self, stmt: ast.For | ast.AsyncFor) -> None: - iter_block = self.cfg.create_block() + def _create_loop_followup_blocks( + self, *, has_else: bool + ) -> tuple[Block, Block | None, Block]: body_block = self.cfg.create_block() - else_block = self.cfg.create_block() if stmt.orelse else None + else_block = self.cfg.create_block() if has_else else None after_block = self.cfg.create_block() + return body_block, else_block, after_block + + def _enter_loop_header( + self, *, has_else: bool + ) -> tuple[Block, Block, Block | None, Block]: + header_block = self.cfg.create_block() + body_block, else_block, after_block = self._create_loop_followup_blocks( + has_else=has_else + ) + self.current.add_successor(header_block) + self.current = header_block + return header_block, body_block, else_block, after_block - self.current.add_successor(iter_block) + def _visit_while(self, stmt: ast.While) -> None: + cond_block, body_block, else_block, after_block = self._enter_loop_header( + has_else=bool(stmt.orelse) + ) + false_target = else_block if else_block is not None else after_block + self._emit_condition(stmt.test, body_block, false_target) - self.current = iter_block + self._visit_loop_body( + body_block=body_block, + continue_target=cond_block, + break_target=after_block, + body=stmt.body, + ) + self._visit_loop_else( + else_block=else_block, + orelse=stmt.orelse, + after_block=after_block, + ) + + self.current = after_block + + def 
_visit_for(self, stmt: ast.For | ast.AsyncFor) -> None: + iter_block, body_block, else_block, after_block = self._enter_loop_header( + has_else=bool(stmt.orelse) + ) self.current.statements.append(ast.Expr(value=stmt.iter)) self.current.add_successor(body_block) self.current.add_successor( else_block if else_block is not None else after_block ) - self._loop_stack.append( - _LoopContext(continue_target=iter_block, break_target=after_block) + self._visit_loop_body( + body_block=body_block, + continue_target=iter_block, + break_target=after_block, + body=stmt.body, + ) + self._visit_loop_else( + else_block=else_block, + orelse=stmt.orelse, + after_block=after_block, ) - self.current = body_block - self._visit_statements(stmt.body) - if not self.current.is_terminated: - self.current.add_successor(iter_block) - self._loop_stack.pop() - - if else_block is not None: - self.current = else_block - self._visit_statements(stmt.orelse) - if not self.current.is_terminated: - self.current.add_successor(after_block) self.current = after_block diff --git a/codeclone/cfg_model.py b/codeclone/cfg_model.py index a90d576..bb5fba2 100644 --- a/codeclone/cfg_model.py +++ b/codeclone/cfg_model.py @@ -1,15 +1,13 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations -import ast from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import ast @dataclass(eq=False, slots=True) diff --git a/codeclone/cli.py b/codeclone/cli.py index f729ec3..4de107c 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -1,102 +1,174 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import os import sys import time -from collections.abc import Mapping, Sequence -from concurrent.futures import Future, ProcessPoolExecutor, as_completed -from dataclasses import asdict, dataclass +from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, cast - -from rich.console import Console -from rich.panel import Panel -from rich.progress import ( - BarColumn, - Progress, - SpinnerColumn, - TextColumn, - TimeElapsedColumn, -) -from rich.theme import Theme +from typing import TYPE_CHECKING, Literal, Protocol, cast from . import __version__ from . 
import ui_messages as ui from ._cli_args import build_parser -from ._cli_meta import _build_report_meta +from ._cli_baselines import ( + CloneBaselineState as _CloneBaselineStateImpl, +) +from ._cli_baselines import ( + MetricsBaselineSectionProbe as _MetricsBaselineSectionProbeImpl, +) +from ._cli_baselines import ( + MetricsBaselineState as _MetricsBaselineStateImpl, +) +from ._cli_baselines import ( + probe_metrics_baseline_section as _probe_metrics_baseline_section_impl, +) +from ._cli_baselines import ( + resolve_clone_baseline_state as _resolve_clone_baseline_state_impl, +) +from ._cli_baselines import ( + resolve_metrics_baseline_state as _resolve_metrics_baseline_state_impl, +) +from ._cli_config import ( + ConfigValidationError, + apply_pyproject_config_overrides, + collect_explicit_cli_dests, + load_pyproject_config, +) +from ._cli_gating import ( + parse_metric_reason_entry as _parse_metric_reason_entry_impl, +) +from ._cli_gating import ( + print_gating_failure_block as _print_gating_failure_block_impl, +) from ._cli_paths import _validate_output_path -from ._cli_summary import _print_summary -from ._report_types import GroupItem -from .baseline import ( - BASELINE_UNTRUSTED_STATUSES, - Baseline, - BaselineStatus, - coerce_baseline_status, - current_python_tag, +from ._cli_reports import ( + write_report_outputs as _write_report_outputs_impl, ) -from .cache import Cache, CacheEntry, CacheStatus, FileStat, file_stat_signature -from .contracts import ( - BASELINE_FINGERPRINT_VERSION, - BASELINE_SCHEMA_VERSION, - ISSUES_URL, - ExitCode, +from ._cli_rich import ( + PlainConsole as _PlainConsole, ) -from .errors import BaselineValidationError, CacheError -from .extractor import extract_units_from_source -from .html_report import build_html_report -from .normalize import NormalizationConfig -from .report import ( - build_block_group_facts, - build_block_groups, - build_groups, - build_segment_groups, - prepare_block_report_groups, - 
prepare_segment_report_groups, - to_json_report, - to_text_report, +from ._cli_rich import ( + make_console as _make_rich_console, ) -from .scanner import iter_py_files, module_name_from_path +from ._cli_rich import ( + make_plain_console as _make_plain_console_impl, +) +from ._cli_rich import ( + print_banner as _print_banner_impl, +) +from ._cli_rich import ( + rich_progress_symbols as _rich_progress_symbols_impl, +) +from ._cli_runtime import ( + configure_metrics_mode as _configure_metrics_mode_impl, +) +from ._cli_runtime import ( + metrics_computed as _metrics_computed_impl, +) +from ._cli_runtime import ( + print_failed_files as _print_failed_files_impl, +) +from ._cli_runtime import ( + resolve_cache_path as _resolve_cache_path_impl, +) +from ._cli_runtime import ( + resolve_cache_status as _resolve_cache_status_impl, +) +from ._cli_runtime import ( + validate_numeric_args as _validate_numeric_args_impl, +) +from ._cli_summary import MetricsSnapshot, _print_metrics, _print_summary +from .baseline import Baseline +from .cache import Cache, CacheStatus, build_segment_report_projection +from .contracts import ISSUES_URL, ExitCode +from .errors import CacheError if TYPE_CHECKING: - from .blocks import BlockUnit, SegmentUnit - from .extractor import Unit - -# Custom theme for Rich -custom_theme = Theme( - { - "info": "cyan", - "warning": "yellow", - "error": "bold red", - "success": "bold green", - "dim": "dim", - } -) + from argparse import Namespace + from collections.abc import Callable, Mapping, Sequence + from types import ModuleType + + from rich.console import Console as RichConsole + from rich.progress import BarColumn as RichBarColumn + from rich.progress import Progress as RichProgress + from rich.progress import SpinnerColumn as RichSpinnerColumn + from rich.progress import TextColumn as RichTextColumn + from rich.progress import TimeElapsedColumn as RichTimeElapsedColumn + + from ._cli_baselines import _BaselineArgs as _BaselineArgsLike + from 
._cli_gating import _GatingArgs as _GatingArgsLike + from ._cli_reports import _QuietArgs as _QuietArgsLike + from ._cli_runtime import _RuntimeArgs as _RuntimeArgsLike + from .models import MetricsDiff + from .normalize import NormalizationConfig + from .pipeline import ( + AnalysisResult, + BootstrapResult, + DiscoveryResult, + GatingResult, + ReportArtifacts, + ) + from .pipeline import ( + OutputPaths as PipelineOutputPaths, + ) + from .pipeline import ( + ProcessingResult as PipelineProcessingResult, + ) -LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() +MAX_FILE_SIZE = 10 * 1024 * 1024 +__all__ = [ + "MAX_FILE_SIZE", + "ProcessingResult", + "analyze", + "bootstrap", + "discover", + "gate", + "main", + "process", + "process_file", + "report", +] +_PIPELINE_MODULE: ModuleType | None = None -def _make_console(*, no_color: bool) -> Console: - return Console(theme=custom_theme, width=200, no_color=no_color) +def _pipeline_module() -> ModuleType: + global _PIPELINE_MODULE + if _PIPELINE_MODULE is None: + from . 
import pipeline as _pipeline -console = _make_console(no_color=False) + _PIPELINE_MODULE = _pipeline + return _PIPELINE_MODULE -MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB -BATCH_SIZE = 100 +@dataclass(frozen=True, slots=True) +class OutputPaths: + html: Path | None = None + json: Path | None = None + text: Path | None = None + md: Path | None = None + sarif: Path | None = None -@dataclass(slots=True) -class ProcessingResult: - """Result of processing a single file.""" +@dataclass(frozen=True, slots=True) +class ProcessingResult: filepath: str success: bool error: str | None = None - units: list[Unit] | None = None - blocks: list[BlockUnit] | None = None - segments: list[SegmentUnit] | None = None - stat: FileStat | None = None + units: list[object] | None = None + blocks: list[object] | None = None + segments: list[object] | None = None + lines: int = 0 + functions: int = 0 + methods: int = 0 + classes: int = 0 + stat: Mapping[str, int] | None = None error_kind: str | None = None + file_metrics: object | None = None + structural_findings: list[object] | None = None def process_file( @@ -105,98 +177,210 @@ def process_file( cfg: NormalizationConfig, min_loc: int, min_stmt: int, + collect_structural_findings: bool = True, ) -> ProcessingResult: - """ - Process a single Python file with comprehensive error handling. + pipeline_mod = _pipeline_module() + result = pipeline_mod.process_file( + filepath, + root, + cfg, + min_loc, + min_stmt, + collect_structural_findings, + ) + return cast("ProcessingResult", result) - Args: - filepath: Absolute path to the file - root: Root directory of the scan - cfg: Normalization configuration - min_loc: Minimum lines of code to consider a function - min_stmt: Minimum statements to consider a function - Returns: - ProcessingResult object indicating success/failure and containing - extracted units/blocks if successful. 
- """ +def bootstrap( + *, + args: Namespace, + root: Path, + output_paths: PipelineOutputPaths | OutputPaths, + cache_path: Path, +) -> BootstrapResult: + return cast( + "BootstrapResult", + _pipeline_module().bootstrap( + args=args, + root=root, + output_paths=output_paths, + cache_path=cache_path, + ), + ) - try: - # Single os.stat() for both size check and cache signature - try: - st = os.stat(filepath) - if st.st_size > MAX_FILE_SIZE: - return ProcessingResult( - filepath=filepath, - success=False, - error=f"File too large: {st.st_size} bytes (max {MAX_FILE_SIZE})", - error_kind="file_too_large", - ) - except OSError as e: - return ProcessingResult( - filepath=filepath, - success=False, - error=f"Cannot stat file: {e}", - error_kind="stat_error", - ) - stat: FileStat = {"mtime_ns": st.st_mtime_ns, "size": st.st_size} +def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: + return cast("DiscoveryResult", _pipeline_module().discover(boot=boot, cache=cache)) - try: - source = Path(filepath).read_text("utf-8") - except UnicodeDecodeError as e: - return ProcessingResult( - filepath=filepath, - success=False, - error=f"Encoding error: {e}", - error_kind="source_read_error", - ) - except OSError as e: - return ProcessingResult( - filepath=filepath, - success=False, - error=f"Cannot read file: {e}", - error_kind="source_read_error", - ) - module_name = module_name_from_path(root, filepath) +def process( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + cache: Cache, + on_advance: Callable[[], None] | None = None, + on_worker_error: Callable[[str], None] | None = None, + on_parallel_fallback: Callable[[Exception], None] | None = None, +) -> PipelineProcessingResult: + return cast( + "PipelineProcessingResult", + _pipeline_module().process( + boot=boot, + discovery=discovery, + cache=cache, + on_advance=on_advance, + on_worker_error=on_worker_error, + on_parallel_fallback=on_parallel_fallback, + ), + ) - units, blocks, segments = 
extract_units_from_source( - source=source, - filepath=filepath, - module_name=module_name, - cfg=cfg, - min_loc=min_loc, - min_stmt=min_stmt, - ) - return ProcessingResult( - filepath=filepath, - success=True, - units=units, - blocks=blocks, - segments=segments, - stat=stat, - ) +def analyze( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + processing: PipelineProcessingResult, +) -> AnalysisResult: + return cast( + "AnalysisResult", + _pipeline_module().analyze( + boot=boot, + discovery=discovery, + processing=processing, + ), + ) - except Exception as e: - return ProcessingResult( - filepath=filepath, - success=False, - error=f"Unexpected error: {type(e).__name__}: {e}", - error_kind="unexpected_error", - ) + +def report( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + processing: PipelineProcessingResult, + analysis: AnalysisResult, + report_meta: Mapping[str, object], + new_func: set[str], + new_block: set[str], + html_builder: Callable[..., str] | None = None, + metrics_diff: MetricsDiff | None = None, +) -> ReportArtifacts: + return cast( + "ReportArtifacts", + _pipeline_module().report( + boot=boot, + discovery=discovery, + processing=processing, + analysis=analysis, + report_meta=report_meta, + new_func=new_func, + new_block=new_block, + html_builder=html_builder, + metrics_diff=metrics_diff, + ), + ) -def print_banner() -> None: - console.print( - Panel( - ui.banner_title(__version__), - border_style="blue", - padding=(0, 2), - width=ui.CLI_LAYOUT_WIDTH, - expand=False, - ) +def gate( + *, + boot: BootstrapResult, + analysis: AnalysisResult, + new_func: set[str], + new_block: set[str], + metrics_diff: MetricsDiff | None, +) -> GatingResult: + return cast( + "GatingResult", + _pipeline_module().gate( + boot=boot, + analysis=analysis, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + ), + ) + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... 
+ + +LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() +ReportPathOrigin = Literal["default", "explicit"] + + +def _rich_progress_symbols() -> tuple[ + type[RichProgress], + type[RichSpinnerColumn], + type[RichTextColumn], + type[RichBarColumn], + type[RichTimeElapsedColumn], +]: + return _rich_progress_symbols_impl() + + +def _make_console(*, no_color: bool) -> RichConsole: + return _make_rich_console( + no_color=no_color, + width=ui.CLI_LAYOUT_MAX_WIDTH, + ) + + +def _print_verbose_clone_hashes( + console: _PrinterLike, + *, + label: str, + clone_hashes: set[str], +) -> None: + if not clone_hashes: + return + console.print(f"\n {label}:") + for clone_hash in sorted(clone_hashes): + console.print(f" - {clone_hash}") + + +def _make_plain_console() -> _PlainConsole: + return _make_plain_console_impl() + + +console: RichConsole | _PlainConsole = _make_plain_console() + + +def _parse_metric_reason_entry(reason: str) -> tuple[str, str]: + return _parse_metric_reason_entry_impl(reason) + + +def _print_gating_failure_block( + *, + code: str, + entries: Sequence[tuple[str, object]], + args: Namespace, +) -> None: + _print_gating_failure_block_impl( + console=cast("_PrinterLike", console), + code=code, + entries=list(entries), + args=cast("_GatingArgsLike", cast(object, args)), + ) + + +def build_html_report(*args: object, **kwargs: object) -> str: + # Lazy import avoids pulling HTML renderer in non-HTML CLI runs. 
+ from .html_report import build_html_report as _build_html_report + + html_builder: Callable[..., str] = _build_html_report + return html_builder(*args, **kwargs) + + +_CloneBaselineState = _CloneBaselineStateImpl +_MetricsBaselineState = _MetricsBaselineStateImpl +_MetricsBaselineSectionProbe = _MetricsBaselineSectionProbeImpl + + +def print_banner(*, root: Path | None = None) -> None: + _print_banner_impl( + console=cast("_PrinterLike", console), + banner_title=ui.banner_title(__version__), + project_name=(root.name if root is not None else None), + root_display=(str(root) if root is not None else None), ) @@ -212,678 +396,853 @@ def _is_debug_enabled( return debug_from_flag or debug_from_env -def _main_impl() -> None: - ap = build_parser(__version__) +def _report_path_origins(argv: Sequence[str]) -> dict[str, ReportPathOrigin | None]: + origins: dict[str, ReportPathOrigin | None] = { + "html": None, + "json": None, + "md": None, + "sarif": None, + "text": None, + } + flag_to_field = { + "--html": "html", + "--json": "json", + "--md": "md", + "--sarif": "sarif", + "--text": "text", + } + index = 0 + while index < len(argv): + token = argv[index] + if token == "--": + break + if "=" in token: + flag, _value = token.split("=", maxsplit=1) + field_name = flag_to_field.get(flag) + if field_name is not None: + origins[field_name] = "explicit" + index += 1 + continue + field_name = flag_to_field.get(token) + if field_name is None: + index += 1 + continue + next_token = argv[index + 1] if index + 1 < len(argv) else None + if next_token is None or next_token.startswith("-"): + origins[field_name] = "default" + index += 1 + continue + origins[field_name] = "explicit" + index += 2 + return origins + + +def _report_path_timestamp_slug(report_generated_at_utc: str) -> str: + return report_generated_at_utc.replace("-", "").replace(":", "") + + +def _timestamped_report_path(path: Path, *, report_generated_at_utc: str) -> Path: + suffix = path.suffix + stem = path.name[: 
-len(suffix)] if suffix else path.name + return path.with_name( + f"{stem}-{_report_path_timestamp_slug(report_generated_at_utc)}{suffix}" + ) - cache_path_from_args = any( - arg in {"--cache-dir", "--cache-path"} - or arg.startswith(("--cache-dir=", "--cache-path=")) - for arg in sys.argv + +def _resolve_output_paths( + args: Namespace, + *, + report_path_origins: Mapping[str, ReportPathOrigin | None], + report_generated_at_utc: str, +) -> OutputPaths: + printer = cast("_PrinterLike", console) + resolved: dict[str, Path | None] = { + "html": None, + "json": None, + "md": None, + "sarif": None, + "text": None, + } + output_specs = ( + ("html", "html_out", ".html", "HTML"), + ("json", "json_out", ".json", "JSON"), + ("md", "md_out", ".md", "Markdown"), + ("sarif", "sarif_out", ".sarif", "SARIF"), + ("text", "text_out", ".txt", "text"), ) - args = ap.parse_args() - if args.ci: - args.fail_on_new = True - args.no_color = True - args.quiet = True + for field_name, arg_name, expected_suffix, label in output_specs: + raw_value = getattr(args, arg_name, None) + if not raw_value: + continue + path = _validate_output_path( + raw_value, + expected_suffix=expected_suffix, + label=label, + console=printer, + invalid_message=ui.fmt_invalid_output_extension, + invalid_path_message=ui.fmt_invalid_output_path, + ) + if ( + args.timestamped_report_paths + and report_path_origins.get(field_name) == "default" + ): + path = _timestamped_report_path( + path, + report_generated_at_utc=report_generated_at_utc, + ) + resolved[field_name] = path + + return OutputPaths( + html=resolved["html"], + json=resolved["json"], + text=resolved["text"], + md=resolved["md"], + sarif=resolved["sarif"], + ) - if args.quiet: - args.no_progress = True - global console - console = _make_console(no_color=args.no_color) +def _validate_report_ui_flags(*, args: Namespace, output_paths: OutputPaths) -> None: + if args.open_html_report and output_paths.html is None: + 
console.print(ui.fmt_contract_error(ui.ERR_OPEN_HTML_REPORT_REQUIRES_HTML)) + sys.exit(ExitCode.CONTRACT_ERROR) - if args.max_baseline_size_mb < 0 or args.max_cache_size_mb < 0: + if args.timestamped_report_paths and not any( + ( + output_paths.html, + output_paths.json, + output_paths.md, + output_paths.sarif, + output_paths.text, + ) + ): console.print( - ui.fmt_contract_error("Size limits must be non-negative integers (MB).") + ui.fmt_contract_error(ui.ERR_TIMESTAMPED_REPORT_PATHS_REQUIRES_REPORT) ) sys.exit(ExitCode.CONTRACT_ERROR) - t0 = time.monotonic() - if not args.quiet: - print_banner() +def _resolve_cache_path(*, root_path: Path, args: Namespace, from_args: bool) -> Path: + return _resolve_cache_path_impl( + root_path=root_path, + args=cast("_RuntimeArgsLike", cast(object, args)), + from_args=from_args, + legacy_cache_path=LEGACY_CACHE_PATH, + console=cast("_PrinterLike", console), + ) - try: - root_path = Path(args.root).resolve() - if not root_path.exists(): - console.print( - ui.fmt_contract_error(ui.ERR_ROOT_NOT_FOUND.format(path=root_path)) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - except OSError as e: - console.print(ui.fmt_contract_error(ui.ERR_INVALID_ROOT_PATH.format(error=e))) - sys.exit(ExitCode.CONTRACT_ERROR) - if not args.quiet: - console.print(ui.fmt_scanning_root(root_path)) - - html_out_path: Path | None = None - json_out_path: Path | None = None - text_out_path: Path | None = None - if args.html_out: - html_out_path = _validate_output_path( - args.html_out, - expected_suffix=".html", - label="HTML", - console=console, - invalid_message=ui.fmt_invalid_output_extension, - invalid_path_message=ui.fmt_invalid_output_path, - ) - if args.json_out: - json_out_path = _validate_output_path( - args.json_out, - expected_suffix=".json", - label="JSON", - console=console, - invalid_message=ui.fmt_invalid_output_extension, - invalid_path_message=ui.fmt_invalid_output_path, - ) - if args.text_out: - text_out_path = _validate_output_path( - 
args.text_out, - expected_suffix=".txt", - label="text", - console=console, - invalid_message=ui.fmt_invalid_output_extension, - invalid_path_message=ui.fmt_invalid_output_path, - ) +def _validate_numeric_args(args: Namespace) -> bool: + return _validate_numeric_args_impl(cast("_RuntimeArgsLike", cast(object, args))) - # Initialize Cache - cfg = NormalizationConfig() - if cache_path_from_args and args.cache_path: - cache_path = Path(args.cache_path).expanduser() - else: - cache_path = root_path / ".cache" / "codeclone" / "cache.json" - if LEGACY_CACHE_PATH.exists(): - try: - legacy_resolved = LEGACY_CACHE_PATH.resolve() - except OSError: - legacy_resolved = LEGACY_CACHE_PATH - if legacy_resolved != cache_path: - console.print( - ui.fmt_legacy_cache_warning( - legacy_path=legacy_resolved, new_path=cache_path - ) - ) - cache = Cache( - cache_path, - root=root_path, - max_size_bytes=args.max_cache_size_mb * 1024 * 1024, - min_loc=args.min_loc, - min_stmt=args.min_stmt, + +def _configure_metrics_mode(*, args: Namespace, metrics_baseline_exists: bool) -> None: + _configure_metrics_mode_impl( + args=cast("_RuntimeArgsLike", cast(object, args)), + metrics_baseline_exists=metrics_baseline_exists, + console=cast("_PrinterLike", console), ) - cache.load() - if cache.load_warning: - console.print(f"[warning]{cache.load_warning}[/warning]") - all_units: list[GroupItem] = [] - all_blocks: list[GroupItem] = [] - all_segments: list[GroupItem] = [] - files_found = 0 - files_analyzed = 0 - cache_hits = 0 - files_skipped = 0 - files_to_process: list[str] = [] - - def _get_cached_entry( - fp: str, - ) -> tuple[FileStat | None, CacheEntry | None, str | None]: - try: - stat = file_stat_signature(fp) - except OSError as e: - return None, None, ui.fmt_skipping_file(fp, e) - cached = cache.get_file_entry(fp) - return stat, cached, None - def _safe_process_file(fp: str) -> ProcessingResult | None: - try: - return process_file( - fp, - str(root_path), - cfg, - args.min_loc, - args.min_stmt, 
- ) - except Exception as e: - console.print(ui.fmt_worker_failed(e)) - return None +def _print_failed_files(failed_files: Sequence[str]) -> None: + _print_failed_files_impl( + failed_files=tuple(failed_files), + console=cast("_PrinterLike", console), + ) - def _safe_future_result( - future: Future[ProcessingResult], - ) -> tuple[ProcessingResult | None, str | None]: - try: - return future.result(), None - except Exception as e: - return None, str(e) - - # Discovery phase - def _discover_files() -> None: - nonlocal files_found, cache_hits, files_skipped - for fp in iter_py_files(str(root_path)): - files_found += 1 - stat, cached, warn = _get_cached_entry(fp) - if warn: - console.print(warn) - files_skipped += 1 - continue - if cached and cached.get("stat") == stat: - cache_hits += 1 - all_units.extend( - cast( - list[GroupItem], - cast(object, cached.get("units", [])), - ) - ) - all_blocks.extend( - cast( - list[GroupItem], - cast(object, cached.get("blocks", [])), - ) - ) - all_segments.extend( - cast( - list[GroupItem], - cast(object, cached.get("segments", [])), - ) - ) - else: - files_to_process.append(fp) - try: - if args.quiet: - _discover_files() - else: - with console.status(ui.STATUS_DISCOVERING, spinner="dots"): - _discover_files() - except OSError as e: - console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=e))) - sys.exit(ExitCode.CONTRACT_ERROR) +def _metrics_computed(args: Namespace) -> tuple[str, ...]: + return _metrics_computed_impl(cast("_RuntimeArgsLike", cast(object, args))) - total_files = len(files_to_process) - failed_files = [] - source_read_failures: list[str] = [] - - # Processing phase - if total_files > 0: - - def handle_result(result: ProcessingResult) -> None: - nonlocal files_analyzed, files_skipped - if result.success and result.stat: - cache.put_file_entry( - result.filepath, - result.stat, - result.units or [], - result.blocks or [], - result.segments or [], - ) - files_analyzed += 1 - if result.units: - 
all_units.extend([asdict(u) for u in result.units]) - if result.blocks: - all_blocks.extend([asdict(b) for b in result.blocks]) - if result.segments: - all_segments.extend([asdict(s) for s in result.segments]) - else: - files_skipped += 1 - failure = f"{result.filepath}: {result.error}" - failed_files.append(failure) - if result.error_kind == "source_read_error": - source_read_failures.append(failure) - - def process_sequential(with_progress: bool) -> None: - nonlocal files_skipped - if with_progress: - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - TimeElapsedColumn(), - console=console, - ) as progress: - task = progress.add_task( - f"Analyzing {total_files} files...", total=total_files - ) - for fp in files_to_process: - result = _safe_process_file(fp) - if result is not None: - handle_result(result) - else: - files_skipped += 1 - failed_files.append(f"{fp}: worker failed") - progress.advance(task) - else: - if not args.quiet: - console.print(ui.fmt_processing_changed(total_files)) - for fp in files_to_process: - result = _safe_process_file(fp) - if result is not None: - handle_result(result) - else: - files_skipped += 1 - failed_files.append(f"{fp}: worker failed") - try: - with ProcessPoolExecutor(max_workers=args.processes) as executor: - if args.no_progress: - if not args.quiet: - console.print(ui.fmt_processing_changed(total_files)) - - # Process in batches to manage memory - for i in range(0, total_files, BATCH_SIZE): - batch = files_to_process[i : i + BATCH_SIZE] - futures = [ - executor.submit( - process_file, - fp, - str(root_path), - cfg, - args.min_loc, - args.min_stmt, - ) - for fp in batch - ] - future_to_fp = { - id(fut): fp for fut, fp in zip(futures, batch, strict=True) - } - - for future in as_completed(futures): - fp = future_to_fp[id(future)] - result, err = _safe_future_result(future) - if result is not None: - 
handle_result(result) - elif err is not None: - files_skipped += 1 - reason = err - failed_files.append(f"{fp}: {reason}") - console.print(ui.fmt_batch_item_failed(reason)) - else: - files_skipped += 1 - - else: - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - TimeElapsedColumn(), - console=console, - ) as progress: - task = progress.add_task( - f"Analyzing {total_files} files...", total=total_files - ) - - # Process in batches - for i in range(0, total_files, BATCH_SIZE): - batch = files_to_process[i : i + BATCH_SIZE] - futures = [ - executor.submit( - process_file, - fp, - str(root_path), - cfg, - args.min_loc, - args.min_stmt, - ) - for fp in batch - ] - future_to_fp = { - id(fut): fp - for fut, fp in zip(futures, batch, strict=True) - } - - for future in as_completed(futures): - fp = future_to_fp[id(future)] - result, err = _safe_future_result(future) - if result is not None: - handle_result(result) - elif err is not None: - files_skipped += 1 - reason = err - failed_files.append(f"{fp}: {reason}") - # Should rarely happen due to try/except - # in process_file. - console.print(ui.fmt_worker_failed(reason)) - else: - files_skipped += 1 - progress.advance(task) - except (OSError, RuntimeError, PermissionError) as e: - console.print(ui.fmt_parallel_fallback(e)) - process_sequential(with_progress=not args.no_progress) - - if failed_files: - console.print(ui.fmt_failed_files_header(len(failed_files))) - for failure in failed_files[:10]: - console.print(f" • {failure}") - if len(failed_files) > 10: - console.print(f" ... 
and {len(failed_files) - 10} more") +def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: + return _probe_metrics_baseline_section_impl(path) - gating_mode = args.fail_on_new or args.fail_threshold >= 0 - source_read_contract_failure = ( - bool(source_read_failures) and gating_mode and not args.update_baseline + +def _resolve_clone_baseline_state( + *, + args: Namespace, + baseline_path: Path, + baseline_exists: bool, + analysis: AnalysisResult, + shared_baseline_payload: dict[str, object] | None = None, +) -> _CloneBaselineState: + return _resolve_clone_baseline_state_impl( + args=cast("_BaselineArgsLike", cast(object, args)), + baseline_path=baseline_path, + baseline_exists=baseline_exists, + func_groups=analysis.func_groups, + block_groups=analysis.block_groups, + codeclone_version=__version__, + console=cast("_PrinterLike", console), + shared_baseline_payload=shared_baseline_payload, ) - # Analysis phase - suppressed_segment_groups = 0 - if args.quiet: - func_groups = build_groups(all_units) - block_groups = build_block_groups(all_blocks) - segment_groups = build_segment_groups(all_segments) - segment_groups, suppressed_segment_groups = prepare_segment_report_groups( - segment_groups - ) - try: - cache.save() - except CacheError as e: - console.print(ui.fmt_cache_save_failed(e)) + +def _resolve_metrics_baseline_state( + *, + args: Namespace, + metrics_baseline_path: Path, + metrics_baseline_exists: bool, + baseline_updated_path: Path | None, + analysis: AnalysisResult, + shared_baseline_payload: dict[str, object] | None = None, +) -> _MetricsBaselineState: + return _resolve_metrics_baseline_state_impl( + args=cast("_BaselineArgsLike", cast(object, args)), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + baseline_updated_path=baseline_updated_path, + project_metrics=analysis.project_metrics, + console=cast("_PrinterLike", console), + shared_baseline_payload=shared_baseline_payload, + 
) + + +def _resolve_cache_status(cache: Cache) -> tuple[CacheStatus, str | None]: + return _resolve_cache_status_impl(cache) + + +def _cache_update_segment_projection(cache: Cache, analysis: AnalysisResult) -> None: + if not hasattr(cache, "segment_report_projection"): + return + new_projection = build_segment_report_projection( + digest=analysis.segment_groups_raw_digest, + suppressed=analysis.suppressed_segment_groups, + groups=analysis.segment_groups, + ) + if new_projection != cache.segment_report_projection: + cache.segment_report_projection = new_projection + cache._dirty = True + + +def _run_analysis_stages( + *, + args: Namespace, + boot: BootstrapResult, + cache: Cache, +) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: + def _require_rich_console( + value: RichConsole | _PlainConsole, + ) -> RichConsole: + if isinstance(value, _PlainConsole): + raise RuntimeError("Rich console is required when progress UI is enabled.") + return value + + use_status = not args.quiet and not args.no_progress + try: + if use_status: + with console.status(ui.STATUS_DISCOVERING, spinner="dots"): + discovery_result = discover(boot=boot, cache=cache) + else: + discovery_result = discover(boot=boot, cache=cache) + except OSError as exc: + console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=exc))) + sys.exit(ExitCode.CONTRACT_ERROR) + + for warning in discovery_result.skipped_warnings: + console.print(f"[warning]{warning}[/warning]") + + total_files = len(discovery_result.files_to_process) + if total_files > 0 and not args.quiet and args.no_progress: + console.print(ui.fmt_processing_changed(total_files)) + + if total_files > 0 and not args.no_progress: + ( + progress_cls, + spinner_column_cls, + text_column_cls, + bar_column_cls, + time_elapsed_column_cls, + ) = _rich_progress_symbols() + + with progress_cls( + spinner_column_cls(), + text_column_cls("[progress.description]{task.description}"), + bar_column_cls(), + 
text_column_cls("[progress.percentage]{task.percentage:>3.0f}%"), + time_elapsed_column_cls(), + console=_require_rich_console(console), + ) as progress_ui: + task_id = progress_ui.add_task( + f"Analyzing {total_files} files...", + total=total_files, + ) + processing_result = process( + boot=boot, + discovery=discovery_result, + cache=cache, + on_advance=lambda: progress_ui.advance(task_id), + on_worker_error=lambda reason: console.print( + ui.fmt_worker_failed(reason) + ), + on_parallel_fallback=lambda exc: console.print( + ui.fmt_parallel_fallback(exc) + ), + ) else: + processing_result = process( + boot=boot, + discovery=discovery_result, + cache=cache, + on_worker_error=( + (lambda reason: console.print(ui.fmt_batch_item_failed(reason))) + if args.no_progress + else (lambda reason: console.print(ui.fmt_worker_failed(reason))) + ), + on_parallel_fallback=lambda exc: console.print( + ui.fmt_parallel_fallback(exc) + ), + ) + + _print_failed_files(processing_result.failed_files) + # Keep unreadable-source diagnostics visible in normal mode even if + # failed_files was filtered/empty due upstream transport differences. 
+ if not processing_result.failed_files and processing_result.source_read_failures: + _print_failed_files(processing_result.source_read_failures) + + if use_status: with console.status(ui.STATUS_GROUPING, spinner="dots"): - func_groups = build_groups(all_units) - block_groups = build_block_groups(all_blocks) - segment_groups = build_segment_groups(all_segments) - segment_groups, suppressed_segment_groups = prepare_segment_report_groups( - segment_groups + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, ) + _cache_update_segment_projection(cache, analysis_result) try: cache.save() - except CacheError as e: - console.print(ui.fmt_cache_save_failed(e)) - - # Reporting - block_groups_report = prepare_block_report_groups(block_groups) - block_group_facts = build_block_group_facts(block_groups_report) - func_clones_count = len(func_groups) - block_clones_count = len(block_groups) - segment_clones_count = len(segment_groups) - - # Baseline Logic - baseline_arg_path = Path(args.baseline).expanduser() - try: - baseline_path = baseline_arg_path.resolve() - baseline_exists = baseline_path.exists() - except OSError as e: + except CacheError as exc: + console.print(ui.fmt_cache_save_failed(exc)) + else: + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) + _cache_update_segment_projection(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + console.print(ui.fmt_cache_save_failed(exc)) + + return discovery_result, processing_result, analysis_result + + +def _write_report_outputs( + *, + args: Namespace, + output_paths: OutputPaths, + report_artifacts: ReportArtifacts, + open_html_report: bool = False, +) -> str | None: + return _write_report_outputs_impl( + args=cast("_QuietArgsLike", cast(object, args)), + output_paths=output_paths, + report_artifacts=report_artifacts, + console=cast("_PrinterLike", console), + open_html_report=open_html_report, + 
) + + +def _enforce_gating( + *, + args: Namespace, + boot: BootstrapResult, + analysis: AnalysisResult, + processing: PipelineProcessingResult, + source_read_contract_failure: bool, + baseline_failure_code: ExitCode | None, + metrics_baseline_failure_code: ExitCode | None, + new_func: set[str], + new_block: set[str], + metrics_diff: MetricsDiff | None, + html_report_path: str | None, +) -> None: + if source_read_contract_failure: console.print( ui.fmt_contract_error( - ui.fmt_invalid_baseline_path(path=baseline_arg_path, error=e) + ui.fmt_unreadable_source_in_gating( + count=len(processing.source_read_failures) + ) ) ) + for failure in processing.source_read_failures[:10]: + console.print(f" • {failure}") + if len(processing.source_read_failures) > 10: + console.print(f" ... and {len(processing.source_read_failures) - 10} more") sys.exit(ExitCode.CONTRACT_ERROR) - # If user didn't specify path, the default is ./codeclone.baseline.json. + if baseline_failure_code is not None: + console.print(ui.fmt_contract_error(ui.ERR_BASELINE_GATING_REQUIRES_TRUSTED)) + sys.exit(baseline_failure_code) - baseline = Baseline(baseline_path) - baseline_loaded = False - baseline_status = BaselineStatus.MISSING - baseline_failure_code: ExitCode | None = None - baseline_trusted_for_diff = False + if metrics_baseline_failure_code is not None: + console.print( + ui.fmt_contract_error( + "Metrics baseline is untrusted or missing for requested metrics gating." 
+ ) + ) + sys.exit(metrics_baseline_failure_code) + + gate_result = gate( + boot=boot, + analysis=analysis, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + ) - if baseline_exists: - try: - baseline.load(max_size_bytes=args.max_baseline_size_mb * 1024 * 1024) - except BaselineValidationError as e: - baseline_status = coerce_baseline_status(e.status) - if not args.update_baseline: - console.print(ui.fmt_invalid_baseline(e)) - if args.fail_on_new: - baseline_failure_code = ExitCode.CONTRACT_ERROR - else: - console.print(ui.WARN_BASELINE_IGNORED) - else: - if not args.update_baseline: - try: - baseline.verify_compatibility( - current_python_tag=current_python_tag() - ) - except BaselineValidationError as e: - baseline_status = coerce_baseline_status(e.status) - console.print(ui.fmt_invalid_baseline(e)) - if args.fail_on_new: - baseline_failure_code = ExitCode.CONTRACT_ERROR - else: - console.print(ui.WARN_BASELINE_IGNORED) - else: - baseline_loaded = True - baseline_status = BaselineStatus.OK - baseline_trusted_for_diff = True - else: - if not args.update_baseline: - console.print(ui.fmt_path(ui.WARN_BASELINE_MISSING, baseline_path)) - - if baseline_status in BASELINE_UNTRUSTED_STATUSES: - baseline_loaded = False - baseline_trusted_for_diff = False - if args.fail_on_new and not args.update_baseline: - baseline_failure_code = ExitCode.CONTRACT_ERROR - - if args.update_baseline: - new_baseline = Baseline.from_groups( - func_groups, - block_groups, - path=baseline_path, - python_tag=current_python_tag(), - fingerprint_version=BASELINE_FINGERPRINT_VERSION, - schema_version=BASELINE_SCHEMA_VERSION, - generator_version=__version__, + metric_reasons = [ + reason[len("metric:") :] + for reason in gate_result.reasons + if reason.startswith("metric:") + ] + if metric_reasons: + _print_gating_failure_block( + code="metrics", + entries=[_parse_metric_reason_entry(reason) for reason in metric_reasons], + args=args, + ) + sys.exit(ExitCode.GATING_FAILURE) 
+ + if "clone:new" in gate_result.reasons: + default_report = Path(".cache/codeclone/report.html") + resolved_html_report_path = html_report_path + if resolved_html_report_path is None and default_report.exists(): + resolved_html_report_path = str(default_report) + + clone_entries: list[tuple[str, object]] = [ + ("new_function_clone_groups", len(new_func)), + ("new_block_clone_groups", len(new_block)), + ] + if resolved_html_report_path: + clone_entries.append(("report", resolved_html_report_path)) + clone_entries.append(("accept", "codeclone . --update-baseline")) + _print_gating_failure_block( + code="new-clones", + entries=clone_entries, + args=args, + ) + + if args.verbose: + _print_verbose_clone_hashes( + cast("_PrinterLike", console), + label="Function clone hashes", + clone_hashes=new_func, + ) + _print_verbose_clone_hashes( + cast("_PrinterLike", console), + label="Block clone hashes", + clone_hashes=new_block, + ) + + sys.exit(ExitCode.GATING_FAILURE) + + threshold_reason = next( + ( + reason + for reason in gate_result.reasons + if reason.startswith("clone:threshold:") + ), + None, + ) + if threshold_reason is not None: + _, _, total_raw, threshold_raw = threshold_reason.split(":", maxsplit=3) + total = int(total_raw) + threshold = int(threshold_raw) + _print_gating_failure_block( + code="threshold", + entries=( + ("clone_groups_total", total), + ("clone_groups_limit", threshold), + ), + args=args, + ) + sys.exit(ExitCode.GATING_FAILURE) + + +def _main_impl() -> None: + global console + + run_started_at = time.monotonic() + from ._cli_meta import _build_report_meta, _current_report_timestamp_utc + + ap = build_parser(__version__) + + def _prepare_run_inputs() -> tuple[ + Namespace, + Path, + Path, + bool, + Path, + bool, + OutputPaths, + Path, + dict[str, object] | None, + str, + ]: + global console + raw_argv = tuple(sys.argv[1:]) + explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) + report_path_origins = _report_path_origins(raw_argv) + 
report_generated_at_utc = _current_report_timestamp_utc() + cache_path_from_args = any( + arg in {"--cache-dir", "--cache-path"} + or arg.startswith(("--cache-dir=", "--cache-path=")) + for arg in sys.argv + ) + metrics_path_from_args = any( + arg == "--metrics-baseline" or arg.startswith("--metrics-baseline=") + for arg in sys.argv ) + args = ap.parse_args() + try: - new_baseline.save() - except OSError as e: + root_path = Path(args.root).resolve() + if not root_path.exists(): + console.print( + ui.fmt_contract_error(ui.ERR_ROOT_NOT_FOUND.format(path=root_path)) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + except OSError as exc: + console.print( + ui.fmt_contract_error(ui.ERR_INVALID_ROOT_PATH.format(error=exc)) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + try: + pyproject_config = load_pyproject_config(root_path) + except ConfigValidationError as exc: + console.print(ui.fmt_contract_error(str(exc))) + sys.exit(ExitCode.CONTRACT_ERROR) + apply_pyproject_config_overrides( + args=args, + config_values=pyproject_config, + explicit_cli_dests=explicit_cli_dests, + ) + if args.debug: + os.environ["CODECLONE_DEBUG"] = "1" + + if args.ci: + args.fail_on_new = True + args.no_color = True + args.quiet = True + + console = ( + _make_plain_console() + if args.quiet + else _make_console(no_color=args.no_color) + ) + + if not _validate_numeric_args(args): console.print( ui.fmt_contract_error( - ui.fmt_baseline_write_failed(path=baseline_path, error=e) + "Size limits must be non-negative integers (MB), " + "threshold flags must be >= 0 or -1." ) ) sys.exit(ExitCode.CONTRACT_ERROR) - console.print(ui.fmt_path(ui.SUCCESS_BASELINE_UPDATED, baseline_path)) - baseline = new_baseline - baseline_loaded = True - baseline_status = BaselineStatus.OK - baseline_trusted_for_diff = True - # When updating, we don't fail on new, we just saved the new state. - # But we might still want to print the summary. 
- try: - report_cache_path = cache_path.resolve() - except OSError: - report_cache_path = cache_path + baseline_arg_path = Path(args.baseline).expanduser() + try: + baseline_path = baseline_arg_path.resolve() + baseline_exists = baseline_path.exists() + except OSError as exc: + console.print( + ui.fmt_contract_error( + ui.fmt_invalid_baseline_path(path=baseline_arg_path, error=exc) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) - raw_cache_status = getattr(cache, "load_status", None) - if isinstance(raw_cache_status, CacheStatus): - cache_status = raw_cache_status - elif isinstance(raw_cache_status, str): + shared_baseline_payload: dict[str, object] | None = None + default_metrics_baseline = ap.get_default("metrics_baseline") + metrics_path_overridden = metrics_path_from_args or ( + args.metrics_baseline != default_metrics_baseline + ) + metrics_baseline_arg_path = Path( + args.metrics_baseline if metrics_path_overridden else args.baseline + ).expanduser() try: - cache_status = CacheStatus(raw_cache_status) - except ValueError: - cache_status = ( - CacheStatus.OK - if cache.load_warning is None - else CacheStatus.INVALID_TYPE + metrics_baseline_path = metrics_baseline_arg_path.resolve() + if metrics_baseline_path == baseline_path: + probe = _probe_metrics_baseline_section(metrics_baseline_path) + metrics_baseline_exists = probe.has_metrics_section + shared_baseline_payload = probe.payload + else: + metrics_baseline_exists = metrics_baseline_path.exists() + except OSError as exc: + console.print( + ui.fmt_contract_error( + ui.fmt_invalid_baseline_path( + path=metrics_baseline_arg_path, + error=exc, + ) + ) ) - else: - cache_status = ( - CacheStatus.OK if cache.load_warning is None else CacheStatus.INVALID_TYPE + sys.exit(ExitCode.CONTRACT_ERROR) + + if ( + args.update_baseline + and not args.skip_metrics + and not args.update_metrics_baseline + ): + args.update_metrics_baseline = True + _configure_metrics_mode( + args=args, + 
metrics_baseline_exists=metrics_baseline_exists, + ) + if ( + args.update_metrics_baseline + and metrics_baseline_path == baseline_path + and not baseline_exists + and not args.update_baseline + ): + # Unified baseline needs clone payload before metrics can be embedded. + args.update_baseline = True + + if args.quiet: + args.no_progress = True + + if not args.quiet: + print_banner(root=root_path) + + output_paths = _resolve_output_paths( + args, + report_path_origins=report_path_origins, + report_generated_at_utc=report_generated_at_utc, + ) + _validate_report_ui_flags(args=args, output_paths=output_paths) + cache_path = _resolve_cache_path( + root_path=root_path, + args=args, + from_args=cache_path_from_args, + ) + return ( + args, + root_path, + baseline_path, + baseline_exists, + metrics_baseline_path, + metrics_baseline_exists, + output_paths, + cache_path, + shared_baseline_payload, + report_generated_at_utc, ) - raw_cache_schema_version = getattr(cache, "cache_schema_version", None) - cache_schema_version = ( - raw_cache_schema_version if isinstance(raw_cache_schema_version, str) else None + ( + args, + root_path, + baseline_path, + baseline_exists, + metrics_baseline_path, + metrics_baseline_exists, + output_paths, + cache_path, + shared_baseline_payload, + report_generated_at_utc, + ) = _prepare_run_inputs() + + cache = Cache( + cache_path, + root=root_path, + max_size_bytes=args.max_cache_size_mb * 1024 * 1024, + min_loc=args.min_loc, + min_stmt=args.min_stmt, + block_min_loc=args.block_min_loc, + block_min_stmt=args.block_min_stmt, + segment_min_loc=args.segment_min_loc, + segment_min_stmt=args.segment_min_stmt, ) + cache.load() + if cache.load_warning: + console.print(f"[warning]{cache.load_warning}[/warning]") + + boot = bootstrap( + args=args, + root=root_path, + output_paths=output_paths, + cache_path=cache_path, + ) + discovery_result, processing_result, analysis_result = _run_analysis_stages( + args=args, + boot=boot, + cache=cache, + ) + + 
gating_mode = ( + args.fail_on_new + or args.fail_threshold >= 0 + or args.fail_complexity >= 0 + or args.fail_coupling >= 0 + or args.fail_cohesion >= 0 + or args.fail_cycles + or args.fail_dead_code + or args.fail_health >= 0 + or args.fail_on_new_metrics + ) + source_read_contract_failure = ( + bool(processing_result.source_read_failures) + and gating_mode + and not args.update_baseline + ) + baseline_state = _resolve_clone_baseline_state( + args=args, + baseline_path=baseline_path, + baseline_exists=baseline_exists, + analysis=analysis_result, + shared_baseline_payload=( + shared_baseline_payload if metrics_baseline_path == baseline_path else None + ), + ) + metrics_baseline_state = _resolve_metrics_baseline_state( + args=args, + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + baseline_updated_path=baseline_state.updated_path, + analysis=analysis_result, + shared_baseline_payload=( + shared_baseline_payload if metrics_baseline_path == baseline_path else None + ), + ) + + try: + report_cache_path = cache_path.resolve() + except OSError: + report_cache_path = cache_path + + cache_status, cache_schema_version = _resolve_cache_status(cache) report_meta = _build_report_meta( codeclone_version=__version__, + scan_root=root_path, baseline_path=baseline_path, - baseline=baseline, - baseline_loaded=baseline_loaded, - baseline_status=baseline_status.value, + baseline=baseline_state.baseline, + baseline_loaded=baseline_state.loaded, + baseline_status=baseline_state.status.value, cache_path=report_cache_path, cache_used=cache_status == CacheStatus.OK, cache_status=cache_status.value, cache_schema_version=cache_schema_version, - files_skipped_source_io=len(source_read_failures), + files_skipped_source_io=len(processing_result.source_read_failures), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline=metrics_baseline_state.baseline, + metrics_baseline_loaded=metrics_baseline_state.loaded, + 
metrics_baseline_status=metrics_baseline_state.status.value, + health_score=( + analysis_result.project_metrics.health.total + if analysis_result.project_metrics + else None + ), + health_grade=( + analysis_result.project_metrics.health.grade + if analysis_result.project_metrics + else None + ), + analysis_mode=("clones_only" if args.skip_metrics else "full"), + metrics_computed=_metrics_computed(args), + report_generated_at_utc=report_generated_at_utc, ) - # Diff baseline_for_diff = ( - baseline if baseline_trusted_for_diff else Baseline(baseline_path) + baseline_state.baseline + if baseline_state.trusted_for_diff + else Baseline(baseline_path) + ) + new_func, new_block = baseline_for_diff.diff( + analysis_result.func_groups, + analysis_result.block_groups, ) - new_func, new_block = baseline_for_diff.diff(func_groups, block_groups) new_clones_count = len(new_func) + len(new_block) + metrics_diff: MetricsDiff | None = None + if ( + analysis_result.project_metrics is not None + and metrics_baseline_state.trusted_for_diff + ): + metrics_diff = metrics_baseline_state.baseline.diff( + analysis_result.project_metrics + ) + _print_summary( - console=console, + console=cast("_PrinterLike", console), quiet=args.quiet, - files_found=files_found, - files_analyzed=files_analyzed, - cache_hits=cache_hits, - files_skipped=files_skipped, - func_clones_count=func_clones_count, - block_clones_count=block_clones_count, - segment_clones_count=segment_clones_count, - suppressed_segment_groups=suppressed_segment_groups, + files_found=discovery_result.files_found, + files_analyzed=processing_result.files_analyzed, + cache_hits=discovery_result.cache_hits, + files_skipped=processing_result.files_skipped, + analyzed_lines=processing_result.analyzed_lines, + analyzed_functions=processing_result.analyzed_functions, + analyzed_methods=processing_result.analyzed_methods, + analyzed_classes=processing_result.analyzed_classes, + func_clones_count=analysis_result.func_clones_count, + 
block_clones_count=analysis_result.block_clones_count, + segment_clones_count=analysis_result.segment_clones_count, + suppressed_segment_groups=analysis_result.suppressed_segment_groups, new_clones_count=new_clones_count, ) - # Outputs - html_report_path: str | None = None - output_notice_printed = False - - def _print_output_notice(message: str) -> None: - nonlocal output_notice_printed - if args.quiet: - return - if not output_notice_printed: - console.print("") - output_notice_printed = True - console.print(message) - - def _write_report_output(*, out: Path, content: str, label: str) -> None: - try: - out.parent.mkdir(parents=True, exist_ok=True) - out.write_text(content, "utf-8") - except OSError as e: - console.print( - ui.fmt_contract_error( - ui.fmt_report_write_failed(label=label, path=out, error=e) - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - if html_out_path: - out = html_out_path - _write_report_output( - out=out, - content=build_html_report( - func_groups=func_groups, - block_groups=block_groups_report, - segment_groups=segment_groups, - block_group_facts=block_group_facts, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - report_meta=report_meta, - title="CodeClone Report", - context_lines=3, - max_snippet_lines=220, - ), - label="HTML", - ) - html_report_path = str(out) - _print_output_notice(ui.fmt_path(ui.INFO_HTML_REPORT_SAVED, out)) - - if json_out_path: - out = json_out_path - _write_report_output( - out=out, - content=to_json_report( - func_groups, - block_groups_report, - segment_groups, - report_meta, - block_group_facts, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - new_segment_group_keys=set(segment_groups.keys()), - ), - label="JSON", - ) - _print_output_notice(ui.fmt_path(ui.INFO_JSON_REPORT_SAVED, out)) - - if text_out_path: - out = text_out_path - _write_report_output( - out=out, - content=to_text_report( - meta=report_meta, - func_groups=func_groups, - block_groups=block_groups_report, 
- segment_groups=segment_groups, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - new_segment_group_keys=set(segment_groups.keys()), + if analysis_result.project_metrics is not None: + pm = analysis_result.project_metrics + _print_metrics( + console=cast("_PrinterLike", console), + quiet=args.quiet, + metrics=MetricsSnapshot( + complexity_avg=pm.complexity_avg, + complexity_max=pm.complexity_max, + high_risk_count=len(pm.high_risk_functions), + coupling_avg=pm.coupling_avg, + coupling_max=pm.coupling_max, + cohesion_avg=pm.cohesion_avg, + cohesion_max=pm.cohesion_max, + cycles_count=len(pm.dependency_cycles), + dead_code_count=len(pm.dead_code), + health_total=pm.health.total, + health_grade=pm.health.grade, + suppressed_dead_code_count=analysis_result.suppressed_dead_code_items, ), - label="text", - ) - _print_output_notice(ui.fmt_path(ui.INFO_TEXT_REPORT_SAVED, out)) - - if source_read_contract_failure: - console.print( - ui.fmt_contract_error( - ui.fmt_unreadable_source_in_gating(count=len(source_read_failures)) - ) ) - for failure in source_read_failures[:10]: - console.print(f" • {failure}") - if len(source_read_failures) > 10: - console.print(f" ... 
and {len(source_read_failures) - 10} more") - sys.exit(ExitCode.CONTRACT_ERROR) - if baseline_failure_code is not None: - console.print(ui.fmt_contract_error(ui.ERR_BASELINE_GATING_REQUIRES_TRUSTED)) - sys.exit(baseline_failure_code) - - # Exit Codes - if args.fail_on_new and (new_func or new_block): - default_report = Path(".cache/codeclone/report.html") - if html_report_path is None and default_report.exists(): - html_report_path = str(default_report) - - console.print(ui.fmt_gating_failure("New code clones detected.")) - console.print(f"\n{ui.FAIL_NEW_TITLE}") - console.print(f"\n{ui.FAIL_NEW_SUMMARY_TITLE}") - console.print(ui.FAIL_NEW_FUNCTION.format(count=len(new_func))) - console.print(ui.FAIL_NEW_BLOCK.format(count=len(new_block))) - if html_report_path: - console.print(f"\n{ui.FAIL_NEW_REPORT_TITLE}") - console.print(f" {html_report_path}") - console.print(f"\n{ui.FAIL_NEW_ACCEPT_TITLE}") - console.print(ui.FAIL_NEW_ACCEPT_COMMAND) - - if args.verbose: - if new_func: - console.print(f"\n{ui.FAIL_NEW_DETAIL_FUNCTION}") - for h in sorted(new_func): - console.print(f"- {h}") - if new_block: - console.print(f"\n{ui.FAIL_NEW_DETAIL_BLOCK}") - for h in sorted(new_block): - console.print(f"- {h}") - sys.exit(ExitCode.GATING_FAILURE) + report_artifacts = report( + boot=boot, + discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=new_func, + new_block=new_block, + html_builder=build_html_report, + metrics_diff=metrics_diff, + ) + html_report_path = _write_report_outputs( + args=args, + output_paths=output_paths, + report_artifacts=report_artifacts, + open_html_report=args.open_html_report, + ) - if 0 <= args.fail_threshold < (func_clones_count + block_clones_count): - total = func_clones_count + block_clones_count - console.print( - ui.fmt_gating_failure( - ui.fmt_fail_threshold(total=total, threshold=args.fail_threshold) - ) - ) - sys.exit(ExitCode.GATING_FAILURE) + _enforce_gating( + 
args=args, + boot=boot, + analysis=analysis_result, + processing=processing_result, + source_read_contract_failure=source_read_contract_failure, + baseline_failure_code=baseline_state.failure_code, + metrics_baseline_failure_code=metrics_baseline_state.failure_code, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + html_report_path=html_report_path, + ) if not args.update_baseline and not args.fail_on_new and new_clones_count > 0: console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) if not args.quiet: - elapsed = time.monotonic() - t0 - console.print(f"\n[dim]Done in {elapsed:.1f}s[/dim]") + elapsed = time.monotonic() - run_started_at + console.print() + console.print(ui.fmt_pipeline_done(elapsed)) def main() -> None: @@ -891,10 +1250,10 @@ def main() -> None: _main_impl() except SystemExit: raise - except Exception as e: + except Exception as exc: console.print( ui.fmt_internal_error( - e, + exc, issues_url=ISSUES_URL, debug=_is_debug_enabled(), ) diff --git a/codeclone/contracts.py b/codeclone/contracts.py index 0eacd25..fdb09de 100644 --- a/codeclone/contracts.py +++ b/codeclone/contracts.py @@ -1,21 +1,38 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations from enum import IntEnum from typing import Final -BASELINE_SCHEMA_VERSION: Final = "1.0" +BASELINE_SCHEMA_VERSION: Final = "2.0" BASELINE_FINGERPRINT_VERSION: Final = "1" -CACHE_VERSION: Final = "1.3" -REPORT_SCHEMA_VERSION: Final = "1.1" +CACHE_VERSION: Final = "2.2" +REPORT_SCHEMA_VERSION: Final = "2.1" +METRICS_BASELINE_SCHEMA_VERSION: Final = "1.0" + +DEFAULT_COMPLEXITY_THRESHOLD: Final = 20 +DEFAULT_COUPLING_THRESHOLD: Final = 10 +DEFAULT_COHESION_THRESHOLD: Final = 4 +DEFAULT_HEALTH_THRESHOLD: Final = 60 + +COMPLEXITY_RISK_LOW_MAX: Final = 10 +COMPLEXITY_RISK_MEDIUM_MAX: Final = 20 +COUPLING_RISK_LOW_MAX: Final = 5 +COUPLING_RISK_MEDIUM_MAX: Final = 10 +COHESION_RISK_MEDIUM_MAX: Final = 3 + +HEALTH_WEIGHTS: Final[dict[str, float]] = { + "clones": 0.25, + "complexity": 0.20, + "coupling": 0.10, + "cohesion": 0.15, + "dead_code": 0.10, + "dependencies": 0.10, + "coverage": 0.10, +} class ExitCode(IntEnum): @@ -27,38 +44,23 @@ class ExitCode(IntEnum): REPOSITORY_URL: Final = "https://github.com/orenlab/codeclone" ISSUES_URL: Final = "https://github.com/orenlab/codeclone/issues" -DOCS_URL: Final = "https://github.com/orenlab/codeclone/tree/main/docs" - -EXIT_CODE_DESCRIPTIONS: Final[tuple[tuple[ExitCode, str], ...]] = ( - (ExitCode.SUCCESS, "success"), - ( - ExitCode.CONTRACT_ERROR, - ( - "contract error (baseline missing/untrusted, invalid output " - "extensions, incompatible versions, unreadable source files in CI/gating)" - ), - ), - ( - ExitCode.GATING_FAILURE, - "gating failure (new clones detected, threshold exceeded)", - ), - ( - ExitCode.INTERNAL_ERROR, - "internal error (unexpected exception; please report)", - ), -) +DOCS_URL: Final = "https://orenlab.github.io/codeclone/" def cli_help_epilog() -> str: - lines = ["Exit codes"] - for code, description in EXIT_CODE_DESCRIPTIONS: - lines.append(f" - {int(code)} - {description}") - lines.extend( + 
return "\n".join( [ + "Exit codes:", + " 0 Success.", + " 2 Contract error: untrusted or invalid baseline, invalid output", + " configuration, incompatible versions, or unreadable sources in", + " CI/gating mode.", + " 3 Gating failure: new clones, threshold violations, or metrics", + " quality gate failures.", + " 5 Internal error: unexpected exception.", "", f"Repository: {REPOSITORY_URL}", - f"Issues: {ISSUES_URL}", - f"Docs: {DOCS_URL}", + f"Issues: {ISSUES_URL}", + f"Docs: {DOCS_URL}", ] ) - return "\n".join(lines) diff --git a/codeclone/domain/__init__.py b/codeclone/domain/__init__.py new file mode 100644 index 0000000..59fc066 --- /dev/null +++ b/codeclone/domain/__init__.py @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from .findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + CLONE_NOVELTY_KNOWN, + CLONE_NOVELTY_NEW, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_METRICS, + FAMILY_STRUCTURAL, + FINDING_KIND_CLASS_HOTSPOT, + FINDING_KIND_CLONE_GROUP, + FINDING_KIND_CYCLE, + FINDING_KIND_FUNCTION_HOTSPOT, + FINDING_KIND_UNUSED_SYMBOL, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + STRUCTURAL_KIND_DUPLICATED_BRANCHES, + SYMBOL_KIND_CLASS, + SYMBOL_KIND_IMPORT, + SYMBOL_KIND_METHOD, +) +from .quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_LOW, + CONFIDENCE_MEDIUM, + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + EFFORT_WEIGHT, + HEALTH_GRADE_A, + HEALTH_GRADE_B, + HEALTH_GRADE_C, + HEALTH_GRADE_D, + HEALTH_GRADE_F, + HEALTH_GRADES, + RISK_HIGH, + RISK_LOW, + RISK_MEDIUM, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_ORDER, + SEVERITY_RANK, + SEVERITY_WARNING, +) +from .source_scope import ( + IMPACT_SCOPE_MIXED, + IMPACT_SCOPE_NON_RUNTIME, + 
IMPACT_SCOPE_RUNTIME, + SOURCE_KIND_BREAKDOWN_KEYS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_ORDER, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) + +__all__ = [ + "CATEGORY_CLONE", + "CATEGORY_COHESION", + "CATEGORY_COMPLEXITY", + "CATEGORY_COUPLING", + "CATEGORY_DEAD_CODE", + "CATEGORY_DEPENDENCY", + "CATEGORY_STRUCTURAL", + "CLONE_KIND_BLOCK", + "CLONE_KIND_FUNCTION", + "CLONE_KIND_SEGMENT", + "CLONE_NOVELTY_KNOWN", + "CLONE_NOVELTY_NEW", + "CONFIDENCE_HIGH", + "CONFIDENCE_LOW", + "CONFIDENCE_MEDIUM", + "EFFORT_EASY", + "EFFORT_HARD", + "EFFORT_MODERATE", + "EFFORT_WEIGHT", + "FAMILY_CLONE", + "FAMILY_CLONES", + "FAMILY_DEAD_CODE", + "FAMILY_DESIGN", + "FAMILY_METRICS", + "FAMILY_STRUCTURAL", + "FINDING_KIND_CLASS_HOTSPOT", + "FINDING_KIND_CLONE_GROUP", + "FINDING_KIND_CYCLE", + "FINDING_KIND_FUNCTION_HOTSPOT", + "FINDING_KIND_UNUSED_SYMBOL", + "HEALTH_GRADES", + "HEALTH_GRADE_A", + "HEALTH_GRADE_B", + "HEALTH_GRADE_C", + "HEALTH_GRADE_D", + "HEALTH_GRADE_F", + "IMPACT_SCOPE_MIXED", + "IMPACT_SCOPE_NON_RUNTIME", + "IMPACT_SCOPE_RUNTIME", + "RISK_HIGH", + "RISK_LOW", + "RISK_MEDIUM", + "SEVERITY_CRITICAL", + "SEVERITY_INFO", + "SEVERITY_ORDER", + "SEVERITY_RANK", + "SEVERITY_WARNING", + "SOURCE_KIND_BREAKDOWN_KEYS", + "SOURCE_KIND_FIXTURES", + "SOURCE_KIND_MIXED", + "SOURCE_KIND_ORDER", + "SOURCE_KIND_OTHER", + "SOURCE_KIND_PRODUCTION", + "SOURCE_KIND_TESTS", + "STRUCTURAL_KIND_CLONE_COHORT_DRIFT", + "STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE", + "STRUCTURAL_KIND_DUPLICATED_BRANCHES", + "SYMBOL_KIND_CLASS", + "SYMBOL_KIND_IMPORT", + "SYMBOL_KIND_METHOD", +] diff --git a/codeclone/domain/findings.py b/codeclone/domain/findings.py new file mode 100644 index 0000000..37928b2 --- /dev/null +++ b/codeclone/domain/findings.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final + +CLONE_KIND_FUNCTION: Final = "function" 
+CLONE_KIND_BLOCK: Final = "block" +CLONE_KIND_SEGMENT: Final = "segment" + +SYMBOL_KIND_CLASS: Final = "class" +SYMBOL_KIND_METHOD: Final = "method" +SYMBOL_KIND_IMPORT: Final = "import" + +CLONE_NOVELTY_NEW: Final = "new" +CLONE_NOVELTY_KNOWN: Final = "known" + +FAMILY_CLONE: Final = "clone" +FAMILY_CLONES: Final = "clones" +FAMILY_STRUCTURAL: Final = "structural" +FAMILY_DEAD_CODE: Final = "dead_code" +FAMILY_DESIGN: Final = "design" +FAMILY_METRICS: Final = "metrics" + +CATEGORY_CLONE: Final = "clone" +CATEGORY_STRUCTURAL: Final = "structural" +CATEGORY_COMPLEXITY: Final = "complexity" +CATEGORY_COUPLING: Final = "coupling" +CATEGORY_COHESION: Final = "cohesion" +CATEGORY_DEAD_CODE: Final = "dead_code" +CATEGORY_DEPENDENCY: Final = "dependency" + +FINDING_KIND_CLONE_GROUP: Final = "clone_group" +FINDING_KIND_UNUSED_SYMBOL: Final = "unused_symbol" +FINDING_KIND_CLASS_HOTSPOT: Final = "class_hotspot" +FINDING_KIND_FUNCTION_HOTSPOT: Final = "function_hotspot" +FINDING_KIND_CYCLE: Final = "cycle" + +STRUCTURAL_KIND_DUPLICATED_BRANCHES: Final = "duplicated_branches" +STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: Final = "clone_guard_exit_divergence" +STRUCTURAL_KIND_CLONE_COHORT_DRIFT: Final = "clone_cohort_drift" + +__all__ = [ + "CATEGORY_CLONE", + "CATEGORY_COHESION", + "CATEGORY_COMPLEXITY", + "CATEGORY_COUPLING", + "CATEGORY_DEAD_CODE", + "CATEGORY_DEPENDENCY", + "CATEGORY_STRUCTURAL", + "CLONE_KIND_BLOCK", + "CLONE_KIND_FUNCTION", + "CLONE_KIND_SEGMENT", + "CLONE_NOVELTY_KNOWN", + "CLONE_NOVELTY_NEW", + "FAMILY_CLONE", + "FAMILY_CLONES", + "FAMILY_DEAD_CODE", + "FAMILY_DESIGN", + "FAMILY_METRICS", + "FAMILY_STRUCTURAL", + "FINDING_KIND_CLASS_HOTSPOT", + "FINDING_KIND_CLONE_GROUP", + "FINDING_KIND_CYCLE", + "FINDING_KIND_FUNCTION_HOTSPOT", + "FINDING_KIND_UNUSED_SYMBOL", + "STRUCTURAL_KIND_CLONE_COHORT_DRIFT", + "STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE", + "STRUCTURAL_KIND_DUPLICATED_BRANCHES", + "SYMBOL_KIND_CLASS", + "SYMBOL_KIND_IMPORT", + 
"SYMBOL_KIND_METHOD", +] diff --git a/codeclone/domain/quality.py b/codeclone/domain/quality.py new file mode 100644 index 0000000..6d03baa --- /dev/null +++ b/codeclone/domain/quality.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final + +SEVERITY_CRITICAL: Final = "critical" +SEVERITY_WARNING: Final = "warning" +SEVERITY_INFO: Final = "info" + +SEVERITY_RANK: Final[dict[str, int]] = { + SEVERITY_CRITICAL: 3, + SEVERITY_WARNING: 2, + SEVERITY_INFO: 1, +} +SEVERITY_ORDER: Final[dict[str, int]] = { + SEVERITY_CRITICAL: 0, + SEVERITY_WARNING: 1, + SEVERITY_INFO: 2, +} + +EFFORT_EASY: Final = "easy" +EFFORT_MODERATE: Final = "moderate" +EFFORT_HARD: Final = "hard" + +EFFORT_WEIGHT: Final[dict[str, int]] = { + EFFORT_EASY: 1, + EFFORT_MODERATE: 2, + EFFORT_HARD: 3, +} + +RISK_LOW: Final = "low" +RISK_MEDIUM: Final = "medium" +RISK_HIGH: Final = "high" + +CONFIDENCE_LOW: Final = "low" +CONFIDENCE_MEDIUM: Final = "medium" +CONFIDENCE_HIGH: Final = "high" + +HEALTH_GRADE_A: Final = "A" +HEALTH_GRADE_B: Final = "B" +HEALTH_GRADE_C: Final = "C" +HEALTH_GRADE_D: Final = "D" +HEALTH_GRADE_F: Final = "F" + +HEALTH_GRADES: Final[tuple[str, ...]] = ( + HEALTH_GRADE_A, + HEALTH_GRADE_B, + HEALTH_GRADE_C, + HEALTH_GRADE_D, + HEALTH_GRADE_F, +) + +__all__ = [ + "CONFIDENCE_HIGH", + "CONFIDENCE_LOW", + "CONFIDENCE_MEDIUM", + "EFFORT_EASY", + "EFFORT_HARD", + "EFFORT_MODERATE", + "EFFORT_WEIGHT", + "HEALTH_GRADES", + "HEALTH_GRADE_A", + "HEALTH_GRADE_B", + "HEALTH_GRADE_C", + "HEALTH_GRADE_D", + "HEALTH_GRADE_F", + "RISK_HIGH", + "RISK_LOW", + "RISK_MEDIUM", + "SEVERITY_CRITICAL", + "SEVERITY_INFO", + "SEVERITY_ORDER", + "SEVERITY_RANK", + "SEVERITY_WARNING", +] diff --git a/codeclone/domain/source_scope.py b/codeclone/domain/source_scope.py new file mode 100644 index 0000000..ddfd3ea --- /dev/null +++ b/codeclone/domain/source_scope.py @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: 
MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final + +SOURCE_KIND_PRODUCTION: Final = "production" +SOURCE_KIND_TESTS: Final = "tests" +SOURCE_KIND_FIXTURES: Final = "fixtures" +SOURCE_KIND_MIXED: Final = "mixed" +SOURCE_KIND_OTHER: Final = "other" + +SOURCE_KIND_ORDER: Final[dict[str, int]] = { + SOURCE_KIND_PRODUCTION: 0, + SOURCE_KIND_TESTS: 1, + SOURCE_KIND_FIXTURES: 2, + SOURCE_KIND_MIXED: 3, + SOURCE_KIND_OTHER: 4, +} + +SOURCE_KIND_BREAKDOWN_KEYS: Final[tuple[str, ...]] = ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_OTHER, +) + +IMPACT_SCOPE_RUNTIME: Final = "runtime" +IMPACT_SCOPE_NON_RUNTIME: Final = "non_runtime" +IMPACT_SCOPE_MIXED: Final = "mixed" + +__all__ = [ + "IMPACT_SCOPE_MIXED", + "IMPACT_SCOPE_NON_RUNTIME", + "IMPACT_SCOPE_RUNTIME", + "SOURCE_KIND_BREAKDOWN_KEYS", + "SOURCE_KIND_FIXTURES", + "SOURCE_KIND_MIXED", + "SOURCE_KIND_ORDER", + "SOURCE_KIND_OTHER", + "SOURCE_KIND_PRODUCTION", + "SOURCE_KIND_TESTS", +] diff --git a/codeclone/errors.py b/codeclone/errors.py index 87b8ffa..77371c4 100644 --- a/codeclone/errors.py +++ b/codeclone/errors.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy class CodeCloneError(Exception): diff --git a/codeclone/extractor.py b/codeclone/extractor.py index a2e814f..116731a 100644 --- a/codeclone/extractor.py +++ b/codeclone/extractor.py @@ -1,44 +1,68 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import ast +import io import math import os import signal -from collections.abc import Iterator +import tokenize from contextlib import contextmanager -from dataclasses import dataclass +from dataclasses import dataclass, field +from hashlib import sha1 as _sha1 +from typing import TYPE_CHECKING, Literal, NamedTuple -from .blockhash import stmt_hash -from .blocks import BlockUnit, SegmentUnit, extract_blocks, extract_segments +from .blockhash import stmt_hashes +from .blocks import extract_blocks, extract_segments from .cfg import CFGBuilder from .errors import ParseError from .fingerprint import bucket_loc, sha1 -from .normalize import NormalizationConfig, normalized_ast_dump_from_list - -# ========================= -# Data structures -# ========================= +from .metrics import ( + cohesion_risk, + compute_cbo, + compute_lcom4, + coupling_risk, + cyclomatic_complexity, + risk_level, +) +from .models import ( + BlockUnit, + ClassMetrics, + DeadCandidate, + FileMetrics, + ModuleDep, + SegmentUnit, + SourceStats, + StructuralFindingGroup, + Unit, +) +from .normalize import ( + AstNormalizer, + NormalizationConfig, + normalized_ast_dump_from_list, +) +from .paths import is_test_filepath +from .structural_findings import scan_function_structure +from .suppressions import ( + DeclarationTarget, + bind_suppressions_to_declarations, + build_suppression_index, + extract_suppression_directives, + suppression_target_key, +) +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping -@dataclass(frozen=True, slots=True) -class Unit: - qualname: str - filepath: str - start_line: int - end_line: int - loc: int - stmt_count: int - fingerprint: str - loc_bucket: str + from .suppressions import SuppressionTargetKey +__all__ = [ + "Unit", + "_QualnameCollector", + "extract_units_and_stats_from_source", +] # ========================= # Helpers @@ -51,6 +75,11 
@@ class _ParseTimeoutError(Exception): pass +FunctionNode = ast.FunctionDef | ast.AsyncFunctionDef +_NamedDeclarationNode = FunctionNode | ast.ClassDef +_DeclarationTokenIndexKey = tuple[int, int, str] + + def _consumed_cpu_seconds(resource_module: object) -> float: """Return consumed CPU seconds for the current process.""" try: @@ -128,25 +157,158 @@ def _stmt_count(node: ast.AST) -> int: return len(body) if isinstance(body, list) else 0 -class _QualnameBuilder(ast.NodeVisitor): - __slots__ = ("stack", "units") +def _source_tokens(source: str) -> tuple[tokenize.TokenInfo, ...]: + try: + return tuple(tokenize.generate_tokens(io.StringIO(source).readline)) + except tokenize.TokenError: + return () + + +def _declaration_token_name(node: ast.AST) -> str: + if isinstance(node, ast.ClassDef): + return "class" + if isinstance(node, ast.AsyncFunctionDef): + return "async" + return "def" + + +def _declaration_token_index( + *, + source_tokens: tuple[tokenize.TokenInfo, ...], + start_line: int, + start_col: int, + declaration_token: str, + source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, +) -> int | None: + if source_token_index is not None: + return source_token_index.get((start_line, start_col, declaration_token)) + for idx, token in enumerate(source_tokens): + if token.start != (start_line, start_col): + continue + if token.type == tokenize.NAME and token.string == declaration_token: + return idx + return None + + +def _build_declaration_token_index( + source_tokens: tuple[tokenize.TokenInfo, ...], +) -> Mapping[_DeclarationTokenIndexKey, int]: + indexed: dict[_DeclarationTokenIndexKey, int] = {} + for idx, token in enumerate(source_tokens): + if token.type != tokenize.NAME: + continue + if token.string not in {"def", "async", "class"}: + continue + indexed[(token.start[0], token.start[1], token.string)] = idx + return indexed + + +def _scan_declaration_colon_line( + *, + source_tokens: tuple[tokenize.TokenInfo, ...], + start_index: int, +) -> 
int | None: + nesting = 0 + for token in source_tokens[start_index + 1 :]: + if token.type == tokenize.OP: + if token.string in "([{": + nesting += 1 + continue + if token.string in ")]}": + if nesting > 0: + nesting -= 1 + continue + if token.string == ":" and nesting == 0: + return token.start[0] + if token.type == tokenize.NEWLINE and nesting == 0: + return None + return None + + +def _fallback_declaration_end_line(node: ast.AST, *, start_line: int) -> int: + body = getattr(node, "body", None) + if not isinstance(body, list) or not body: + return start_line + + first_body_line = int(getattr(body[0], "lineno", 0)) + if first_body_line <= 0 or first_body_line == start_line: + return start_line + return max(start_line, first_body_line - 1) + + +def _declaration_end_line( + node: ast.AST, + *, + source_tokens: tuple[tokenize.TokenInfo, ...], + source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, +) -> int: + start_line = int(getattr(node, "lineno", 0)) + start_col = int(getattr(node, "col_offset", 0)) + if start_line <= 0: + return 0 + + declaration_token = _declaration_token_name(node) + start_index = _declaration_token_index( + source_tokens=source_tokens, + start_line=start_line, + start_col=start_col, + declaration_token=declaration_token, + source_token_index=source_token_index, + ) + if start_index is None: + return _fallback_declaration_end_line(node, start_line=start_line) + + colon_line = _scan_declaration_colon_line( + source_tokens=source_tokens, + start_index=start_index, + ) + if colon_line is not None: + return colon_line + return _fallback_declaration_end_line(node, start_line=start_line) + + +class _QualnameCollector(ast.NodeVisitor): + __slots__ = ( + "class_count", + "class_nodes", + "funcs", + "function_count", + "method_count", + "stack", + "units", + ) def __init__(self) -> None: self.stack: list[str] = [] - self.units: list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]] = [] + self.units: list[tuple[str, 
FunctionNode]] = [] + self.class_nodes: list[tuple[str, ast.ClassDef]] = [] + self.funcs: dict[str, FunctionNode] = {} + self.class_count = 0 + self.function_count = 0 + self.method_count = 0 def visit_ClassDef(self, node: ast.ClassDef) -> None: + self.class_count += 1 + class_qualname = ".".join([*self.stack, node.name]) if self.stack else node.name + self.class_nodes.append((class_qualname, node)) self.stack.append(node.name) self.generic_visit(node) self.stack.pop() - def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + def _register_function(self, node: FunctionNode) -> None: name = ".".join([*self.stack, node.name]) if self.stack else node.name + if self.stack: + self.method_count += 1 + else: + self.function_count += 1 self.units.append((name, node)) + self.funcs[name] = node + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._register_function(node) def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: - name = ".".join([*self.stack, node.name]) if self.stack else node.name - self.units.append((name, node)) + self._register_function(node) # ========================= @@ -154,11 +316,11 @@ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: # ========================= -def get_cfg_fingerprint( - node: ast.FunctionDef | ast.AsyncFunctionDef, +def _cfg_fingerprint_and_complexity( + node: FunctionNode, cfg: NormalizationConfig, qualname: str, -) -> str: +) -> tuple[str, int]: """ Generate a structural fingerprint for a function using CFG analysis. 
@@ -182,6 +344,7 @@ def get_cfg_fingerprint( """ builder = CFGBuilder() graph = builder.build(qualname, node) + cfg_normalizer = AstNormalizer(cfg) # Use generator to avoid building large list of strings parts: list[str] = [] @@ -189,11 +352,568 @@ def get_cfg_fingerprint( succ_ids = ",".join( str(s.id) for s in sorted(block.successors, key=lambda s: s.id) ) - parts.append( - f"BLOCK[{block.id}]:{normalized_ast_dump_from_list(block.statements, cfg)}" - f"|SUCCESSORS:{succ_ids}" + block_dump = normalized_ast_dump_from_list( + block.statements, + cfg, + normalizer=cfg_normalizer, + ) + parts.append(f"BLOCK[{block.id}]:{block_dump}|SUCCESSORS:{succ_ids}") + return sha1("|".join(parts)), cyclomatic_complexity(graph) + + +def _raw_source_hash_for_range( + source_lines: list[str], + start_line: int, + end_line: int, +) -> str: + window = "".join(source_lines[start_line - 1 : end_line]).strip() + no_space = "".join(window.split()) + return _sha1(no_space.encode("utf-8")).hexdigest() + + +def _resolve_import_target( + module_name: str, + import_node: ast.ImportFrom, +) -> str: + if import_node.level <= 0: + return import_node.module or "" + + parent_parts = module_name.split(".") + keep = max(0, len(parent_parts) - import_node.level) + prefix = parent_parts[:keep] + if import_node.module: + return ".".join([*prefix, import_node.module]) + return ".".join(prefix) + + +_PROTOCOL_MODULE_NAMES = frozenset({"typing", "typing_extensions"}) + + +@dataclass(slots=True) +class _ModuleWalkState: + import_names: set[str] = field(default_factory=set) + deps: list[ModuleDep] = field(default_factory=list) + referenced_names: set[str] = field(default_factory=set) + imported_symbol_bindings: dict[str, set[str]] = field(default_factory=dict) + imported_module_aliases: dict[str, str] = field(default_factory=dict) + name_nodes: list[ast.Name] = field(default_factory=list) + attr_nodes: list[ast.Attribute] = field(default_factory=list) + protocol_symbol_aliases: set[str] = 
field(default_factory=lambda: {"Protocol"}) + protocol_module_aliases: set[str] = field( + default_factory=lambda: set(_PROTOCOL_MODULE_NAMES) + ) + + +def _append_module_dep( + *, + module_name: str, + target: str, + import_type: Literal["import", "from_import"], + line: int, + state: _ModuleWalkState, +) -> None: + state.deps.append( + ModuleDep( + source=module_name, + target=target, + import_type=import_type, + line=line, + ) + ) + + +def _collect_import_node( + *, + node: ast.Import, + module_name: str, + state: _ModuleWalkState, + collect_referenced_names: bool, +) -> None: + line = int(getattr(node, "lineno", 0)) + for alias in node.names: + alias_name = alias.asname or alias.name.split(".", 1)[0] + state.import_names.add(alias_name) + _append_module_dep( + module_name=module_name, + target=alias.name, + import_type="import", + line=line, + state=state, + ) + if collect_referenced_names: + state.imported_module_aliases[alias_name] = alias.name + if alias.name in _PROTOCOL_MODULE_NAMES: + state.protocol_module_aliases.add(alias_name) + + +def _dotted_expr_name(expr: ast.expr) -> str | None: + if isinstance(expr, ast.Name): + return expr.id + if isinstance(expr, ast.Attribute): + prefix = _dotted_expr_name(expr.value) + if prefix is None: + return None + return f"{prefix}.{expr.attr}" + return None + + +def _collect_import_from_node( + *, + node: ast.ImportFrom, + module_name: str, + state: _ModuleWalkState, + collect_referenced_names: bool, +) -> None: + target = _resolve_import_target(module_name, node) + if target: + state.import_names.add(target.split(".", 1)[0]) + _append_module_dep( + module_name=module_name, + target=target, + import_type="from_import", + line=int(getattr(node, "lineno", 0)), + state=state, + ) + + if node.module in _PROTOCOL_MODULE_NAMES: + for alias in node.names: + if alias.name == "Protocol": + state.protocol_symbol_aliases.add(alias.asname or alias.name) + + if not collect_referenced_names or not target: + return + + for alias in 
node.names: + if alias.name == "*": + continue + alias_name = alias.asname or alias.name + state.imported_symbol_bindings.setdefault(alias_name, set()).add( + f"{target}:{alias.name}" + ) + + +def _is_protocol_class( + class_node: ast.ClassDef, + *, + protocol_symbol_aliases: frozenset[str], + protocol_module_aliases: frozenset[str], +) -> bool: + for base in class_node.bases: + base_name = _dotted_expr_name(base) + if base_name is None: + continue + if base_name in protocol_symbol_aliases: + return True + if "." in base_name and base_name.rsplit(".", 1)[-1] == "Protocol": + module_alias = base_name.rsplit(".", 1)[0] + if module_alias in protocol_module_aliases: + return True + return False + + +def _is_non_runtime_candidate(node: FunctionNode) -> bool: + for decorator in node.decorator_list: + name = _dotted_expr_name(decorator) + if name is None: + continue + terminal = name.rsplit(".", 1)[-1] + if terminal in {"overload", "abstractmethod"}: + return True + return False + + +def _node_line_span(node: ast.AST) -> tuple[int, int] | None: + start = int(getattr(node, "lineno", 0)) + end = int(getattr(node, "end_lineno", 0)) + if start <= 0 or end <= 0: + return None + return start, end + + +def _dead_candidate_kind(local_name: str) -> Literal["function", "method"]: + return "method" if "." in local_name else "function" + + +def _should_skip_dead_candidate( + local_name: str, + node: FunctionNode, + *, + protocol_class_qualnames: set[str], +) -> bool: + if _is_non_runtime_candidate(node): + return True + if "." 
not in local_name: + return False + owner_qualname = local_name.rsplit(".", 1)[0] + return owner_qualname in protocol_class_qualnames + + +def _build_dead_candidate( + *, + module_name: str, + local_name: str, + node: _NamedDeclarationNode, + filepath: str, + kind: Literal["class", "function", "method"], + suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], + start_line: int, + end_line: int, +) -> DeadCandidate: + qualname = f"{module_name}:{local_name}" + return DeadCandidate( + qualname=qualname, + local_name=node.name, + filepath=filepath, + start_line=start_line, + end_line=end_line, + kind=kind, + suppressed_rules=suppression_index.get( + suppression_target_key( + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + kind=kind, + ), + (), + ), + ) + + +def _dead_candidate_for_unit( + *, + module_name: str, + local_name: str, + node: FunctionNode, + filepath: str, + suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], + protocol_class_qualnames: set[str], +) -> DeadCandidate | None: + span = _node_line_span(node) + if span is None: + return None + if _should_skip_dead_candidate( + local_name, + node, + protocol_class_qualnames=protocol_class_qualnames, + ): + return None + start, end = span + return _build_dead_candidate( + module_name=module_name, + local_name=local_name, + node=node, + filepath=filepath, + kind=_dead_candidate_kind(local_name), + suppression_index=suppression_index, + start_line=start, + end_line=end, + ) + + +def _collect_load_reference_node( + *, + node: ast.AST, + state: _ModuleWalkState, +) -> None: + if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load): + state.referenced_names.add(node.id) + state.name_nodes.append(node) + return + if isinstance(node, ast.Attribute) and isinstance(node.ctx, ast.Load): + state.referenced_names.add(node.attr) + state.attr_nodes.append(node) + + +def _resolve_referenced_qualnames( + *, + module_name: str, + collector: 
_QualnameCollector, + state: _ModuleWalkState, +) -> frozenset[str]: + top_level_class_by_name = { + class_qualname: class_qualname + for class_qualname, _class_node in collector.class_nodes + if "." not in class_qualname + } + local_method_qualnames = frozenset( + f"{module_name}:{local_name}" + for local_name, _node in collector.units + if "." in local_name + ) + + resolved: set[str] = set() + for name_node in state.name_nodes: + for qualname in state.imported_symbol_bindings.get(name_node.id, ()): + resolved.add(qualname) + + for attr_node in state.attr_nodes: + base = attr_node.value + if not isinstance(base, ast.Name): + continue + imported_module = state.imported_module_aliases.get(base.id) + if imported_module is not None: + resolved.add(f"{imported_module}:{attr_node.attr}") + continue + class_qualname = top_level_class_by_name.get(base.id) + if class_qualname is None: + continue + local_method_qualname = f"{module_name}:{class_qualname}.{attr_node.attr}" + if local_method_qualname in local_method_qualnames: + resolved.add(local_method_qualname) + + return frozenset(resolved) + + +class _ModuleWalkResult(NamedTuple): + import_names: frozenset[str] + module_deps: tuple[ModuleDep, ...] + referenced_names: frozenset[str] + referenced_qualnames: frozenset[str] + protocol_symbol_aliases: frozenset[str] + protocol_module_aliases: frozenset[str] + + +def _collect_module_walk_data( + *, + tree: ast.AST, + module_name: str, + collector: _QualnameCollector, + collect_referenced_names: bool, +) -> _ModuleWalkResult: + """Single ast.walk that collects imports, deps, names, qualnames & protocol aliases. + + Reduces the hot path to one tree walk plus one local qualname resolution phase. 
+ """ + state = _ModuleWalkState() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + _collect_import_node( + node=node, + module_name=module_name, + state=state, + collect_referenced_names=collect_referenced_names, + ) + continue + if isinstance(node, ast.ImportFrom): + _collect_import_from_node( + node=node, + module_name=module_name, + state=state, + collect_referenced_names=collect_referenced_names, + ) + continue + if collect_referenced_names: + _collect_load_reference_node(node=node, state=state) + + deps_sorted = tuple( + sorted( + state.deps, + key=lambda dep: (dep.source, dep.target, dep.import_type, dep.line), + ) + ) + resolved = ( + _resolve_referenced_qualnames( + module_name=module_name, + collector=collector, + state=state, + ) + if collect_referenced_names + else frozenset() + ) + + return _ModuleWalkResult( + import_names=frozenset(state.import_names), + module_deps=deps_sorted, + referenced_names=frozenset(state.referenced_names), + referenced_qualnames=resolved, + protocol_symbol_aliases=frozenset(state.protocol_symbol_aliases), + protocol_module_aliases=frozenset(state.protocol_module_aliases), + ) + + +def _collect_dead_candidates( + *, + filepath: str, + module_name: str, + collector: _QualnameCollector, + protocol_symbol_aliases: frozenset[str] = frozenset({"Protocol"}), + protocol_module_aliases: frozenset[str] = frozenset( + {"typing", "typing_extensions"} + ), + suppression_rules_by_target: Mapping[SuppressionTargetKey, tuple[str, ...]] + | None = None, +) -> tuple[DeadCandidate, ...]: + protocol_class_qualnames = { + class_qualname + for class_qualname, class_node in collector.class_nodes + if _is_protocol_class( + class_node, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + ) + } + + candidates: list[DeadCandidate] = [] + suppression_index = ( + suppression_rules_by_target if suppression_rules_by_target is not None else {} + ) + for local_name, node in 
collector.units: + candidate = _dead_candidate_for_unit( + module_name=module_name, + local_name=local_name, + node=node, + filepath=filepath, + suppression_index=suppression_index, + protocol_class_qualnames=protocol_class_qualnames, + ) + if candidate is None: + continue + candidates.append(candidate) + + for class_qualname, class_node in collector.class_nodes: + span = _node_line_span(class_node) + if span is None: + continue + start, end = span + candidates.append( + _build_dead_candidate( + module_name=module_name, + local_name=class_qualname, + node=class_node, + filepath=filepath, + kind="class", + suppression_index=suppression_index, + start_line=start, + end_line=end, + ) + ) + + return tuple( + sorted( + candidates, + key=lambda item: ( + item.filepath, + item.start_line, + item.end_line, + item.qualname, + ), + ) + ) + + +def _collect_declaration_targets( + *, + filepath: str, + module_name: str, + collector: _QualnameCollector, + source_tokens: tuple[tokenize.TokenInfo, ...] = (), + source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, + include_inline_lines: bool = False, +) -> tuple[DeclarationTarget, ...]: + declarations: list[DeclarationTarget] = [] + + for local_name, node in collector.units: + start = int(getattr(node, "lineno", 0)) + end = int(getattr(node, "end_lineno", 0)) + if start <= 0 or end <= 0: + continue + declaration_end_line = ( + _declaration_end_line( + node, + source_tokens=source_tokens, + source_token_index=source_token_index, + ) + if include_inline_lines + else None + ) + kind: Literal["function", "method"] = ( + "method" if "." 
in local_name else "function" + ) + declarations.append( + DeclarationTarget( + filepath=filepath, + qualname=f"{module_name}:{local_name}", + start_line=start, + end_line=end, + kind=kind, + declaration_end_line=declaration_end_line, + ) ) - return sha1("|".join(parts)) + + for class_qualname, class_node in collector.class_nodes: + start = int(getattr(class_node, "lineno", 0)) + end = int(getattr(class_node, "end_lineno", 0)) + if start <= 0 or end <= 0: + continue + declaration_end_line = ( + _declaration_end_line( + class_node, + source_tokens=source_tokens, + source_token_index=source_token_index, + ) + if include_inline_lines + else None + ) + declarations.append( + DeclarationTarget( + filepath=filepath, + qualname=f"{module_name}:{class_qualname}", + start_line=start, + end_line=end, + kind="class", + declaration_end_line=declaration_end_line, + ) + ) + + return tuple( + sorted( + declarations, + key=lambda item: ( + item.filepath, + item.start_line, + item.end_line, + item.qualname, + item.kind, + ), + ) + ) + + +def _build_suppression_index_for_source( + *, + source: str, + filepath: str, + module_name: str, + collector: _QualnameCollector, +) -> Mapping[SuppressionTargetKey, tuple[str, ...]]: + suppression_directives = extract_suppression_directives(source) + if not suppression_directives: + return {} + + needs_inline_binding = any( + directive.binding == "inline" for directive in suppression_directives + ) + source_tokens: tuple[tokenize.TokenInfo, ...] 
= () + source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None + if needs_inline_binding: + source_tokens = _source_tokens(source) + if source_tokens: + source_token_index = _build_declaration_token_index(source_tokens) + + declaration_targets = _collect_declaration_targets( + filepath=filepath, + module_name=module_name, + collector=collector, + source_tokens=source_tokens, + source_token_index=source_token_index, + include_inline_lines=needs_inline_binding, + ) + suppression_bindings = bind_suppressions_to_declarations( + directives=suppression_directives, + declarations=declaration_targets, + ) + return build_suppression_index(suppression_bindings) # ========================= @@ -201,27 +921,70 @@ def get_cfg_fingerprint( # ========================= -def extract_units_from_source( +def extract_units_and_stats_from_source( source: str, filepath: str, module_name: str, cfg: NormalizationConfig, min_loc: int, min_stmt: int, -) -> tuple[list[Unit], list[BlockUnit], list[SegmentUnit]]: + *, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, + collect_structural_findings: bool = True, +) -> tuple[ + list[Unit], + list[BlockUnit], + list[SegmentUnit], + SourceStats, + FileMetrics, + list[StructuralFindingGroup], +]: try: tree = _parse_with_limits(source, PARSE_TIMEOUT_SECONDS) except SyntaxError as e: raise ParseError(f"Failed to parse {filepath}: {e}") from e - qb = _QualnameBuilder() - qb.visit(tree) + collector = _QualnameCollector() + collector.visit(tree) + source_lines = source.splitlines() + source_line_count = len(source_lines) + + is_test_file = is_test_filepath(filepath) + + # Single-pass AST walk replaces 3 separate functions / 4 walks. 
+ _walk = _collect_module_walk_data( + tree=tree, + module_name=module_name, + collector=collector, + collect_referenced_names=not is_test_file, + ) + import_names = _walk.import_names + module_deps = _walk.module_deps + referenced_names = _walk.referenced_names + referenced_qualnames = _walk.referenced_qualnames + protocol_symbol_aliases = _walk.protocol_symbol_aliases + protocol_module_aliases = _walk.protocol_module_aliases + + suppression_index = _build_suppression_index_for_source( + source=source, + filepath=filepath, + module_name=module_name, + collector=collector, + ) + class_names = frozenset(class_node.name for _, class_node in collector.class_nodes) + module_import_names = set(import_names) + module_class_names = set(class_names) + class_metrics: list[ClassMetrics] = [] units: list[Unit] = [] block_units: list[BlockUnit] = [] segment_units: list[SegmentUnit] = [] + structural_findings: list[StructuralFindingGroup] = [] - for local_name, node in qb.units: + for local_name, node in collector.units: start = getattr(node, "lineno", None) end = getattr(node, "end_lineno", None) @@ -235,7 +998,16 @@ def extract_units_from_source( continue qualname = f"{module_name}:{local_name}" - fingerprint = get_cfg_fingerprint(node, cfg, qualname) + fingerprint, complexity = _cfg_fingerprint_and_complexity(node, cfg, qualname) + structure_facts = scan_function_structure( + node, + filepath, + qualname, + collect_findings=collect_structural_findings, + ) + depth = structure_facts.nesting_depth + risk = risk_level(complexity) + raw_hash = _raw_source_hash_for_range(source_lines, start, end) # Function-level unit (including __init__) units.append( @@ -248,20 +1020,36 @@ def extract_units_from_source( stmt_count=stmt_count, fingerprint=fingerprint, loc_bucket=bucket_loc(loc), + cyclomatic_complexity=complexity, + nesting_depth=depth, + risk=risk, + raw_hash=raw_hash, + entry_guard_count=structure_facts.entry_guard_count, + entry_guard_terminal_profile=( + 
structure_facts.entry_guard_terminal_profile + ), + entry_guard_has_side_effect_before=( + structure_facts.entry_guard_has_side_effect_before + ), + terminal_kind=structure_facts.terminal_kind, + try_finally_profile=structure_facts.try_finally_profile, + side_effect_order_profile=structure_facts.side_effect_order_profile, ) ) # Block-level and segment-level units share statement hashes needs_blocks = ( - not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10 + not local_name.endswith("__init__") + and loc >= block_min_loc + and stmt_count >= block_min_stmt ) - needs_segments = loc >= 30 and stmt_count >= 12 + needs_segments = loc >= segment_min_loc and stmt_count >= segment_min_stmt if needs_blocks or needs_segments: body = getattr(node, "body", None) hashes: list[str] | None = None if isinstance(body, list): - hashes = [stmt_hash(stmt, cfg) for stmt in body] + hashes = stmt_hashes(body, cfg) if needs_blocks: block_units.extend( @@ -289,4 +1077,76 @@ def extract_units_from_source( ) ) - return units, block_units, segment_units + # Structural findings extraction (report-only, no re-parse) + if collect_structural_findings: + structural_findings.extend(structure_facts.structural_findings) + + for class_qualname, class_node in collector.class_nodes: + start = int(getattr(class_node, "lineno", 0)) + end = int(getattr(class_node, "end_lineno", 0)) + if start <= 0 or end <= 0: + continue + cbo, coupled_classes = compute_cbo( + class_node, + module_import_names=module_import_names, + module_class_names=module_class_names, + ) + lcom4, method_count, instance_var_count = compute_lcom4(class_node) + class_metrics.append( + ClassMetrics( + qualname=f"{module_name}:{class_qualname}", + filepath=filepath, + start_line=start, + end_line=end, + cbo=cbo, + lcom4=lcom4, + method_count=method_count, + instance_var_count=instance_var_count, + risk_coupling=coupling_risk(cbo), + risk_cohesion=cohesion_risk(lcom4), + coupled_classes=coupled_classes, + ) + ) + + 
dead_candidates = _collect_dead_candidates( + filepath=filepath, + module_name=module_name, + collector=collector, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + suppression_rules_by_target=suppression_index, + ) + + sorted_class_metrics = tuple( + sorted( + class_metrics, + key=lambda item: ( + item.filepath, + item.start_line, + item.end_line, + item.qualname, + ), + ) + ) + + return ( + units, + block_units, + segment_units, + SourceStats( + lines=source_line_count, + functions=collector.function_count, + methods=collector.method_count, + classes=collector.class_count, + ), + FileMetrics( + class_metrics=sorted_class_metrics, + module_deps=module_deps, + dead_candidates=dead_candidates, + referenced_names=referenced_names, + import_names=import_names, + class_names=class_names, + referenced_qualnames=referenced_qualnames, + ), + structural_findings, + ) diff --git a/codeclone/fingerprint.py b/codeclone/fingerprint.py index 829f2e6..d47b8fc 100644 --- a/codeclone/fingerprint.py +++ b/codeclone/fingerprint.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/grouping.py b/codeclone/grouping.py new file mode 100644 index 0000000..583e62a --- /dev/null +++ b/codeclone/grouping.py @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .models import GroupItemsLike, GroupMap + + +def build_groups(units: GroupItemsLike) -> GroupMap: + groups: GroupMap = {} + for unit in units: + fingerprint = str(unit["fingerprint"]) + loc_bucket = str(unit["loc_bucket"]) + key = f"{fingerprint}|{loc_bucket}" + groups.setdefault(key, []).append(dict(unit)) + return {group_key: items for group_key, items in groups.items() if len(items) > 1} + + +def build_block_groups(blocks: GroupItemsLike, min_functions: int = 2) -> GroupMap: + groups: GroupMap = {} + for block in blocks: + groups.setdefault(str(block["block_hash"]), []).append(dict(block)) + + filtered: GroupMap = {} + for block_hash, items in groups.items(): + functions = {str(item["qualname"]) for item in items} + if len(functions) >= min_functions: + filtered[block_hash] = items + + return filtered + + +def build_segment_groups( + segments: GroupItemsLike, min_occurrences: int = 2 +) -> GroupMap: + signature_groups: GroupMap = {} + for segment in segments: + signature_groups.setdefault( + str(segment["segment_sig"]), + [], + ).append(dict(segment)) + + confirmed: GroupMap = {} + for items in signature_groups.values(): + if len(items) < min_occurrences: + continue + + hash_groups: GroupMap = {} + for item in items: + hash_groups.setdefault(str(item["segment_hash"]), []).append(dict(item)) + + for segment_hash, hash_items in hash_groups.items(): + if len(hash_items) < min_occurrences: + continue + + by_function: GroupMap = {} + for item in hash_items: + by_function.setdefault(str(item["qualname"]), []).append(item) + + for 
qualname, q_items in by_function.items(): + if len(q_items) >= min_occurrences: + confirmed[f"{segment_hash}|{qualname}"] = q_items + + return confirmed diff --git a/codeclone/html_report.py b/codeclone/html_report.py index fc6ee75..3783b23 100644 --- a/codeclone/html_report.py +++ b/codeclone/html_report.py @@ -1,789 +1,26 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Public facade for HTML report generation. -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. +Re-exports build_html_report from the new _html_report package and +keeps backward-compatible imports that tests and downstream code rely on. """ from __future__ import annotations -from collections.abc import Collection, Mapping - -from . import __version__ -from ._html_escape import _escape_attr, _escape_html, _meta_display +from ._html_report import build_html_report from ._html_snippets import ( _FileCache, - _prefix_css, _pygments_css, _render_code_block, _try_pygments, - pairwise, ) -from ._report_explain_contract import format_group_instance_compare_meta -from ._report_types import GroupItem, GroupMap -from .contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL -from .templates import FONT_CSS_URL, REPORT_TEMPLATE __all__ = [ "_FileCache", - "_prefix_css", "_pygments_css", "_render_code_block", "_try_pygments", "build_html_report", - "pairwise", ] - -# ============================ -# HTML report builder -# ============================ - - -def _group_sort_key(items: list[GroupItem]) -> tuple[int]: - return (-len(items),) - - -def build_html_report( - *, - func_groups: GroupMap, - block_groups: GroupMap, - segment_groups: GroupMap, - block_group_facts: dict[str, dict[str, str]], - new_function_group_keys: Collection[str] | None = None, - new_block_group_keys: Collection[str] | None = None, - report_meta: Mapping[str, object] | None = None, - title: 
str = "CodeClone Report", - context_lines: int = 3, - max_snippet_lines: int = 220, -) -> str: - file_cache = _FileCache() - resolved_block_group_facts = block_group_facts - - def _path_basename(value: object) -> str | None: - if not isinstance(value, str): - return None - text = value.strip() - if not text: - return None - normalized = text.replace("\\", "/").rstrip("/") - if not normalized: - return None - return normalized.rsplit("/", maxsplit=1)[-1] - - meta = dict(report_meta or {}) - baseline_loaded = bool(meta.get("baseline_loaded")) - baseline_status = str(meta.get("baseline_status", "")).strip().lower() - - if baseline_loaded and baseline_status == "ok": - baseline_split_note = ( - "Split is based on baseline: known duplicates are already " - "recorded in baseline, new duplicates are absent from baseline." - ) - else: - baseline_split_note = ( - "Baseline is not loaded or not trusted: " - "all duplicates are treated as new versus an empty baseline." - ) - - func_sorted = sorted( - func_groups.items(), key=lambda kv: (*_group_sort_key(kv[1]), kv[0]) - ) - block_sorted = sorted( - block_groups.items(), key=lambda kv: (*_group_sort_key(kv[1]), kv[0]) - ) - segment_sorted = sorted( - segment_groups.items(), key=lambda kv: (*_group_sort_key(kv[1]), kv[0]) - ) - - has_any = bool(func_sorted) or bool(block_sorted) or bool(segment_sorted) - - # Pygments CSS (scoped). Use modern GitHub-like styles when available. - # We scope per theme to support toggle without reloading. 
- pyg_dark_raw = _pygments_css("github-dark") - if not pyg_dark_raw: - pyg_dark_raw = _pygments_css("monokai") - pyg_light_raw = _pygments_css("github-light") - if not pyg_light_raw: - pyg_light_raw = _pygments_css("friendly") - - pyg_dark = _prefix_css(pyg_dark_raw, "html[data-theme='dark']") - pyg_light = _prefix_css(pyg_light_raw, "html[data-theme='light']") - - # ============================ - # Icons (Inline SVG) - # ============================ - def _svg_icon(size: int, stroke_width: str, body: str) -> str: - return ( - f'' - f"{body}" - ) - - ICONS = { - "search": _svg_icon( - 16, - "2.5", - '' - '', - ), - "clear": _svg_icon( - 16, - "2.5", - '' - '', - ), - "chev_down": _svg_icon( - 16, - "2.5", - '', - ), - # ICON_CHEV_RIGHT = ( - # '' - # '' - # "" - # ) - "theme": _svg_icon( - 16, - "2", - '', - ), - "check": _svg_icon( - 48, - "2", - '', - ), - "prev": _svg_icon( - 16, - "2", - '', - ), - "next": _svg_icon( - 16, - "2", - '', - ), - } - - # ---------------------------- - # Section renderer - # ---------------------------- - - def _display_group_key( - section_id: str, group_key: str, block_meta: dict[str, str] | None = None - ) -> str: - if section_id != "blocks": - return group_key - - if block_meta and block_meta.get("pattern_display"): - return str(block_meta["pattern_display"]) - - return group_key - - def _block_group_explanation_meta( - section_id: str, group_key: str - ) -> dict[str, str]: - if section_id != "blocks": - return {} - - raw = resolved_block_group_facts.get(group_key, {}) - return {str(k): str(v) for k, v in raw.items() if v is not None} - - def _render_group_explanation(meta: Mapping[str, object]) -> str: - if not meta: - return "" - - explain_items: list[tuple[str, str]] = [] - if meta.get("match_rule"): - explain_items.append( - (f"match_rule: {meta['match_rule']}", "group-explain-item") - ) - if meta.get("block_size"): - explain_items.append( - (f"block_size: {meta['block_size']}", "group-explain-item") - ) - if 
meta.get("signature_kind"): - explain_items.append( - (f"signature_kind: {meta['signature_kind']}", "group-explain-item") - ) - if meta.get("merged_regions"): - explain_items.append( - (f"merged_regions: {meta['merged_regions']}", "group-explain-item") - ) - pattern_value = str(meta.get("pattern", "")).strip() - if pattern_value: - pattern_label = str(meta.get("pattern_label", pattern_value)).strip() - pattern_display = str(meta.get("pattern_display", "")).strip() - if pattern_display: - explain_items.append( - ( - f"pattern: {pattern_label} ({pattern_display})", - "group-explain-item", - ) - ) - else: - explain_items.append( - (f"pattern: {pattern_label}", "group-explain-item") - ) - - hint_id = str(meta.get("hint", "")).strip() - if hint_id: - hint_label = str(meta.get("hint_label", hint_id)).strip() - explain_items.append( - (f"hint: {hint_label}", "group-explain-item group-explain-warn") - ) - if meta.get("hint_confidence"): - explain_items.append( - ( - f"hint_confidence: {meta['hint_confidence']}", - "group-explain-item group-explain-muted", - ) - ) - if meta.get("assert_ratio"): - explain_items.append( - ( - f"assert_ratio: {meta['assert_ratio']}", - "group-explain-item group-explain-muted", - ) - ) - if meta.get("consecutive_asserts"): - explain_items.append( - ( - f"consecutive_asserts: {meta['consecutive_asserts']}", - "group-explain-item group-explain-muted", - ) - ) - hint_context_label = str(meta.get("hint_context_label", "")).strip() - if hint_context_label: - explain_items.append( - ( - hint_context_label, - "group-explain-item group-explain-muted", - ) - ) - - attrs = { - "data-match-rule": str(meta.get("match_rule", "")), - "data-block-size": str(meta.get("block_size", "")), - "data-signature-kind": str(meta.get("signature_kind", "")), - "data-merged-regions": str(meta.get("merged_regions", "")), - "data-pattern": str(meta.get("pattern", "")), - "data-pattern-label": str(meta.get("pattern_label", "")), - "data-hint": str(meta.get("hint", "")), - 
"data-hint-label": str(meta.get("hint_label", "")), - "data-hint-context-label": str(meta.get("hint_context_label", "")), - "data-hint-confidence": str(meta.get("hint_confidence", "")), - "data-assert-ratio": str(meta.get("assert_ratio", "")), - "data-consecutive-asserts": str(meta.get("consecutive_asserts", "")), - } - attr_html = " ".join( - f'{key}="{_escape_attr(value)}"' for key, value in attrs.items() if value - ) - parts = [ - f'{_escape_html(text)}' - for text, css_class in explain_items - ] - note = "" - if isinstance(meta.get("hint_note"), str): - note_text = _escape_html(str(meta["hint_note"])) - note = f'

    {note_text}

    ' - return f'
    {"".join(parts)}{note}
    ' - - def render_section( - section_id: str, - section_title: str, - groups: list[tuple[str, list[GroupItem]]], - pill_cls: str, - *, - novelty_by_group: Mapping[str, str] | None = None, - ) -> str: - if not groups: - return "" - - def _block_group_name(display_key: str, meta: dict[str, str]) -> str: - if meta.get("group_display_name"): - return str(meta["group_display_name"]) - if len(display_key) > 56: - return f"{display_key[:24]}...{display_key[-16:]}" - return display_key - - def _group_name(display_key: str, meta: dict[str, str]) -> str: - if section_id == "blocks": - return _block_group_name(display_key, meta) - return display_key - - def _item_span_size(item: GroupItem) -> int: - start_line = int(item.get("start_line", 0)) - end_line = int(item.get("end_line", 0)) - return max(0, end_line - start_line + 1) - - def _group_span_size(items: list[GroupItem]) -> int: - return max((_item_span_size(item) for item in items), default=0) - - section_novelty = novelty_by_group or {} - has_novelty_filter = bool(section_novelty) - - out: list[str] = [ - f'
    ', - '
    ', - f"

    {_escape_html(section_title)} " - f'' - f"{len(groups)} groups

    ", - "
    ", - f""" - -""", - '
    ', - ] - - for idx, (gkey, items) in enumerate(groups, start=1): - group_id = f"{section_id}-{idx}" - search_parts: list[str] = [str(gkey)] - for it in items: - search_parts.append(str(it.get("qualname", ""))) - search_parts.append(str(it.get("filepath", ""))) - search_blob = " ".join(search_parts).lower() - search_blob_escaped = _escape_attr(search_blob) - block_meta = _block_group_explanation_meta(section_id, gkey) - display_key = _display_group_key(section_id, gkey, block_meta) - group_name = _group_name(display_key, block_meta) - group_span_size = _group_span_size(items) - group_arity = len(items) - if section_id == "blocks": - block_size_raw = block_meta.get("block_size", "").strip() - if block_size_raw.isdigit(): - group_span_size = int(block_size_raw) - arity_raw = block_meta.get("group_arity", "").strip() - if arity_raw.isdigit() and int(arity_raw) > 0: - group_arity = int(arity_raw) - group_summary = ( - f"{group_arity} instances • block size {group_span_size}" - if group_span_size > 0 - else f"{group_arity} instances" - ) - block_group_attrs = "" - if block_meta: - attrs = { - "data-group-id": group_id, - "data-clone-size": str(group_span_size), - "data-items-count": str(group_arity), - "data-match-rule": block_meta.get("match_rule"), - "data-block-size": block_meta.get("block_size"), - "data-signature-kind": block_meta.get("signature_kind"), - "data-merged-regions": block_meta.get("merged_regions"), - "data-pattern": block_meta.get("pattern"), - "data-pattern-label": block_meta.get("pattern_label"), - "data-hint": block_meta.get("hint"), - "data-hint-label": block_meta.get("hint_label"), - "data-hint-context-label": block_meta.get("hint_context_label"), - "data-hint-confidence": block_meta.get("hint_confidence"), - "data-assert-ratio": block_meta.get("assert_ratio"), - "data-consecutive-asserts": block_meta.get("consecutive_asserts"), - "data-boilerplate-asserts": block_meta.get("boilerplate_asserts"), - } - block_group_attrs = " ".join( - 
f'{name}="{_escape_attr(value)}"' - for name, value in attrs.items() - if value - ) - if block_group_attrs: - block_group_attrs = f" {block_group_attrs}" - if 'data-group-id="' not in block_group_attrs: - group_id_attr = _escape_attr(group_id) - block_group_attrs = ( - f' data-group-id="{group_id_attr}"{block_group_attrs}' - ) - if 'data-clone-size="' not in block_group_attrs: - clone_size_attr = _escape_attr(str(group_span_size)) - block_group_attrs += f' data-clone-size="{clone_size_attr}"' - if 'data-items-count="' not in block_group_attrs: - items_count_attr = _escape_attr(str(group_arity)) - block_group_attrs += f' data-items-count="{items_count_attr}"' - arity_attr = _escape_attr(str(group_arity)) - block_group_attrs += f' data-group-arity="{arity_attr}"' - - metrics_button = "" - if section_id == "blocks": - metrics_button = ( - f'' - ) - group_novelty = section_novelty.get(gkey, "all") - out.append( - f'
    ' - ) - - out.append( - '
    ' - '
    ' - f'' - '
    ' - f'
    {_escape_html(group_name)}
    ' - f'
    {_escape_html(group_summary)}
    ' - "
    " - "
    " - '
    ' - f'{group_arity}' - f"{metrics_button}" - "
    " - "
    " - ) - if section_id == "blocks" and group_arity > 2: - compare_note = block_meta.get("group_compare_note", "").strip() - if compare_note: - out.append( - '
    ' - f"{_escape_html(compare_note)}" - "
    " - ) - - if section_id == "blocks": - explanation_html = _render_group_explanation(block_meta) - if explanation_html: - out.append(explanation_html) - - out.append(f'
    ') - for item_index, item in enumerate(items, start=1): - snippet = _render_code_block( - filepath=item["filepath"], - start_line=int(item["start_line"]), - end_line=int(item["end_line"]), - file_cache=file_cache, - context=context_lines, - max_lines=max_snippet_lines, - ) - qualname = _escape_html(item["qualname"]) - qualname_attr = _escape_attr(item["qualname"]) - filepath = _escape_html(item["filepath"]) - filepath_attr = _escape_attr(item["filepath"]) - start_line = int(item["start_line"]) - end_line = int(item["end_line"]) - peer_count = 0 - peer_count_raw = block_meta.get("instance_peer_count", "").strip() - if peer_count_raw.isdigit() and int(peer_count_raw) >= 0: - peer_count = int(peer_count_raw) - compare_meta_html = "" - if section_id == "blocks" and "group_arity" in block_meta: - compare_text = format_group_instance_compare_meta( - instance_index=item_index, - group_arity=group_arity, - peer_count=peer_count, - ) - compare_meta_html = ( - f'
    {compare_text}
    ' - ) - out.append( - f'
    ' - '
    ' - f'
    {qualname}
    ' - f'
    ' - f"{filepath}:{start_line}-{end_line}" - "
    " - "
    " - f"{compare_meta_html}" - f"{snippet.code_html}" - "
    " - ) - out.append("
    ") # group-body - out.append("
    ") # group - - out.append("
    ") # section-body - out.append("
    ") - return "\n".join(out) - - # ============================ - # HTML Rendering - # ============================ - - empty_state_html = "" - if not has_any: - empty_state_html = f""" -
    -
    -
    {ICONS["check"]}
    -

    No code clones detected

    -

    - No structural, block-level, or segment-level duplication was found above - configured thresholds. -

    -

    This usually indicates healthy abstraction boundaries.

    -
    -
    -""" - - new_function_key_set = set(new_function_group_keys or ()) - new_block_key_set = set(new_block_group_keys or ()) - function_novelty = { - group_key: ("new" if group_key in new_function_key_set else "known") - for group_key, _ in func_sorted - } - block_novelty = { - group_key: ("new" if group_key in new_block_key_set else "known") - for group_key, _ in block_sorted - } - novelty_enabled = bool(function_novelty) or bool(block_novelty) - total_new_groups = sum(1 for value in function_novelty.values() if value == "new") - total_new_groups += sum(1 for value in block_novelty.values() if value == "new") - total_known_groups = sum( - 1 for value in function_novelty.values() if value == "known" - ) - total_known_groups += sum(1 for value in block_novelty.values() if value == "known") - default_novelty = "new" if total_new_groups > 0 else "known" - global_novelty_html = "" - if novelty_enabled: - global_novelty_html = ( - '
    ' - '
    ' - "

    Duplicate Scope

    " - '
    ' - '" - '" - "
    " - "
    " - f'

    {_escape_html(baseline_split_note)}

    ' - "
    " - ) - - func_section = render_section( - "functions", - "Function clones", - func_sorted, - "pill-func", - novelty_by_group=function_novelty, - ) - block_section = render_section( - "blocks", - "Block clones", - block_sorted, - "pill-block", - novelty_by_group=block_novelty, - ) - segment_section = render_section( - "segments", "Segment clones", segment_sorted, "pill-segment" - ) - baseline_path_value = meta.get("baseline_path") - meta_rows: list[tuple[str, object]] = [ - ("Report schema", meta.get("report_schema_version")), - ("CodeClone", meta.get("codeclone_version", __version__)), - ("Python", meta.get("python_version")), - ("Baseline file", _path_basename(baseline_path_value)), - ("Baseline fingerprint", meta.get("baseline_fingerprint_version")), - ("Baseline schema", meta.get("baseline_schema_version")), - ("Baseline Python tag", meta.get("baseline_python_tag")), - ("Baseline generator name", meta.get("baseline_generator_name")), - ("Baseline generator version", meta.get("baseline_generator_version")), - ("Baseline payload sha256", meta.get("baseline_payload_sha256")), - ( - "Baseline payload verified", - meta.get("baseline_payload_sha256_verified"), - ), - ("Baseline loaded", meta.get("baseline_loaded")), - ("Baseline status", meta.get("baseline_status")), - ("Source IO skipped", meta.get("files_skipped_source_io")), - ("Baseline path", baseline_path_value), - ] - if "cache_path" in meta: - meta_rows.append(("Cache path", meta.get("cache_path"))) - if "cache_schema_version" in meta: - meta_rows.append(("Cache schema", meta.get("cache_schema_version"))) - if "cache_status" in meta: - meta_rows.append(("Cache status", meta.get("cache_status"))) - if "cache_used" in meta: - meta_rows.append(("Cache used", meta.get("cache_used"))) - - meta_attrs = " ".join( - [ - ( - 'data-report-schema-version="' - f'{_escape_attr(meta.get("report_schema_version"))}"' - ), - ( - 'data-codeclone-version="' - f'{_escape_attr(meta.get("codeclone_version", __version__))}"' - 
), - f'data-python-version="{_escape_attr(meta.get("python_version"))}"', - f'data-baseline-file="{_escape_attr(_path_basename(baseline_path_value))}"', - f'data-baseline-path="{_escape_attr(baseline_path_value)}"', - ( - 'data-baseline-fingerprint-version="' - f'{_escape_attr(meta.get("baseline_fingerprint_version"))}"' - ), - f'data-baseline-schema-version="{_escape_attr(meta.get("baseline_schema_version"))}"', - ( - 'data-baseline-python-tag="' - f'{_escape_attr(meta.get("baseline_python_tag"))}"' - ), - ( - 'data-baseline-generator-name="' - f'{_escape_attr(meta.get("baseline_generator_name"))}"' - ), - ( - 'data-baseline-generator-version="' - f'{_escape_attr(meta.get("baseline_generator_version"))}"' - ), - ( - 'data-baseline-payload-sha256="' - f'{_escape_attr(meta.get("baseline_payload_sha256"))}"' - ), - ( - 'data-baseline-payload-verified="' - f'{_escape_attr(_meta_display(meta.get("baseline_payload_sha256_verified")))}"' - ), - f'data-baseline-loaded="{_escape_attr(_meta_display(meta.get("baseline_loaded")))}"', - f'data-baseline-status="{_escape_attr(meta.get("baseline_status"))}"', - f'data-cache-path="{_escape_attr(meta.get("cache_path"))}"', - ( - 'data-cache-schema-version="' - f'{_escape_attr(meta.get("cache_schema_version"))}"' - ), - f'data-cache-status="{_escape_attr(meta.get("cache_status"))}"', - f'data-cache-used="{_escape_attr(_meta_display(meta.get("cache_used")))}"', - ( - 'data-files-skipped-source-io="' - f'{_escape_attr(meta.get("files_skipped_source_io"))}"' - ), - ] - ) - - def _meta_item_class(label: str) -> str: - cls = ["meta-item"] - if label in {"Baseline path", "Cache path", "Baseline payload sha256"}: - cls.append("meta-item-wide") - if label in { - "Baseline payload verified", - "Baseline loaded", - "Cache used", - }: - cls.append("meta-item-boolean") - return " ".join(cls) - - def _meta_value_html(label: str, value: object) -> str: - if label in { - "Baseline payload verified", - "Baseline loaded", - "Cache used", - } and 
isinstance(value, bool): - badge_cls = "meta-bool-true" if value else "meta-bool-false" - text = "true" if value else "false" - return f'{text}' - return _escape_html(_meta_display(value)) - - meta_rows_html = "".join( - ( - f'
    ' - f'
    {_escape_html(label)}
    ' - f'
    {_meta_value_html(label, value)}
    ' - "
    " - ) - for label, value in meta_rows - ) - - # Chevron icon for toggle - chevron_icon = ( - '' - '' - "" - ) - - report_meta_html = ( - f'
    ' - '
    ' - '
    ' - "Report Provenance" - "
    " - f'' - "
    " - '" - "
    " - ) - - return REPORT_TEMPLATE.substitute( - title=_escape_html(title), - version=__version__, - pyg_dark=pyg_dark, - pyg_light=pyg_light, - global_novelty_html=global_novelty_html, - report_meta_html=report_meta_html, - empty_state_html=empty_state_html, - func_section=func_section, - block_section=block_section, - segment_section=segment_section, - icon_theme=ICONS["theme"], - font_css_url=FONT_CSS_URL, - repository_url=_escape_attr(REPOSITORY_URL), - issues_url=_escape_attr(ISSUES_URL), - docs_url=_escape_attr(DOCS_URL), - ) diff --git a/codeclone/meta_markers.py b/codeclone/meta_markers.py index cc9197f..ec0d390 100644 --- a/codeclone/meta_markers.py +++ b/codeclone/meta_markers.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/__init__.py b/codeclone/metrics/__init__.py new file mode 100644 index 0000000..e9c1afe --- /dev/null +++ b/codeclone/metrics/__init__.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .cohesion import cohesion_risk, compute_lcom4 +from .complexity import cyclomatic_complexity, nesting_depth, risk_level +from .coupling import compute_cbo, coupling_risk +from .dead_code import find_suppressed_unused, find_unused +from .dependencies import ( + build_dep_graph, + build_import_graph, + find_cycles, + longest_chains, + max_depth, +) +from .health import HealthInputs, compute_health + +__all__ = [ + "HealthInputs", + "build_dep_graph", + "build_import_graph", + "cohesion_risk", + "compute_cbo", + "compute_health", + "compute_lcom4", + "coupling_risk", + "cyclomatic_complexity", + "find_cycles", + "find_suppressed_unused", + "find_unused", + "longest_chains", + 
"max_depth", + "nesting_depth", + "risk_level", +] diff --git a/codeclone/metrics/cohesion.py b/codeclone/metrics/cohesion.py new file mode 100644 index 0000000..f0b2cc1 --- /dev/null +++ b/codeclone/metrics/cohesion.py @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +from typing import Literal + +from ..contracts import COHESION_RISK_MEDIUM_MAX + + +def _self_attribute_name(node: ast.AST) -> str | None: + if ( + isinstance(node, ast.Attribute) + and isinstance(node.value, ast.Name) + and node.value.id == "self" + ): + return node.attr + return None + + +def compute_lcom4(class_node: ast.ClassDef) -> tuple[int, int, int]: + methods: list[ast.FunctionDef | ast.AsyncFunctionDef] = [ + node + for node in class_node.body + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) + ] + method_names = tuple(method.name for method in methods) + if not methods: + return 1, 0, 0 + + method_to_attrs: dict[str, set[str]] = {name: set() for name in method_names} + method_calls: dict[str, set[str]] = {name: set() for name in method_names} + + for method in methods: + for node in ast.walk(method): + attr_name = _self_attribute_name(node) + if attr_name is not None: + method_to_attrs[method.name].add(attr_name) + continue + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Attribute) + and isinstance(node.func.value, ast.Name) + and node.func.value.id == "self" + ): + callee = node.func.attr + if callee in method_calls: + method_calls[method.name].add(callee) + + adjacency: dict[str, set[str]] = {name: set() for name in method_names} + for name in method_names: + adjacency[name].update(method_calls[name]) + for callee in method_calls[name]: + adjacency.setdefault(callee, set()).add(name) + + for i, left in enumerate(method_names): + left_attrs = method_to_attrs[left] + for right in method_names[i + 1 :]: + if left_attrs & method_to_attrs[right]: + 
adjacency[left].add(right) + adjacency[right].add(left) + + visited: set[str] = set() + components = 0 + + for method_name in method_names: + if method_name in visited: + continue + components += 1 + stack = [method_name] + while stack: + current = stack.pop() + if current in visited: + continue + visited.add(current) + stack.extend(sorted(adjacency[current] - visited)) + + instance_vars = set().union(*method_to_attrs.values()) if method_to_attrs else set() + return components, len(method_names), len(instance_vars) + + +def cohesion_risk(lcom4: int) -> Literal["low", "medium", "high"]: + if lcom4 <= 1: + return "low" + if lcom4 <= COHESION_RISK_MEDIUM_MAX: + return "medium" + return "high" diff --git a/codeclone/metrics/complexity.py b/codeclone/metrics/complexity.py new file mode 100644 index 0000000..2e6919e --- /dev/null +++ b/codeclone/metrics/complexity.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +from typing import TYPE_CHECKING, Literal + +from ..contracts import COMPLEXITY_RISK_LOW_MAX, COMPLEXITY_RISK_MEDIUM_MAX + +if TYPE_CHECKING: + from collections.abc import Iterable + + from ..cfg_model import CFG + +ControlNode = ( + ast.If + | ast.For + | ast.While + | ast.Try + | ast.With + | ast.Match + | ast.AsyncFor + | ast.AsyncWith +) + + +def cyclomatic_complexity(cfg: CFG) -> int: + """Compute McCabe complexity from CFG graph topology.""" + node_count = len(cfg.blocks) + edge_count = sum(len(block.successors) for block in cfg.blocks) + complexity = edge_count - node_count + 2 + return max(1, complexity) + + +def _iter_nested_statement_lists(node: ast.AST) -> Iterable[list[ast.stmt]]: + if isinstance(node, (ast.If, ast.For, ast.While, ast.AsyncFor)): + yield node.body + if node.orelse: + yield node.orelse + elif isinstance(node, (ast.With, ast.AsyncWith)): + yield node.body + elif isinstance(node, ast.Try): + yield node.body + if node.orelse: + yield node.orelse 
+ if node.finalbody: + yield node.finalbody + for handler in node.handlers: + yield handler.body + elif isinstance(node, ast.Match): + for case in node.cases: + yield case.body + + +def nesting_depth(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> int: + """Compute maximum nesting depth for control-flow statements.""" + + def _visit_statements(statements: list[ast.stmt], depth: int) -> int: + best = depth + for statement in statements: + if isinstance( + statement, + ( + ast.If, + ast.For, + ast.While, + ast.Try, + ast.With, + ast.Match, + ast.AsyncFor, + ast.AsyncWith, + ), + ): + next_depth = depth + 1 + best = max(best, next_depth) + for nested in _iter_nested_statement_lists(statement): + best = max(best, _visit_statements(nested, next_depth)) + else: + nested_body = getattr(statement, "body", None) + if isinstance(nested_body, list): + best = max(best, _visit_statements(nested_body, depth)) + return best + + return _visit_statements(list(func_node.body), 0) + + +def risk_level(cc: int) -> Literal["low", "medium", "high"]: + if cc <= COMPLEXITY_RISK_LOW_MAX: + return "low" + if cc <= COMPLEXITY_RISK_MEDIUM_MAX: + return "medium" + return "high" diff --git a/codeclone/metrics/coupling.py b/codeclone/metrics/coupling.py new file mode 100644 index 0000000..07752b7 --- /dev/null +++ b/codeclone/metrics/coupling.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +import builtins +from typing import Literal + +from ..contracts import COUPLING_RISK_LOW_MAX, COUPLING_RISK_MEDIUM_MAX + +_BUILTIN_NAMES = frozenset(dir(builtins)) + + +def _annotation_name(node: ast.AST) -> str | None: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + return node.attr + if isinstance(node, ast.Subscript): + return _annotation_name(node.value) + if isinstance(node, ast.Tuple): + for element in node.elts: + candidate = _annotation_name(element) + if 
candidate: + return candidate + return None + + +def compute_cbo( + class_node: ast.ClassDef, + *, + module_import_names: set[str], + module_class_names: set[str], +) -> tuple[int, tuple[str, ...]]: + """ + Conservative deterministic CBO approximation. + + We count unique external symbols referenced by class bases, annotations, + constructor calls and non-self attributes. + """ + couplings: set[str] = set() + + for base in class_node.bases: + candidate = _annotation_name(base) + if candidate: + couplings.add(candidate) + + for node in ast.walk(class_node): + if isinstance(node, ast.Name): + couplings.add(node.id) + continue + if isinstance(node, ast.Attribute): + if isinstance(node.value, ast.Name) and node.value.id in {"self", "cls"}: + continue + couplings.add(node.attr) + continue + if isinstance(node, ast.Call): + candidate = _annotation_name(node.func) + if candidate: + couplings.add(candidate) + continue + if isinstance(node, ast.AnnAssign) and node.annotation is not None: + candidate = _annotation_name(node.annotation) + if candidate: + couplings.add(candidate) + continue + if isinstance(node, ast.arg) and node.annotation is not None: + candidate = _annotation_name(node.annotation) + if candidate: + couplings.add(candidate) + + filtered = { + name + for name in couplings + if name + and name not in _BUILTIN_NAMES + and name not in {"self", "cls", class_node.name} + and ( + name in module_import_names + or (name in module_class_names and name != class_node.name) + ) + } + resolved = tuple(sorted(filtered)) + return len(resolved), resolved + + +def coupling_risk(cbo: int) -> Literal["low", "medium", "high"]: + if cbo <= COUPLING_RISK_LOW_MAX: + return "low" + if cbo <= COUPLING_RISK_MEDIUM_MAX: + return "medium" + return "high" diff --git a/codeclone/metrics/dead_code.py b/codeclone/metrics/dead_code.py new file mode 100644 index 0000000..eeccc81 --- /dev/null +++ b/codeclone/metrics/dead_code.py @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: MIT +# Copyright 
(c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import replace +from typing import Literal + +from ..domain.findings import CLONE_KIND_FUNCTION, SYMBOL_KIND_METHOD +from ..domain.quality import CONFIDENCE_HIGH, CONFIDENCE_MEDIUM +from ..models import DeadCandidate, DeadItem +from ..paths import is_test_filepath +from ..suppressions import DEAD_CODE_RULE_ID + +_TEST_NAME_PREFIXES = ("test_", "pytest_") +_DYNAMIC_METHOD_PREFIXES = ("visit_",) +_MODULE_RUNTIME_HOOK_NAMES = {"__getattr__", "__dir__"} +_DYNAMIC_HOOK_NAMES = { + "setup", + "teardown", + "setUp", + "tearDown", + "setUpClass", + "tearDownClass", + "setup_class", + "teardown_class", + "setup_method", + "teardown_method", +} + + +def find_unused( + *, + definitions: tuple[DeadCandidate, ...], + referenced_names: frozenset[str], + referenced_qualnames: frozenset[str] = frozenset(), +) -> tuple[DeadItem, ...]: + items: list[DeadItem] = [] + for symbol in definitions: + if DEAD_CODE_RULE_ID in symbol.suppressed_rules: + continue + if _is_non_actionable_candidate(symbol): + continue + if symbol.qualname in referenced_qualnames: + continue + if symbol.local_name in referenced_names: + continue + + confidence: Literal["high", "medium"] = CONFIDENCE_HIGH + if symbol.qualname.split(":", 1)[-1] in referenced_names: + confidence = CONFIDENCE_MEDIUM + + items.append( + DeadItem( + qualname=symbol.qualname, + filepath=symbol.filepath, + start_line=symbol.start_line, + end_line=symbol.end_line, + kind=symbol.kind, + confidence=confidence, + ) + ) + + items_sorted = tuple( + sorted( + items, + key=lambda item: ( + item.filepath, + item.start_line, + item.end_line, + item.qualname, + item.kind, + ), + ) + ) + return items_sorted + + +def find_suppressed_unused( + *, + definitions: tuple[DeadCandidate, ...], + referenced_names: frozenset[str], + referenced_qualnames: frozenset[str] = frozenset(), +) -> tuple[DeadItem, ...]: + suppressed_definitions = tuple( + replace(symbol, 
suppressed_rules=()) + for symbol in definitions + if DEAD_CODE_RULE_ID in symbol.suppressed_rules + ) + if not suppressed_definitions: + return () + return find_unused( + definitions=suppressed_definitions, + referenced_names=referenced_names, + referenced_qualnames=referenced_qualnames, + ) + + +def _is_non_actionable_candidate(symbol: DeadCandidate) -> bool: + # pytest entrypoints and fixtures are discovered by naming conventions. + if symbol.local_name.startswith(_TEST_NAME_PREFIXES): + return True + if is_test_filepath(symbol.filepath): + return True + + # Module-level dynamic hooks (PEP 562) are invoked by import/runtime lookup. + if symbol.kind == CLONE_KIND_FUNCTION: + return symbol.local_name in _MODULE_RUNTIME_HOOK_NAMES + # Magic methods and visitor callbacks are invoked by runtime dispatch. + if symbol.kind == SYMBOL_KIND_METHOD: + return ( + _is_dunder(symbol.local_name) + or symbol.local_name.startswith(_DYNAMIC_METHOD_PREFIXES) + or symbol.local_name in _DYNAMIC_HOOK_NAMES + ) + return False + + +def _is_dunder(name: str) -> bool: + return len(name) > 4 and name.startswith("__") and name.endswith("__") diff --git a/codeclone/metrics/dependencies.py b/codeclone/metrics/dependencies.py new file mode 100644 index 0000000..caa32d9 --- /dev/null +++ b/codeclone/metrics/dependencies.py @@ -0,0 +1,198 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..models import DepGraph, ModuleDep + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + +DepAdjacency = dict[str, set[str]] + + +def build_import_graph( + *, + modules: Iterable[str], + deps: Sequence[ModuleDep], +) -> DepAdjacency: + graph: DepAdjacency = {module: set() for module in sorted(set(modules))} + for dep in deps: + graph.setdefault(dep.source, set()).add(dep.target) + graph.setdefault(dep.target, set()) + return graph + + +def _tarjan_scc(graph: DepAdjacency) -> 
list[list[str]]: + index = 0 + stack: list[str] = [] + on_stack: set[str] = set() + index_by_node: dict[str, int] = {} + low_by_node: dict[str, int] = {} + components: list[list[str]] = [] + + def _strong_connect(node: str) -> None: + nonlocal index + index_by_node[node] = index + low_by_node[node] = index + index += 1 + stack.append(node) + on_stack.add(node) + + for neighbor in sorted(graph.get(node, set())): + if neighbor not in index_by_node: + _strong_connect(neighbor) + low_by_node[node] = min(low_by_node[node], low_by_node[neighbor]) + elif neighbor in on_stack: + low_by_node[node] = min(low_by_node[node], index_by_node[neighbor]) + + if low_by_node[node] == index_by_node[node]: + component: list[str] = [] + while True: + candidate = stack.pop() + on_stack.remove(candidate) + component.append(candidate) + if candidate == node: + break + components.append(sorted(component)) + + for node in sorted(graph): + if node not in index_by_node: + _strong_connect(node) + + return components + + +def find_cycles(graph: DepAdjacency) -> tuple[tuple[str, ...], ...]: + cycles: list[tuple[str, ...]] = [] + for component in _tarjan_scc(graph): + if len(component) > 1: + cycles.append(tuple(component)) + continue + node = component[0] + if node in graph and node in graph[node]: + cycles.append((node,)) + return tuple(sorted(cycles)) + + +def _longest_path_from( + node: str, + *, + graph: DepAdjacency, + visiting: set[str], + memo: dict[str, int], +) -> int: + if node in memo: + return memo[node] + if node in visiting: + return 0 + + visiting.add(node) + best = 1 + for neighbor in sorted(graph.get(node, set())): + best = max( + best, + 1 + + _longest_path_from( + neighbor, + graph=graph, + visiting=visiting, + memo=memo, + ), + ) + visiting.remove(node) + memo[node] = best + return best + + +def max_depth(graph: DepAdjacency) -> int: + if not graph: + return 0 + memo: dict[str, int] = {} + best = 0 + for node in sorted(graph): + best = max( + best, + _longest_path_from(node, 
def _longest_path_nodes_from(
    node: str,
    *,
    graph: DepAdjacency,
    visiting: set[str],
    memo: dict[str, tuple[str, ...]],
) -> tuple[str, ...]:
    """Longest acyclic path starting at *node*; ties broken lexicographically."""
    if node in memo:
        return memo[node]
    if node in visiting:
        # Cycle truncation: stop the path at the revisited node.
        return (node,)
    visiting.add(node)
    winner: tuple[str, ...] = (node,)
    for succ in sorted(graph.get(node, set())):
        tail = _longest_path_nodes_from(succ, graph=graph, visiting=visiting, memo=memo)
        path = (node, *tail)
        # Prefer longer paths; among equal lengths keep the lexicographically
        # smallest one.
        if (-len(path), path) < (-len(winner), winner):
            winner = path
    visiting.remove(node)
    memo[node] = winner
    return winner


def longest_chains(
    graph: DepAdjacency,
    *,
    limit: int = 5,
) -> tuple[tuple[str, ...], ...]:
    """Return up to *limit* distinct longest import chains, longest first."""
    if limit <= 0 or not graph:
        return ()
    memo: dict[str, tuple[str, ...]] = {}
    unique = {
        _longest_path_nodes_from(start, graph=graph, visiting=set(), memo=memo)
        for start in sorted(graph)
    }
    ranked = sorted(unique, key=lambda path: (-len(path), path))
    return tuple(ranked[:limit])


def build_dep_graph(*, modules: Iterable[str], deps: Sequence[ModuleDep]) -> DepGraph:
    """Assemble the full dependency report: adjacency, cycles, depth, chains."""
    adjacency = build_import_graph(modules=modules, deps=deps)
    # De-duplicate edges by (source, target, import_type, line) identity; the
    # sort key equals the identity key, so ordering matches the dedup order.
    by_identity = {
        (dep.source, dep.target, dep.import_type, dep.line): dep for dep in deps
    }
    unique_edges = tuple(by_identity[key] for key in sorted(by_identity))
    return DepGraph(
        modules=frozenset(adjacency),
        edges=unique_edges,
        cycles=find_cycles(adjacency),
        max_depth=max_depth(adjacency),
        longest_chains=longest_chains(adjacency),
    )
2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +from ..contracts import HEALTH_WEIGHTS +from ..models import HealthScore + + +@dataclass(frozen=True, slots=True) +class HealthInputs: + files_found: int + files_analyzed_or_cached: int + function_clone_groups: int + block_clone_groups: int + complexity_avg: float + complexity_max: int + high_risk_functions: int + coupling_avg: float + coupling_max: int + high_risk_classes: int + cohesion_avg: float + low_cohesion_classes: int + dependency_cycles: int + dependency_max_depth: int + dead_code_items: int + + +def _clamp_score(value: float) -> int: + return max(0, min(100, round(value))) + + +def _grade(score: int) -> Literal["A", "B", "C", "D", "F"]: + if score >= 90: + return "A" + if score >= 75: + return "B" + if score >= 60: + return "C" + if score >= 40: + return "D" + return "F" + + +def _safe_div(numerator: float, denominator: float) -> float: + if denominator <= 0: + return 0.0 + return numerator / denominator + + +# Piecewise clone-density curve: mild penalty for low density, +# steep in the structural-debt zone, brutal when it's systemic. +_CLONE_BREAKPOINTS: tuple[tuple[float, float], ...] 
def _clone_piecewise_score(density: float) -> int:
    """Return the clone dimension score (0-100) for a given clone density.

    Linearly interpolates between the ``_CLONE_BREAKPOINTS`` anchors; any
    density beyond the last anchor scores 0.
    """
    if density <= 0:
        return 100
    left_density, left_score = 0.0, 100.0
    for right_density, right_score in _CLONE_BREAKPOINTS:
        if density <= right_density:
            # Linear interpolation within the current segment.
            fraction = (density - left_density) / (right_density - left_density)
            return _clamp_score(left_score + fraction * (right_score - left_score))
        left_density, left_score = right_density, right_score
    return 0
METRICS_BASELINE_GENERATOR: Final = "codeclone"
MAX_METRICS_BASELINE_SIZE_BYTES: Final = 5 * 1024 * 1024


class MetricsBaselineStatus(str, Enum):
    """Outcome of loading/validating a metrics baseline file."""

    OK = "ok"
    MISSING = "missing"
    TOO_LARGE = "too_large"
    INVALID_JSON = "invalid_json"
    INVALID_TYPE = "invalid_type"
    MISSING_FIELDS = "missing_fields"
    MISMATCH_SCHEMA_VERSION = "mismatch_schema_version"
    MISMATCH_PYTHON_VERSION = "mismatch_python_version"
    GENERATOR_MISMATCH = "generator_mismatch"
    INTEGRITY_MISSING = "integrity_missing"
    INTEGRITY_FAILED = "integrity_failed"


# Every status except OK means the baseline cannot be trusted.
METRICS_BASELINE_UNTRUSTED_STATUSES: Final[frozenset[MetricsBaselineStatus]] = frozenset(
    status for status in MetricsBaselineStatus if status is not MetricsBaselineStatus.OK
)

_TOP_LEVEL_REQUIRED_KEYS = frozenset({"meta", "metrics"})
_TOP_LEVEL_ALLOWED_KEYS = _TOP_LEVEL_REQUIRED_KEYS | frozenset({"clones"})
_META_REQUIRED_KEYS = frozenset(
    {"generator", "schema_version", "python_tag", "created_at", "payload_sha256"}
)
_METRICS_REQUIRED_KEYS = frozenset(
    {
        "max_complexity",
        "high_risk_functions",
        "max_coupling",
        "high_coupling_classes",
        "max_cohesion",
        "low_cohesion_classes",
        "dependency_cycles",
        "dependency_max_depth",
        "dead_code_items",
        "health_score",
        "health_grade",
    }
)
_METRICS_PAYLOAD_SHA256_KEY = "metrics_payload_sha256"


def coerce_metrics_baseline_status(
    raw_status: str | MetricsBaselineStatus | None,
) -> MetricsBaselineStatus:
    """Best-effort conversion; anything unrecognized maps to INVALID_TYPE."""
    if isinstance(raw_status, MetricsBaselineStatus):
        return raw_status
    if not isinstance(raw_status, str):
        return MetricsBaselineStatus.INVALID_TYPE
    try:
        return MetricsBaselineStatus(raw_status)
    except ValueError:
        return MetricsBaselineStatus.INVALID_TYPE
sort_keys=True, + separators=(",", ":"), + ensure_ascii=False, + ) + + +def _snapshot_payload(snapshot: MetricsSnapshot) -> dict[str, object]: + return { + "max_complexity": int(snapshot.max_complexity), + "high_risk_functions": list(snapshot.high_risk_functions), + "max_coupling": int(snapshot.max_coupling), + "high_coupling_classes": list(snapshot.high_coupling_classes), + "max_cohesion": int(snapshot.max_cohesion), + "low_cohesion_classes": list(snapshot.low_cohesion_classes), + "dependency_cycles": [list(cycle) for cycle in snapshot.dependency_cycles], + "dependency_max_depth": int(snapshot.dependency_max_depth), + "dead_code_items": list(snapshot.dead_code_items), + "health_score": int(snapshot.health_score), + "health_grade": snapshot.health_grade, + } + + +def _compute_payload_sha256(snapshot: MetricsSnapshot) -> str: + canonical = _canonical_json(_snapshot_payload(snapshot)) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def _now_utc_z() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace( + "+00:00", + "Z", + ) + ) + + +class MetricsBaseline: + __slots__ = ( + "created_at", + "generator_name", + "generator_version", + "is_embedded_in_clone_baseline", + "path", + "payload_sha256", + "python_tag", + "schema_version", + "snapshot", + ) + + def __init__(self, path: str | Path) -> None: + self.path = Path(path) + self.generator_name: str | None = None + self.generator_version: str | None = None + self.schema_version: str | None = None + self.python_tag: str | None = None + self.created_at: str | None = None + self.payload_sha256: str | None = None + self.snapshot: MetricsSnapshot | None = None + self.is_embedded_in_clone_baseline = False + + def load( + self, + *, + max_size_bytes: int | None = None, + preloaded_payload: dict[str, object] | None = None, + ) -> None: + try: + exists = self.path.exists() + except OSError as e: + raise BaselineValidationError( + f"Cannot stat metrics baseline 
file at {self.path}: {e}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) from e + if not exists: + return + + size_limit = ( + MAX_METRICS_BASELINE_SIZE_BYTES + if max_size_bytes is None + else max_size_bytes + ) + try: + file_size = self.path.stat().st_size + except OSError as e: + raise BaselineValidationError( + f"Cannot stat metrics baseline file at {self.path}: {e}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) from e + if file_size > size_limit: + raise BaselineValidationError( + "Metrics baseline file is too large " + f"({file_size} bytes, max {size_limit} bytes) at {self.path}.", + status=MetricsBaselineStatus.TOO_LARGE, + ) + + if preloaded_payload is None: + payload = _load_json_object(self.path) + else: + if not isinstance(preloaded_payload, dict): + raise BaselineValidationError( + f"Metrics baseline payload must be an object at {self.path}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + payload = preloaded_payload + _validate_top_level_structure(payload, path=self.path) + self.is_embedded_in_clone_baseline = "clones" in payload + + meta_obj = payload.get("meta") + metrics_obj = payload.get("metrics") + if not isinstance(meta_obj, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {self.path}: " + "'meta' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not isinstance(metrics_obj, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {self.path}: " + "'metrics' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + _validate_required_keys(meta_obj, _META_REQUIRED_KEYS, path=self.path) + _validate_required_keys(metrics_obj, _METRICS_REQUIRED_KEYS, path=self.path) + _validate_exact_keys(metrics_obj, _METRICS_REQUIRED_KEYS, path=self.path) + + generator_name, generator_version = _parse_generator(meta_obj, path=self.path) + schema_version = _require_str(meta_obj, "schema_version", path=self.path) + python_tag = _require_str(meta_obj, "python_tag", 
path=self.path) + created_at = _require_str(meta_obj, "created_at", path=self.path) + payload_sha256 = _extract_metrics_payload_sha256(meta_obj, path=self.path) + + self.generator_name = generator_name + self.generator_version = generator_version + self.schema_version = schema_version + self.python_tag = python_tag + self.created_at = created_at + self.payload_sha256 = payload_sha256 + self.snapshot = _parse_snapshot(metrics_obj, path=self.path) + + def save(self) -> None: + if self.snapshot is None: + raise BaselineValidationError( + "Metrics baseline snapshot is missing.", + status=MetricsBaselineStatus.MISSING_FIELDS, + ) + payload = _build_payload( + snapshot=self.snapshot, + schema_version=self.schema_version or METRICS_BASELINE_SCHEMA_VERSION, + python_tag=self.python_tag or current_python_tag(), + generator_name=self.generator_name or METRICS_BASELINE_GENERATOR, + generator_version=self.generator_version or __version__, + created_at=self.created_at or _now_utc_z(), + ) + payload_meta = cast("Mapping[str, Any]", payload["meta"]) + payload_metrics_hash = _require_str( + payload_meta, + "payload_sha256", + path=self.path, + ) + existing: dict[str, Any] | None = None + try: + if self.path.exists(): + loaded = _load_json_object(self.path) + if "clones" in loaded: + existing = loaded + except BaselineValidationError as e: + raise BaselineValidationError( + f"Cannot read existing baseline file at {self.path}: {e}", + status=MetricsBaselineStatus.INVALID_JSON, + ) from e + + if existing is not None: + existing_meta, clones_obj = _require_embedded_clone_baseline_payload( + existing, path=self.path + ) + merged_schema_version = _resolve_embedded_schema_version( + existing_meta, path=self.path + ) + merged_meta = dict(existing_meta) + merged_meta["schema_version"] = merged_schema_version + merged_meta[_METRICS_PAYLOAD_SHA256_KEY] = payload_metrics_hash + merged_payload: dict[str, object] = { + "meta": merged_meta, + "clones": clones_obj, + "metrics": 
payload["metrics"], + } + self.path.parent.mkdir(parents=True, exist_ok=True) + _atomic_write_json(self.path, merged_payload) + self.is_embedded_in_clone_baseline = True + self.schema_version = merged_schema_version + self.python_tag = _require_str(merged_meta, "python_tag", path=self.path) + self.created_at = _require_str(merged_meta, "created_at", path=self.path) + self.payload_sha256 = _require_str( + merged_meta, _METRICS_PAYLOAD_SHA256_KEY, path=self.path + ) + self.generator_name, self.generator_version = _parse_generator( + merged_meta, path=self.path + ) + return + + self.path.parent.mkdir(parents=True, exist_ok=True) + _atomic_write_json(self.path, payload) + self.is_embedded_in_clone_baseline = False + self.schema_version = _require_str( + payload_meta, "schema_version", path=self.path + ) + self.python_tag = _require_str(payload_meta, "python_tag", path=self.path) + self.created_at = _require_str(payload_meta, "created_at", path=self.path) + self.payload_sha256 = payload_metrics_hash + + def verify_compatibility(self, *, runtime_python_tag: str) -> None: + if self.generator_name != METRICS_BASELINE_GENERATOR: + raise BaselineValidationError( + "Metrics baseline generator mismatch: expected 'codeclone'.", + status=MetricsBaselineStatus.GENERATOR_MISMATCH, + ) + expected_schema = ( + BASELINE_SCHEMA_VERSION + if self.is_embedded_in_clone_baseline + else METRICS_BASELINE_SCHEMA_VERSION + ) + if self.schema_version != expected_schema: + raise BaselineValidationError( + "Metrics baseline schema version mismatch: " + f"baseline={self.schema_version}, " + f"expected={expected_schema}.", + status=MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION, + ) + if self.python_tag != runtime_python_tag: + raise BaselineValidationError( + "Metrics baseline python tag mismatch: " + f"baseline={self.python_tag}, current={runtime_python_tag}.", + status=MetricsBaselineStatus.MISMATCH_PYTHON_VERSION, + ) + self.verify_integrity() + + def verify_integrity(self) -> None: + if 
self.snapshot is None: + raise BaselineValidationError( + "Metrics baseline snapshot is missing.", + status=MetricsBaselineStatus.MISSING_FIELDS, + ) + if not isinstance(self.payload_sha256, str): + raise BaselineValidationError( + "Metrics baseline integrity payload hash is missing.", + status=MetricsBaselineStatus.INTEGRITY_MISSING, + ) + if len(self.payload_sha256) != 64: + raise BaselineValidationError( + "Metrics baseline integrity payload hash is missing.", + status=MetricsBaselineStatus.INTEGRITY_MISSING, + ) + expected = _compute_payload_sha256(self.snapshot) + if not hmac.compare_digest(self.payload_sha256, expected): + raise BaselineValidationError( + "Metrics baseline integrity check failed: payload_sha256 mismatch.", + status=MetricsBaselineStatus.INTEGRITY_FAILED, + ) + + @staticmethod + def from_project_metrics( + *, + project_metrics: ProjectMetrics, + path: str | Path, + schema_version: str | None = None, + python_tag: str | None = None, + generator_version: str | None = None, + ) -> MetricsBaseline: + baseline = MetricsBaseline(path) + baseline.generator_name = METRICS_BASELINE_GENERATOR + baseline.generator_version = generator_version or __version__ + baseline.schema_version = schema_version or METRICS_BASELINE_SCHEMA_VERSION + baseline.python_tag = python_tag or current_python_tag() + baseline.created_at = _now_utc_z() + baseline.snapshot = snapshot_from_project_metrics(project_metrics) + baseline.payload_sha256 = _compute_payload_sha256(baseline.snapshot) + return baseline + + def diff(self, current: ProjectMetrics) -> MetricsDiff: + if self.snapshot is None: + snapshot = MetricsSnapshot( + max_complexity=0, + high_risk_functions=(), + max_coupling=0, + high_coupling_classes=(), + max_cohesion=0, + low_cohesion_classes=(), + dependency_cycles=(), + dependency_max_depth=0, + dead_code_items=(), + health_score=0, + health_grade="F", + ) + else: + snapshot = self.snapshot + + current_snapshot = snapshot_from_project_metrics(current) + + 
new_high_risk_functions = tuple( + sorted( + set(current_snapshot.high_risk_functions) + - set(snapshot.high_risk_functions) + ) + ) + new_high_coupling_classes = tuple( + sorted( + set(current_snapshot.high_coupling_classes) + - set(snapshot.high_coupling_classes) + ) + ) + new_cycles = tuple( + sorted( + set(current_snapshot.dependency_cycles) + - set(snapshot.dependency_cycles) + ) + ) + new_dead_code = tuple( + sorted( + set(current_snapshot.dead_code_items) - set(snapshot.dead_code_items) + ) + ) + + return MetricsDiff( + new_high_risk_functions=new_high_risk_functions, + new_high_coupling_classes=new_high_coupling_classes, + new_cycles=new_cycles, + new_dead_code=new_dead_code, + health_delta=current_snapshot.health_score - snapshot.health_score, + ) + + +def _atomic_write_json(path: Path, payload: dict[str, object]) -> None: + tmp_path = path.with_name(f"{path.name}.tmp") + data = json.dumps(payload, indent=2, ensure_ascii=False) + "\n" + with tmp_path.open("wb") as tmp_file: + tmp_file.write(data.encode("utf-8")) + tmp_file.flush() + os.fsync(tmp_file.fileno()) + os.replace(tmp_path, path) + + +def _load_json_object(path: Path) -> dict[str, Any]: + try: + raw = path.read_text("utf-8") + except OSError as e: + raise BaselineValidationError( + f"Cannot read metrics baseline file at {path}: {e}", + status=MetricsBaselineStatus.INVALID_JSON, + ) from e + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + raise BaselineValidationError( + f"Corrupted metrics baseline file at {path}: {e}", + status=MetricsBaselineStatus.INVALID_JSON, + ) from e + if not isinstance(data, dict): + raise BaselineValidationError( + f"Metrics baseline payload must be an object at {path}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return data + + +def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: + validate_top_level_structure( + payload, + path=path, + required_keys=_TOP_LEVEL_REQUIRED_KEYS, + 
allowed_keys=_TOP_LEVEL_ALLOWED_KEYS, + schema_label="metrics baseline", + missing_status=MetricsBaselineStatus.MISSING_FIELDS, + extra_status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _validate_required_keys( + payload: Mapping[str, Any], + required: frozenset[str], + *, + path: Path, +) -> None: + missing = required - set(payload.keys()) + if missing: + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: missing required fields: {', '.join(sorted(missing))}", + status=MetricsBaselineStatus.MISSING_FIELDS, + ) + + +def _validate_exact_keys( + payload: Mapping[str, Any], + required: frozenset[str], + *, + path: Path, +) -> None: + extra = set(payload.keys()) - set(required) + if extra: + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: unexpected fields: {', '.join(sorted(extra))}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_str(payload: Mapping[str, Any], key: str, *, path: Path) -> str: + value = payload.get(key) + if isinstance(value, str): + return value + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _extract_metrics_payload_sha256( + payload: Mapping[str, Any], + *, + path: Path, +) -> str: + direct = payload.get(_METRICS_PAYLOAD_SHA256_KEY) + if isinstance(direct, str): + return direct + return _require_str(payload, "payload_sha256", path=path) + + +def _require_int(payload: Mapping[str, Any], key: str, *, path: Path) -> int: + value = payload.get(key) + if isinstance(value, bool): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be int", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if isinstance(value, int): + return value + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be int", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def 
_require_str_list(payload: Mapping[str, Any], key: str, *, path: Path) -> list[str]: + value = payload.get(key) + if not isinstance(value, list): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not all(isinstance(item, str) for item in value): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return value + + +def _parse_cycles( + payload: Mapping[str, Any], + *, + key: str, + path: Path, +) -> tuple[tuple[str, ...], ...]: + value = payload.get(key) + if not isinstance(value, list): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be list", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + cycles: list[tuple[str, ...]] = [] + for cycle in value: + if not isinstance(cycle, list): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: {key!r} cycle item must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not all(isinstance(item, str) for item in cycle): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: {key!r} cycle item must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + cycles.append(tuple(cycle)) + return tuple(sorted(set(cycles))) + + +def _parse_generator( + meta: Mapping[str, Any], + *, + path: Path, +) -> tuple[str, str | None]: + generator = meta.get("generator") + if isinstance(generator, str): + version_value = meta.get("generator_version") + if version_value is None: + version_value = meta.get("codeclone_version") + if version_value is None: + return generator, None + if not isinstance(version_value, str): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: generator_version must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return 
generator, version_value + + if isinstance(generator, dict): + allowed_keys = {"name", "version"} + extra = set(generator.keys()) - allowed_keys + if extra: + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + f"unexpected generator keys: {', '.join(sorted(extra))}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + name = generator.get("name") + version = generator.get("version") + if not isinstance(name, str): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: generator.name must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if version is not None and not isinstance(version, str): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: generator.version must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return name, version if isinstance(version, str) else None + + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: generator must be object or str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_embedded_clone_baseline_payload( + payload: Mapping[str, Any], + *, + path: Path, +) -> tuple[dict[str, Any], dict[str, Any]]: + meta_obj = payload.get("meta") + clones_obj = payload.get("clones") + if not isinstance(meta_obj, dict): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'meta' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not isinstance(clones_obj, dict): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'clones' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + _require_str(meta_obj, "payload_sha256", path=path) + _require_str(meta_obj, "python_tag", path=path) + _require_str(meta_obj, "created_at", path=path) + functions = clones_obj.get("functions") + blocks = clones_obj.get("blocks") + if not isinstance(functions, list) or not all( + isinstance(item, str) for item in functions + ): + raise 
def _resolve_embedded_schema_version(meta: Mapping[str, Any], *, path: Path) -> str:
    """Validate ``meta['schema_version']`` and upgrade pre-2.x versions.

    Versions written by older generators (major < 2) are promoted to the
    current BASELINE_SCHEMA_VERSION; 2.x and later are kept as-is.
    """
    raw_version = _require_str(meta, "schema_version", path=path)
    parts = raw_version.split(".")
    is_semver = len(parts) in {2, 3} and all(part.isdigit() for part in parts)
    if not is_semver:
        raise BaselineValidationError(
            "Invalid baseline schema at "
            f"{path}: 'schema_version' must be semver string",
            status=MetricsBaselineStatus.INVALID_TYPE,
        )
    if int(parts[0]) >= 2:
        return raw_version
    return BASELINE_SCHEMA_VERSION
def _build_payload(
    *,
    snapshot: MetricsSnapshot,
    schema_version: str,
    python_tag: str,
    generator_name: str,
    generator_version: str,
    created_at: str,
) -> dict[str, Any]:
    """Assemble the standalone metrics-baseline JSON document."""
    return {
        "meta": {
            "generator": {
                "name": generator_name,
                "version": generator_version,
            },
            "schema_version": schema_version,
            "python_tag": python_tag,
            "created_at": created_at,
            # Integrity hash covers the canonical metrics payload only.
            "payload_sha256": _compute_payload_sha256(snapshot),
        },
        "metrics": _snapshot_payload(snapshot),
    }
bool = False + terminal_kind: str = "fallthrough" + try_finally_profile: str = "none" + side_effect_order_profile: str = "none" + + +@dataclass(frozen=True, slots=True) +class BlockUnit: + block_hash: str + filepath: str + qualname: str + start_line: int + end_line: int + size: int + + +@dataclass(frozen=True, slots=True) +class SegmentUnit: + segment_hash: str + segment_sig: str + filepath: str + qualname: str + start_line: int + end_line: int + size: int + + +@dataclass(frozen=True, slots=True) +class SourceStats: + """Structural counters collected while processing source files.""" + + lines: int + functions: int + methods: int + classes: int + + +@dataclass(frozen=True, slots=True) +class ClassMetrics: + qualname: str + filepath: str + start_line: int + end_line: int + cbo: int + lcom4: int + method_count: int + instance_var_count: int + risk_coupling: Literal["low", "medium", "high"] + risk_cohesion: Literal["low", "medium", "high"] + coupled_classes: tuple[str, ...] = () + + +@dataclass(frozen=True, slots=True) +class ModuleDep: + source: str + target: str + import_type: Literal["import", "from_import"] + line: int + + +@dataclass(frozen=True, slots=True) +class DepGraph: + modules: frozenset[str] + edges: tuple[ModuleDep, ...] + cycles: tuple[tuple[str, ...], ...] + max_depth: int + longest_chains: tuple[tuple[str, ...], ...] + + +@dataclass(frozen=True, slots=True) +class DeadItem: + qualname: str + filepath: str + start_line: int + end_line: int + kind: Literal["function", "class", "method", "import"] + confidence: Literal["high", "medium"] + + +@dataclass(frozen=True, slots=True) +class DeadCandidate: + qualname: str + local_name: str + filepath: str + start_line: int + end_line: int + kind: Literal["function", "class", "method", "import"] + suppressed_rules: tuple[str, ...] = field(default_factory=tuple) + + +@dataclass(frozen=True, slots=True) +class FileMetrics: + class_metrics: tuple[ClassMetrics, ...] + module_deps: tuple[ModuleDep, ...] 
+ dead_candidates: tuple[DeadCandidate, ...] + referenced_names: frozenset[str] + import_names: frozenset[str] + class_names: frozenset[str] + referenced_qualnames: frozenset[str] = field(default_factory=frozenset) + + +@dataclass(frozen=True, slots=True) +class HealthScore: + total: int + grade: Literal["A", "B", "C", "D", "F"] + dimensions: dict[str, int] + + +SourceKind = Literal["production", "tests", "fixtures", "mixed", "other"] + + +@dataclass(frozen=True, slots=True) +class ReportLocation: + filepath: str + relative_path: str + start_line: int + end_line: int + qualname: str + source_kind: SourceKind + + +@dataclass(frozen=True, slots=True) +class Suggestion: + severity: Literal["critical", "warning", "info"] + category: Literal[ + "clone", + "structural", + "complexity", + "coupling", + "cohesion", + "dead_code", + "dependency", + ] + title: str + location: str + steps: tuple[str, ...] + effort: Literal["easy", "moderate", "hard"] + priority: float + finding_family: Literal["clones", "structural", "metrics"] = "metrics" + finding_kind: str = "" + subject_key: str = "" + fact_kind: str = "" + fact_summary: str = "" + fact_count: int = 0 + spread_files: int = 0 + spread_functions: int = 0 + clone_type: str = "" + confidence: Literal["high", "medium", "low"] = "medium" + source_kind: SourceKind = "other" + source_breakdown: tuple[tuple[SourceKind, int], ...] = field(default_factory=tuple) + representative_locations: tuple[ReportLocation, ...] = field(default_factory=tuple) + location_label: str = "" + + +@dataclass(frozen=True, slots=True) +class ProjectMetrics: + complexity_avg: float + complexity_max: int + high_risk_functions: tuple[str, ...] + coupling_avg: float + coupling_max: int + high_risk_classes: tuple[str, ...] + cohesion_avg: float + cohesion_max: int + low_cohesion_classes: tuple[str, ...] + dependency_modules: int + dependency_edges: int + dependency_edge_list: tuple[ModuleDep, ...] + dependency_cycles: tuple[tuple[str, ...], ...] 
+ dependency_max_depth: int + dependency_longest_chains: tuple[tuple[str, ...], ...] + dead_code: tuple[DeadItem, ...] + health: HealthScore + + +@dataclass(frozen=True, slots=True) +class MetricsSnapshot: + max_complexity: int + high_risk_functions: tuple[str, ...] + max_coupling: int + high_coupling_classes: tuple[str, ...] + max_cohesion: int + low_cohesion_classes: tuple[str, ...] + dependency_cycles: tuple[tuple[str, ...], ...] + dependency_max_depth: int + dead_code_items: tuple[str, ...] + health_score: int + health_grade: Literal["A", "B", "C", "D", "F"] + + +@dataclass(frozen=True, slots=True) +class MetricsDiff: + new_high_risk_functions: tuple[str, ...] + new_high_coupling_classes: tuple[str, ...] + new_cycles: tuple[tuple[str, ...], ...] + new_dead_code: tuple[str, ...] + health_delta: int + + +GroupItem = dict[str, object] +GroupItemLike = Mapping[str, object] +GroupItemsLike = Sequence[GroupItemLike] +GroupMapLike = Mapping[str, Sequence[GroupItemLike]] + + +class FunctionGroupItemBase(TypedDict): + qualname: str + filepath: str + start_line: int + end_line: int + loc: int + stmt_count: int + fingerprint: str + loc_bucket: str + + +class FunctionGroupItem(FunctionGroupItemBase, total=False): + cyclomatic_complexity: int + nesting_depth: int + risk: Literal["low", "medium", "high"] + raw_hash: str + entry_guard_count: int + entry_guard_terminal_profile: str + entry_guard_has_side_effect_before: bool + terminal_kind: str + try_finally_profile: str + side_effect_order_profile: str + + +class BlockGroupItem(TypedDict): + block_hash: str + filepath: str + qualname: str + start_line: int + end_line: int + size: int + + +class SegmentGroupItem(TypedDict): + segment_hash: str + segment_sig: str + filepath: str + qualname: str + start_line: int + end_line: int + size: int + + +GroupMap = dict[str, list[GroupItem]] + + +@dataclass(frozen=True, slots=True) +class StructuralFindingOccurrence: + """Single occurrence of a structural finding (e.g. 
one duplicate branch).""" + + finding_kind: str + finding_key: str + file_path: str + qualname: str + start: int + end: int + signature: dict[str, str] + + +@dataclass(frozen=True, slots=True) +class StructuralFindingGroup: + """Group of structurally equivalent occurrences (e.g. duplicate branches).""" + + finding_kind: str + finding_key: str + signature: dict[str, str] + items: tuple[StructuralFindingOccurrence, ...] diff --git a/codeclone/normalize.py b/codeclone/normalize.py index 67e5eef..b3e0243 100644 --- a/codeclone/normalize.py +++ b/codeclone/normalize.py @@ -1,22 +1,19 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import ast import copy from ast import AST -from collections.abc import Sequence from dataclasses import dataclass -from typing import cast +from typing import TYPE_CHECKING, cast from .meta_markers import CFG_META_PREFIX +if TYPE_CHECKING: + from collections.abc import Sequence + @dataclass(frozen=True, slots=True) class NormalizationConfig: @@ -93,9 +90,9 @@ def visit_Constant(self, node: ast.Constant) -> ast.Constant: def visit_Call(self, node: ast.Call) -> ast.Call: node.func = self._visit_call_target(node.func) - node.args = [cast(ast.expr, self.visit(arg)) for arg in node.args] + node.args = [cast("ast.expr", self.visit(arg)) for arg in node.args] for kw in node.keywords: - kw.value = cast(ast.expr, self.visit(kw.value)) + kw.value = cast("ast.expr", self.visit(kw.value)) return node def _visit_call_target(self, node: ast.expr) -> ast.expr: @@ -107,9 +104,9 @@ def _visit_call_target(self, node: ast.expr) -> ast.expr: if isinstance(value, (ast.Name, ast.Attribute)): node.value = self._visit_call_target(value) else: - node.value = cast(ast.expr, self.visit(value)) + node.value = cast("ast.expr", 
self.visit(value)) return node - return cast(ast.expr, self.visit(node)) + return cast("ast.expr", self.visit(node)) def visit_AugAssign(self, node: ast.AugAssign) -> AST: # Normalize x += 1 to x = x + 1 @@ -209,28 +206,23 @@ def _is_proven_commutative_constant(value: object, op: ast.operator) -> bool: return False -def normalized_ast_dump(func_node: ast.AST, cfg: NormalizationConfig) -> str: - """ - Dump the normalized AST. - WARNING: This modifies the AST in-place for performance. - """ - normalizer = AstNormalizer(cfg) - new_node = ast.fix_missing_locations(normalizer.visit(func_node)) - return ast.dump(new_node, annotate_fields=True, include_attributes=False) - - def normalized_ast_dump_from_list( - nodes: Sequence[ast.AST], cfg: NormalizationConfig + nodes: Sequence[ast.AST], + cfg: NormalizationConfig, + *, + normalizer: AstNormalizer | None = None, ) -> str: """ Dump a list of AST nodes after normalization. WARNING: This modifies the AST nodes in-place for performance. """ - normalizer = AstNormalizer(cfg) + active_normalizer = normalizer or AstNormalizer(cfg) dumps: list[str] = [] for node in nodes: - new_node = ast.fix_missing_locations(normalizer.visit(node)) + # Fingerprints ignore location attributes, so we skip location repair. 
+ new_node = active_normalizer.visit(node) + assert isinstance(new_node, ast.AST) dumps.append(ast.dump(new_node, annotate_fields=True, include_attributes=False)) return ";".join(dumps) diff --git a/codeclone/paths.py b/codeclone/paths.py new file mode 100644 index 0000000..551d2be --- /dev/null +++ b/codeclone/paths.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +_TEST_FILE_NAMES = {"conftest.py"} + + +def is_test_filepath(filepath: str) -> bool: + normalized = filepath.lower().replace("\\", "/") + if "/tests/" in normalized or "/test/" in normalized: + return True + filename = Path(filepath).name.lower() + return filename in _TEST_FILE_NAMES or filename.startswith("test_") diff --git a/codeclone/pipeline.py b/codeclone/pipeline.py new file mode 100644 index 0000000..a3701bb --- /dev/null +++ b/codeclone/pipeline.py @@ -0,0 +1,1703 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from concurrent.futures import ProcessPoolExecutor, as_completed +from dataclasses import dataclass +from hashlib import sha256 +from pathlib import Path +from typing import TYPE_CHECKING, Literal, cast + +from . 
import _coerce +from .cache import ( + Cache, + CacheEntry, + ClassMetricsDict, + DeadCandidateDict, + FileStat, + ModuleDepDict, + SegmentReportProjection, + SourceStatsDict, + StructuralFindingGroupDict, + file_stat_signature, +) +from .contracts import ExitCode +from .domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING +from .domain.quality import CONFIDENCE_HIGH, RISK_HIGH, RISK_LOW +from .extractor import extract_units_and_stats_from_source +from .grouping import build_block_groups, build_groups, build_segment_groups +from .metrics import ( + HealthInputs, + build_dep_graph, + compute_health, + find_suppressed_unused, + find_unused, +) +from .models import ( + BlockUnit, + ClassMetrics, + DeadCandidate, + DeadItem, + DepGraph, + FileMetrics, + GroupItem, + GroupItemLike, + GroupMap, + MetricsDiff, + ModuleDep, + ProjectMetrics, + SegmentUnit, + StructuralFindingGroup, + StructuralFindingOccurrence, + Suggestion, + Unit, +) +from .normalize import NormalizationConfig +from .paths import is_test_filepath +from .report.blocks import prepare_block_report_groups +from .report.explain import build_block_group_facts +from .report.json_contract import build_report_document +from .report.segments import prepare_segment_report_groups +from .report.serialize import render_json_report_document, render_text_report_document +from .report.suggestions import generate_suggestions +from .scanner import iter_py_files, module_name_from_path +from .structural_findings import build_clone_cohort_structural_findings +from .suppressions import DEAD_CODE_RULE_ID, INLINE_CODECLONE_SUPPRESSION_SOURCE + +if TYPE_CHECKING: + from argparse import Namespace + from collections.abc import Callable, Collection, Mapping, Sequence + +MAX_FILE_SIZE = 10 * 1024 * 1024 +DEFAULT_BATCH_SIZE = 100 +PARALLEL_MIN_FILES_PER_WORKER = 8 +PARALLEL_MIN_FILES_FLOOR = 16 + + +@dataclass(frozen=True, slots=True) +class OutputPaths: + html: Path | None = None + json: Path | None = 
None + text: Path | None = None + md: Path | None = None + sarif: Path | None = None + + +@dataclass(frozen=True, slots=True) +class BootstrapResult: + root: Path + config: NormalizationConfig + args: Namespace + output_paths: OutputPaths + cache_path: Path + + +@dataclass(frozen=True, slots=True) +class DiscoveryResult: + files_found: int + cache_hits: int + files_skipped: int + all_file_paths: tuple[str, ...] + cached_units: tuple[GroupItem, ...] + cached_blocks: tuple[GroupItem, ...] + cached_segments: tuple[GroupItem, ...] + cached_class_metrics: tuple[ClassMetrics, ...] + cached_module_deps: tuple[ModuleDep, ...] + cached_dead_candidates: tuple[DeadCandidate, ...] + cached_referenced_names: frozenset[str] + files_to_process: tuple[str, ...] + skipped_warnings: tuple[str, ...] + cached_referenced_qualnames: frozenset[str] = frozenset() + cached_structural_findings: tuple[StructuralFindingGroup, ...] = () + cached_segment_report_projection: SegmentReportProjection | None = None + cached_lines: int = 0 + cached_functions: int = 0 + cached_methods: int = 0 + cached_classes: int = 0 + + +@dataclass(frozen=True, slots=True) +class FileProcessResult: + filepath: str + success: bool + error: str | None = None + units: list[Unit] | None = None + blocks: list[BlockUnit] | None = None + segments: list[SegmentUnit] | None = None + lines: int = 0 + functions: int = 0 + methods: int = 0 + classes: int = 0 + stat: FileStat | None = None + error_kind: str | None = None + file_metrics: FileMetrics | None = None + structural_findings: list[StructuralFindingGroup] | None = None + + +@dataclass(frozen=True, slots=True) +class ProcessingResult: + units: tuple[GroupItem, ...] + blocks: tuple[GroupItem, ...] + segments: tuple[GroupItem, ...] + class_metrics: tuple[ClassMetrics, ...] + module_deps: tuple[ModuleDep, ...] + dead_candidates: tuple[DeadCandidate, ...] 
+ referenced_names: frozenset[str] + files_analyzed: int + files_skipped: int + analyzed_lines: int + analyzed_functions: int + analyzed_methods: int + analyzed_classes: int + failed_files: tuple[str, ...] + source_read_failures: tuple[str, ...] + referenced_qualnames: frozenset[str] = frozenset() + structural_findings: tuple[StructuralFindingGroup, ...] = () + + +@dataclass(frozen=True, slots=True) +class AnalysisResult: + func_groups: GroupMap + block_groups: GroupMap + block_groups_report: GroupMap + segment_groups: GroupMap + suppressed_segment_groups: int + block_group_facts: dict[str, dict[str, str]] + func_clones_count: int + block_clones_count: int + segment_clones_count: int + files_analyzed_or_cached: int + project_metrics: ProjectMetrics | None + metrics_payload: dict[str, object] | None + suggestions: tuple[Suggestion, ...] + segment_groups_raw_digest: str + suppressed_dead_code_items: int = 0 + structural_findings: tuple[StructuralFindingGroup, ...] = () + + +@dataclass(frozen=True, slots=True) +class GatingResult: + exit_code: int + reasons: tuple[str, ...] 
+ + +@dataclass(frozen=True, slots=True) +class ReportArtifacts: + html: str | None = None + json: str | None = None + text: str | None = None + md: str | None = None + sarif: str | None = None + + +@dataclass(frozen=True, slots=True) +class MetricGateConfig: + fail_complexity: int + fail_coupling: int + fail_cohesion: int + fail_cycles: bool + fail_dead_code: bool + fail_health: int + fail_on_new_metrics: bool + + +_as_int = _coerce.as_int +_as_str = _coerce.as_str + + +def _as_sorted_str_tuple(value: object) -> tuple[str, ...]: + if not isinstance(value, list): + return () + return tuple(sorted({item for item in value if isinstance(item, str) and item})) + + +def _group_item_sort_key(item: GroupItemLike) -> tuple[str, int, int, str]: + return ( + _as_str(item.get("filepath")), + _as_int(item.get("start_line")), + _as_int(item.get("end_line")), + _as_str(item.get("qualname")), + ) + + +def _segment_projection_item_sort_key(item: GroupItemLike) -> tuple[str, str, int, int]: + return ( + _as_str(item.get("filepath")), + _as_str(item.get("qualname")), + _as_int(item.get("start_line")), + _as_int(item.get("end_line")), + ) + + +def _segment_groups_digest(segment_groups: GroupMap) -> str: + normalized_rows: list[ + tuple[str, tuple[tuple[str, str, int, int, int, str, str], ...]] + ] = [] + for group_key in sorted(segment_groups): + items = sorted(segment_groups[group_key], key=_segment_projection_item_sort_key) + normalized_items: list[tuple[str, str, int, int, int, str, str]] = [ + ( + _as_str(item.get("filepath")), + _as_str(item.get("qualname")), + _as_int(item.get("start_line")), + _as_int(item.get("end_line")), + _as_int(item.get("size")), + _as_str(item.get("segment_hash")), + _as_str(item.get("segment_sig")), + ) + for item in items + ] + normalized_rows.append((group_key, tuple(normalized_items))) + payload = repr(tuple(normalized_rows)).encode("utf-8") + return sha256(payload).hexdigest() + + +def _coerce_segment_report_projection( + value: object, +) -> 
SegmentReportProjection | None: + if not isinstance(value, dict): + return None + digest = value.get("digest") + suppressed = value.get("suppressed") + groups = value.get("groups") + if ( + not isinstance(digest, str) + or not isinstance(suppressed, int) + or not isinstance(groups, dict) + ): + return None + if not all( + isinstance(group_key, str) and isinstance(items, list) + for group_key, items in groups.items() + ): + return None + return cast("SegmentReportProjection", value) + + +def _module_dep_sort_key(dep: ModuleDep) -> tuple[str, str, str, int]: + return dep.source, dep.target, dep.import_type, dep.line + + +def _class_metric_sort_key(metric: ClassMetrics) -> tuple[str, int, int, str]: + return metric.filepath, metric.start_line, metric.end_line, metric.qualname + + +def _dead_candidate_sort_key(item: DeadCandidate) -> tuple[str, int, int, str]: + return item.filepath, item.start_line, item.end_line, item.qualname + + +def _unit_to_group_item(unit: Unit) -> GroupItem: + return { + "qualname": unit.qualname, + "filepath": unit.filepath, + "start_line": unit.start_line, + "end_line": unit.end_line, + "loc": unit.loc, + "stmt_count": unit.stmt_count, + "fingerprint": unit.fingerprint, + "loc_bucket": unit.loc_bucket, + "cyclomatic_complexity": unit.cyclomatic_complexity, + "nesting_depth": unit.nesting_depth, + "risk": unit.risk, + "raw_hash": unit.raw_hash, + "entry_guard_count": unit.entry_guard_count, + "entry_guard_terminal_profile": unit.entry_guard_terminal_profile, + "entry_guard_has_side_effect_before": unit.entry_guard_has_side_effect_before, + "terminal_kind": unit.terminal_kind, + "try_finally_profile": unit.try_finally_profile, + "side_effect_order_profile": unit.side_effect_order_profile, + } + + +def _block_to_group_item(block: BlockUnit) -> GroupItem: + return { + "block_hash": block.block_hash, + "filepath": block.filepath, + "qualname": block.qualname, + "start_line": block.start_line, + "end_line": block.end_line, + "size": block.size, + } 
+ + +def _segment_to_group_item(segment: SegmentUnit) -> GroupItem: + return { + "segment_hash": segment.segment_hash, + "segment_sig": segment.segment_sig, + "filepath": segment.filepath, + "qualname": segment.qualname, + "start_line": segment.start_line, + "end_line": segment.end_line, + "size": segment.size, + } + + +def _parallel_min_files(processes: int) -> int: + return max(PARALLEL_MIN_FILES_FLOOR, processes * PARALLEL_MIN_FILES_PER_WORKER) + + +def _should_collect_structural_findings(output_paths: OutputPaths) -> bool: + return any( + path is not None + for path in ( + output_paths.html, + output_paths.json, + output_paths.md, + output_paths.sarif, + output_paths.text, + ) + ) + + +def _should_use_parallel(files_count: int, processes: int) -> bool: + if processes <= 1: + return False + return files_count >= _parallel_min_files(processes) + + +def _new_discovery_buffers() -> tuple[ + list[GroupItem], + list[GroupItem], + list[GroupItem], + list[ClassMetrics], + list[ModuleDep], + list[DeadCandidate], + set[str], + set[str], + list[str], + list[str], +]: + return [], [], [], [], [], [], set(), set(), [], [] + + +def _decode_cached_structural_finding_group( + group_dict: StructuralFindingGroupDict, + filepath: str, +) -> StructuralFindingGroup: + """Convert a StructuralFindingGroupDict (from cache) to a StructuralFindingGroup.""" + finding_kind = group_dict["finding_kind"] + finding_key = group_dict["finding_key"] + signature = group_dict["signature"] + items = tuple( + StructuralFindingOccurrence( + finding_kind=finding_kind, + finding_key=finding_key, + file_path=filepath, + qualname=item["qualname"], + start=item["start"], + end=item["end"], + signature=signature, + ) + for item in group_dict["items"] + ) + return StructuralFindingGroup( + finding_kind=finding_kind, + finding_key=finding_key, + signature=signature, + items=items, + ) + + +def bootstrap( + *, + args: Namespace, + root: Path, + output_paths: OutputPaths, + cache_path: Path, +) -> 
BootstrapResult: + return BootstrapResult( + root=root, + config=NormalizationConfig(), + args=args, + output_paths=output_paths, + cache_path=cache_path, + ) + + +def _cache_entry_has_metrics(entry: CacheEntry) -> bool: + metric_keys = ( + "class_metrics", + "module_deps", + "dead_candidates", + "referenced_names", + "referenced_qualnames", + "import_names", + "class_names", + ) + return all(key in entry and isinstance(entry.get(key), list) for key in metric_keys) + + +def _cache_entry_has_structural_findings(entry: CacheEntry) -> bool: + return "structural_findings" in entry + + +def _cache_entry_source_stats(entry: CacheEntry) -> tuple[int, int, int, int] | None: + stats_obj = entry.get("source_stats") + if not isinstance(stats_obj, dict): + return None + lines = stats_obj.get("lines") + functions = stats_obj.get("functions") + methods = stats_obj.get("methods") + classes = stats_obj.get("classes") + if not ( + isinstance(lines, int) + and isinstance(functions, int) + and isinstance(methods, int) + and isinstance(classes, int) + and lines >= 0 + and functions >= 0 + and methods >= 0 + and classes >= 0 + ): + return None + return lines, functions, methods, classes + + +def _usable_cached_source_stats( + entry: CacheEntry, + *, + skip_metrics: bool, + collect_structural_findings: bool, +) -> tuple[int, int, int, int] | None: + if not skip_metrics and not _cache_entry_has_metrics(entry): + return None + if collect_structural_findings and not _cache_entry_has_structural_findings(entry): + return None + return _cache_entry_source_stats(entry) + + +def _load_cached_metrics( + entry: CacheEntry, + *, + filepath: str, +) -> tuple[ + tuple[ClassMetrics, ...], + tuple[ModuleDep, ...], + tuple[DeadCandidate, ...], + frozenset[str], + frozenset[str], +]: + class_metrics_rows: list[ClassMetricsDict] = entry.get("class_metrics", []) + class_metrics = tuple( + ClassMetrics( + qualname=row["qualname"], + filepath=row["filepath"], + start_line=row["start_line"], + 
end_line=row["end_line"], + cbo=row["cbo"], + lcom4=row["lcom4"], + method_count=row["method_count"], + instance_var_count=row["instance_var_count"], + risk_coupling=cast( + "Literal['low', 'medium', 'high']", + row["risk_coupling"], + ), + risk_cohesion=cast( + "Literal['low', 'medium', 'high']", + row["risk_cohesion"], + ), + coupled_classes=_as_sorted_str_tuple(row.get("coupled_classes", [])), + ) + for row in class_metrics_rows + if row.get("qualname") and row.get("filepath") + ) + + module_dep_rows: list[ModuleDepDict] = entry.get("module_deps", []) + module_deps = tuple( + ModuleDep( + source=row["source"], + target=row["target"], + import_type=cast("Literal['import', 'from_import']", row["import_type"]), + line=row["line"], + ) + for row in module_dep_rows + if row.get("source") and row.get("target") + ) + + dead_rows: list[DeadCandidateDict] = entry.get("dead_candidates", []) + dead_candidates = tuple( + DeadCandidate( + qualname=row["qualname"], + local_name=row["local_name"], + filepath=row["filepath"], + start_line=row["start_line"], + end_line=row["end_line"], + kind=cast( + "Literal['function', 'class', 'method', 'import']", + row["kind"], + ), + suppressed_rules=tuple(sorted(set(row.get("suppressed_rules", [])))), + ) + for row in dead_rows + if row.get("qualname") and row.get("local_name") and row.get("filepath") + ) + + referenced_names = ( + frozenset() + if is_test_filepath(filepath) + else frozenset(entry.get("referenced_names", [])) + ) + referenced_qualnames = ( + frozenset() + if is_test_filepath(filepath) + else frozenset(entry.get("referenced_qualnames", [])) + ) + return ( + class_metrics, + module_deps, + dead_candidates, + referenced_names, + referenced_qualnames, + ) + + +def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: + files_found = 0 + cache_hits = 0 + files_skipped = 0 + collect_structural_findings = _should_collect_structural_findings(boot.output_paths) + cached_segment_projection = 
_coerce_segment_report_projection( + getattr(cache, "segment_report_projection", None) + ) + + ( + cached_units, + cached_blocks, + cached_segments, + cached_class_metrics, + cached_module_deps, + cached_dead_candidates, + cached_referenced_names, + cached_referenced_qualnames, + files_to_process, + skipped_warnings, + ) = _new_discovery_buffers() + cached_sf: list[StructuralFindingGroup] = [] + cached_lines = 0 + cached_functions = 0 + cached_methods = 0 + cached_classes = 0 + all_file_paths: list[str] = [] + + for filepath in iter_py_files(str(boot.root)): + files_found += 1 + all_file_paths.append(filepath) + try: + stat = file_stat_signature(filepath) + except OSError as exc: + files_skipped += 1 + skipped_warnings.append(f"{filepath}: {exc}") + continue + + cached = cache.get_file_entry(filepath) + if cached and cached.get("stat") == stat: + cached_source_stats = _usable_cached_source_stats( + cached, + skip_metrics=boot.args.skip_metrics, + collect_structural_findings=collect_structural_findings, + ) + if cached_source_stats is None: + files_to_process.append(filepath) + continue + + cache_hits += 1 + lines, functions, methods, classes = cached_source_stats + cached_lines += lines + cached_functions += functions + cached_methods += methods + cached_classes += classes + cached_units.extend(cast("list[GroupItem]", cast(object, cached["units"]))) + cached_blocks.extend( + cast("list[GroupItem]", cast(object, cached["blocks"])) + ) + cached_segments.extend( + cast("list[GroupItem]", cast(object, cached["segments"])) + ) + + if not boot.args.skip_metrics: + ( + class_metrics, + module_deps, + dead_candidates, + referenced_names, + referenced_qualnames, + ) = _load_cached_metrics(cached, filepath=filepath) + cached_class_metrics.extend(class_metrics) + cached_module_deps.extend(module_deps) + cached_dead_candidates.extend(dead_candidates) + cached_referenced_names.update(referenced_names) + cached_referenced_qualnames.update(referenced_qualnames) + if 
collect_structural_findings: + cached_sf.extend( + _decode_cached_structural_finding_group(group_dict, filepath) + for group_dict in cached.get("structural_findings") or [] + ) + continue + + files_to_process.append(filepath) + + return DiscoveryResult( + files_found=files_found, + cache_hits=cache_hits, + files_skipped=files_skipped, + all_file_paths=tuple(all_file_paths), + cached_units=tuple(sorted(cached_units, key=_group_item_sort_key)), + cached_blocks=tuple(sorted(cached_blocks, key=_group_item_sort_key)), + cached_segments=tuple(sorted(cached_segments, key=_group_item_sort_key)), + cached_class_metrics=tuple( + sorted(cached_class_metrics, key=_class_metric_sort_key) + ), + cached_module_deps=tuple(sorted(cached_module_deps, key=_module_dep_sort_key)), + cached_dead_candidates=tuple( + sorted(cached_dead_candidates, key=_dead_candidate_sort_key) + ), + cached_referenced_names=frozenset(cached_referenced_names), + cached_referenced_qualnames=frozenset(cached_referenced_qualnames), + files_to_process=tuple(files_to_process), + skipped_warnings=tuple(sorted(skipped_warnings)), + cached_structural_findings=tuple(cached_sf), + cached_segment_report_projection=cached_segment_projection, + cached_lines=cached_lines, + cached_functions=cached_functions, + cached_methods=cached_methods, + cached_classes=cached_classes, + ) + + +def process_file( + filepath: str, + root: str, + cfg: NormalizationConfig, + min_loc: int, + min_stmt: int, + collect_structural_findings: bool = True, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, +) -> FileProcessResult: + try: + try: + stat_result = os.stat(filepath) + if stat_result.st_size > MAX_FILE_SIZE: + return FileProcessResult( + filepath=filepath, + success=False, + error=( + f"File too large: {stat_result.st_size} bytes " + f"(max {MAX_FILE_SIZE})" + ), + error_kind="file_too_large", + ) + except OSError as exc: + return FileProcessResult( + filepath=filepath, + 
success=False, + error=f"Cannot stat file: {exc}", + error_kind="stat_error", + ) + + stat: FileStat = { + "mtime_ns": stat_result.st_mtime_ns, + "size": stat_result.st_size, + } + + try: + source = Path(filepath).read_text("utf-8") + except UnicodeDecodeError as exc: + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Encoding error: {exc}", + error_kind="source_read_error", + ) + except OSError as exc: + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Cannot read file: {exc}", + error_kind="source_read_error", + ) + + module_name = module_name_from_path(root, filepath) + units, blocks, segments, source_stats, file_metrics, sf = ( + extract_units_and_stats_from_source( + source=source, + filepath=filepath, + module_name=module_name, + cfg=cfg, + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + collect_structural_findings=collect_structural_findings, + ) + ) + + return FileProcessResult( + filepath=filepath, + success=True, + units=units, + blocks=blocks, + segments=segments, + lines=source_stats.lines, + functions=source_stats.functions, + methods=source_stats.methods, + classes=source_stats.classes, + stat=stat, + file_metrics=file_metrics, + structural_findings=sf, + ) + except Exception as exc: # pragma: no cover - defensive shell around workers + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Unexpected error: {type(exc).__name__}: {exc}", + error_kind="unexpected_error", + ) + + +def process( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + cache: Cache, + on_advance: Callable[[], None] | None = None, + on_worker_error: Callable[[str], None] | None = None, + on_parallel_fallback: Callable[[Exception], None] | None = None, + batch_size: int = DEFAULT_BATCH_SIZE, +) -> ProcessingResult: + files_to_process = discovery.files_to_process + if not 
files_to_process: + return ProcessingResult( + units=discovery.cached_units, + blocks=discovery.cached_blocks, + segments=discovery.cached_segments, + class_metrics=discovery.cached_class_metrics, + module_deps=discovery.cached_module_deps, + dead_candidates=discovery.cached_dead_candidates, + referenced_names=discovery.cached_referenced_names, + referenced_qualnames=discovery.cached_referenced_qualnames, + files_analyzed=0, + files_skipped=discovery.files_skipped, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + structural_findings=discovery.cached_structural_findings, + ) + + all_units: list[GroupItem] = list(discovery.cached_units) + all_blocks: list[GroupItem] = list(discovery.cached_blocks) + all_segments: list[GroupItem] = list(discovery.cached_segments) + + all_class_metrics: list[ClassMetrics] = list(discovery.cached_class_metrics) + all_module_deps: list[ModuleDep] = list(discovery.cached_module_deps) + all_dead_candidates: list[DeadCandidate] = list(discovery.cached_dead_candidates) + all_referenced_names: set[str] = set(discovery.cached_referenced_names) + all_referenced_qualnames: set[str] = set(discovery.cached_referenced_qualnames) + + files_analyzed = 0 + files_skipped = discovery.files_skipped + analyzed_lines = 0 + analyzed_functions = 0 + analyzed_methods = 0 + analyzed_classes = 0 + + all_structural_findings: list[StructuralFindingGroup] = list( + discovery.cached_structural_findings + ) + failed_files: list[str] = [] + source_read_failures: list[str] = [] + root_str = str(boot.root) + processes = max(1, int(boot.args.processes)) + min_loc = int(boot.args.min_loc) + min_stmt = int(boot.args.min_stmt) + block_min_loc = int(boot.args.block_min_loc) + block_min_stmt = int(boot.args.block_min_stmt) + segment_min_loc = int(boot.args.segment_min_loc) + segment_min_stmt = int(boot.args.segment_min_stmt) + collect_structural_findings = 
_should_collect_structural_findings(boot.output_paths) + + def _accept_result(result: FileProcessResult) -> None: + nonlocal files_analyzed + nonlocal files_skipped + nonlocal analyzed_lines + nonlocal analyzed_functions + nonlocal analyzed_methods + nonlocal analyzed_classes + + if result.success and result.stat is not None: + source_stats_payload = SourceStatsDict( + lines=result.lines, + functions=result.functions, + methods=result.methods, + classes=result.classes, + ) + structural_payload = ( + result.structural_findings if collect_structural_findings else None + ) + try: + cache.put_file_entry( + result.filepath, + result.stat, + result.units or [], + result.blocks or [], + result.segments or [], + source_stats=source_stats_payload, + file_metrics=result.file_metrics, + structural_findings=structural_payload, + ) + except TypeError as exc: + if "source_stats" not in str(exc): + raise + cache.put_file_entry( + result.filepath, + result.stat, + result.units or [], + result.blocks or [], + result.segments or [], + file_metrics=result.file_metrics, + structural_findings=structural_payload, + ) + files_analyzed += 1 + analyzed_lines += result.lines + analyzed_functions += result.functions + analyzed_methods += result.methods + analyzed_classes += result.classes + + if result.units: + all_units.extend(_unit_to_group_item(unit) for unit in result.units) + if result.blocks: + all_blocks.extend( + _block_to_group_item(block) for block in result.blocks + ) + if result.segments: + all_segments.extend( + _segment_to_group_item(segment) for segment in result.segments + ) + if result.structural_findings: + all_structural_findings.extend(result.structural_findings) + + if not boot.args.skip_metrics and result.file_metrics is not None: + all_class_metrics.extend(result.file_metrics.class_metrics) + all_module_deps.extend(result.file_metrics.module_deps) + all_dead_candidates.extend(result.file_metrics.dead_candidates) + 
all_referenced_names.update(result.file_metrics.referenced_names) + all_referenced_qualnames.update( + result.file_metrics.referenced_qualnames + ) + return + + files_skipped += 1 + failure = f"{result.filepath}: {result.error}" + failed_files.append(failure) + if result.error_kind == "source_read_error": + source_read_failures.append(failure) + + def _run_sequential(files: Sequence[str]) -> None: + for filepath in files: + _accept_result( + process_file( + filepath, + root_str, + boot.config, + min_loc, + min_stmt, + collect_structural_findings, + block_min_loc, + block_min_stmt, + segment_min_loc, + segment_min_stmt, + ) + ) + if on_advance is not None: + on_advance() + + if _should_use_parallel(len(files_to_process), processes): + try: + with ProcessPoolExecutor(max_workers=processes) as executor: + for idx in range(0, len(files_to_process), batch_size): + batch = files_to_process[idx : idx + batch_size] + futures = [ + executor.submit( + process_file, + filepath, + root_str, + boot.config, + min_loc, + min_stmt, + collect_structural_findings, + block_min_loc, + block_min_stmt, + segment_min_loc, + segment_min_stmt, + ) + for filepath in batch + ] + future_to_path = { + id(future): filepath + for future, filepath in zip(futures, batch, strict=True) + } + for future in as_completed(futures): + filepath = future_to_path[id(future)] + try: + _accept_result(future.result()) + except Exception as exc: # pragma: no cover - worker crash + files_skipped += 1 + failed_files.append(f"{filepath}: {exc}") + if on_worker_error is not None: + on_worker_error(str(exc)) + if on_advance is not None: + on_advance() + except (OSError, RuntimeError, PermissionError) as exc: + if on_parallel_fallback is not None: + on_parallel_fallback(exc) + _run_sequential(files_to_process) + else: + _run_sequential(files_to_process) + + return ProcessingResult( + units=tuple(sorted(all_units, key=_group_item_sort_key)), + blocks=tuple(sorted(all_blocks, key=_group_item_sort_key)), + 
segments=tuple(sorted(all_segments, key=_group_item_sort_key)), + class_metrics=tuple(sorted(all_class_metrics, key=_class_metric_sort_key)), + module_deps=tuple(sorted(all_module_deps, key=_module_dep_sort_key)), + dead_candidates=tuple( + sorted(all_dead_candidates, key=_dead_candidate_sort_key) + ), + referenced_names=frozenset(all_referenced_names), + referenced_qualnames=frozenset(all_referenced_qualnames), + files_analyzed=files_analyzed, + files_skipped=files_skipped, + analyzed_lines=analyzed_lines, + analyzed_functions=analyzed_functions, + analyzed_methods=analyzed_methods, + analyzed_classes=analyzed_classes, + failed_files=tuple(sorted(failed_files)), + source_read_failures=tuple(sorted(source_read_failures)), + structural_findings=tuple(all_structural_findings), + ) + + +def _module_names_from_units(units: Sequence[GroupItemLike]) -> frozenset[str]: + modules: set[str] = set() + for unit in units: + qualname = _as_str(unit.get("qualname")) + module_name = qualname.split(":", 1)[0] if ":" in qualname else qualname + if module_name: + modules.add(module_name) + return frozenset(sorted(modules)) + + +def compute_project_metrics( + *, + units: Sequence[GroupItemLike], + class_metrics: Sequence[ClassMetrics], + module_deps: Sequence[ModuleDep], + dead_candidates: Sequence[DeadCandidate], + referenced_names: frozenset[str], + referenced_qualnames: frozenset[str], + files_found: int, + files_analyzed_or_cached: int, + function_clone_groups: int, + block_clone_groups: int, + skip_dependencies: bool, + skip_dead_code: bool, +) -> tuple[ProjectMetrics, DepGraph, tuple[DeadItem, ...]]: + unit_rows = sorted(units, key=_group_item_sort_key) + complexities = tuple( + max(1, _as_int(row.get("cyclomatic_complexity"), 1)) for row in unit_rows + ) + complexity_max = max(complexities) if complexities else 0 + complexity_avg = ( + float(sum(complexities)) / float(len(complexities)) if complexities else 0.0 + ) + high_risk_functions = tuple( + sorted( + { + 
_as_str(row.get("qualname")) + for row in unit_rows + if _as_str(row.get("risk")) == RISK_HIGH + } + ) + ) + + classes_sorted = tuple(sorted(class_metrics, key=_class_metric_sort_key)) + coupling_values = tuple(metric.cbo for metric in classes_sorted) + coupling_max = max(coupling_values) if coupling_values else 0 + coupling_avg = ( + float(sum(coupling_values)) / float(len(coupling_values)) + if coupling_values + else 0.0 + ) + high_risk_classes = tuple( + sorted( + { + metric.qualname + for metric in classes_sorted + if metric.risk_coupling == RISK_HIGH + } + ) + ) + + cohesion_values = tuple(metric.lcom4 for metric in classes_sorted) + cohesion_max = max(cohesion_values) if cohesion_values else 0 + cohesion_avg = ( + float(sum(cohesion_values)) / float(len(cohesion_values)) + if cohesion_values + else 0.0 + ) + low_cohesion_classes = tuple( + sorted( + { + metric.qualname + for metric in classes_sorted + if metric.risk_cohesion == RISK_HIGH + } + ) + ) + + dep_graph = DepGraph( + modules=frozenset(), + edges=(), + cycles=(), + max_depth=0, + longest_chains=(), + ) + if not skip_dependencies: + dep_graph = build_dep_graph( + modules=_module_names_from_units(unit_rows), + deps=module_deps, + ) + + dead_items: tuple[DeadItem, ...] 
= () + if not skip_dead_code: + dead_items = find_unused( + definitions=tuple(dead_candidates), + referenced_names=referenced_names, + referenced_qualnames=referenced_qualnames, + ) + + health = compute_health( + HealthInputs( + files_found=files_found, + files_analyzed_or_cached=files_analyzed_or_cached, + function_clone_groups=function_clone_groups, + block_clone_groups=block_clone_groups, + complexity_avg=complexity_avg, + complexity_max=complexity_max, + high_risk_functions=len(high_risk_functions), + coupling_avg=coupling_avg, + coupling_max=coupling_max, + high_risk_classes=len(high_risk_classes), + cohesion_avg=cohesion_avg, + low_cohesion_classes=len(low_cohesion_classes), + dependency_cycles=len(dep_graph.cycles), + dependency_max_depth=dep_graph.max_depth, + dead_code_items=len(dead_items), + ) + ) + + project_metrics = ProjectMetrics( + complexity_avg=complexity_avg, + complexity_max=complexity_max, + high_risk_functions=high_risk_functions, + coupling_avg=coupling_avg, + coupling_max=coupling_max, + high_risk_classes=high_risk_classes, + cohesion_avg=cohesion_avg, + cohesion_max=cohesion_max, + low_cohesion_classes=low_cohesion_classes, + dependency_modules=len(dep_graph.modules), + dependency_edges=len(dep_graph.edges), + dependency_edge_list=dep_graph.edges, + dependency_cycles=dep_graph.cycles, + dependency_max_depth=dep_graph.max_depth, + dependency_longest_chains=dep_graph.longest_chains, + dead_code=dead_items, + health=health, + ) + return project_metrics, dep_graph, dead_items + + +def compute_suggestions( + *, + project_metrics: ProjectMetrics, + units: Sequence[GroupItemLike], + class_metrics: Sequence[ClassMetrics], + func_groups: Mapping[str, Sequence[GroupItemLike]], + block_groups: Mapping[str, Sequence[GroupItemLike]], + segment_groups: Mapping[str, Sequence[GroupItemLike]], + block_group_facts: Mapping[str, Mapping[str, str]] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, + scan_root: str = "", +) 
-> tuple[Suggestion, ...]: + return generate_suggestions( + project_metrics=project_metrics, + units=units, + class_metrics=class_metrics, + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + block_group_facts=block_group_facts, + structural_findings=structural_findings, + scan_root=scan_root, + ) + + +def build_metrics_report_payload( + *, + project_metrics: ProjectMetrics, + units: Sequence[GroupItemLike], + class_metrics: Sequence[ClassMetrics], + suppressed_dead_code: Sequence[DeadItem] = (), +) -> dict[str, object]: + sorted_units = sorted( + units, + key=lambda item: ( + _as_int(item.get("cyclomatic_complexity")), + _as_int(item.get("nesting_depth")), + _as_str(item.get("qualname")), + ), + reverse=True, + ) + complexity_rows = [ + { + "qualname": _as_str(item.get("qualname")), + "filepath": _as_str(item.get("filepath")), + "start_line": _as_int(item.get("start_line")), + "end_line": _as_int(item.get("end_line")), + "cyclomatic_complexity": _as_int(item.get("cyclomatic_complexity"), 1), + "nesting_depth": _as_int(item.get("nesting_depth")), + "risk": _as_str(item.get("risk"), RISK_LOW), + } + for item in sorted_units + ] + classes_sorted = sorted( + class_metrics, + key=lambda item: (item.cbo, item.lcom4, item.qualname), + reverse=True, + ) + coupling_rows = [ + { + "qualname": metric.qualname, + "filepath": metric.filepath, + "start_line": metric.start_line, + "end_line": metric.end_line, + "cbo": metric.cbo, + "risk": metric.risk_coupling, + "coupled_classes": list(metric.coupled_classes), + } + for metric in classes_sorted + ] + cohesion_rows = [ + { + "qualname": metric.qualname, + "filepath": metric.filepath, + "start_line": metric.start_line, + "end_line": metric.end_line, + "lcom4": metric.lcom4, + "risk": metric.risk_cohesion, + "method_count": metric.method_count, + "instance_var_count": metric.instance_var_count, + } + for metric in classes_sorted + ] + active_dead_items = tuple(project_metrics.dead_code) + 
suppressed_dead_items = tuple(suppressed_dead_code) + + def _serialize_dead_item( + item: DeadItem, + *, + suppressed: bool = False, + ) -> dict[str, object]: + payload: dict[str, object] = { + "qualname": item.qualname, + "filepath": item.filepath, + "start_line": item.start_line, + "end_line": item.end_line, + "kind": item.kind, + "confidence": item.confidence, + } + if suppressed: + payload["suppressed_by"] = [ + { + "rule": DEAD_CODE_RULE_ID, + "source": INLINE_CODECLONE_SUPPRESSION_SOURCE, + } + ] + return payload + + return { + CATEGORY_COMPLEXITY: { + "functions": complexity_rows, + "summary": { + "total": len(complexity_rows), + "average": round(project_metrics.complexity_avg, 2), + "max": project_metrics.complexity_max, + "high_risk": len(project_metrics.high_risk_functions), + }, + }, + CATEGORY_COUPLING: { + "classes": coupling_rows, + "summary": { + "total": len(coupling_rows), + "average": round(project_metrics.coupling_avg, 2), + "max": project_metrics.coupling_max, + "high_risk": len(project_metrics.high_risk_classes), + }, + }, + CATEGORY_COHESION: { + "classes": cohesion_rows, + "summary": { + "total": len(cohesion_rows), + "average": round(project_metrics.cohesion_avg, 2), + "max": project_metrics.cohesion_max, + "low_cohesion": len(project_metrics.low_cohesion_classes), + }, + }, + "dependencies": { + "modules": project_metrics.dependency_modules, + "edges": project_metrics.dependency_edges, + "max_depth": project_metrics.dependency_max_depth, + "cycles": [list(cycle) for cycle in project_metrics.dependency_cycles], + "longest_chains": [ + list(chain) for chain in project_metrics.dependency_longest_chains + ], + "edge_list": [ + { + "source": edge.source, + "target": edge.target, + "import_type": edge.import_type, + "line": edge.line, + } + for edge in project_metrics.dependency_edge_list + ], + }, + "dead_code": { + "items": [_serialize_dead_item(item) for item in active_dead_items], + "suppressed_items": [ + _serialize_dead_item(item, 
suppressed=True) + for item in suppressed_dead_items + ], + "summary": { + "total": len(active_dead_items), + "critical": sum( + 1 + for item in active_dead_items + if item.confidence == CONFIDENCE_HIGH + ), + "high_confidence": sum( + 1 + for item in active_dead_items + if item.confidence == CONFIDENCE_HIGH + ), + "suppressed": len(suppressed_dead_items), + }, + }, + "health": { + "score": project_metrics.health.total, + "grade": project_metrics.health.grade, + "dimensions": dict(project_metrics.health.dimensions), + }, + } + + +def analyze( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + processing: ProcessingResult, +) -> AnalysisResult: + func_groups = build_groups(processing.units) + block_groups = build_block_groups(processing.blocks) + segment_groups_raw = build_segment_groups(processing.segments) + segment_groups_raw_digest = _segment_groups_digest(segment_groups_raw) + cached_projection = discovery.cached_segment_report_projection + if ( + cached_projection is not None + and cached_projection.get("digest") == segment_groups_raw_digest + ): + projection_groups = cached_projection.get("groups", {}) + segment_groups = { + group_key: [ + { + "segment_hash": str(item["segment_hash"]), + "segment_sig": str(item["segment_sig"]), + "filepath": str(item["filepath"]), + "qualname": str(item["qualname"]), + "start_line": int(item["start_line"]), + "end_line": int(item["end_line"]), + "size": int(item["size"]), + } + for item in projection_groups[group_key] + ] + for group_key in sorted(projection_groups) + } + suppressed_segment_groups = int(cached_projection.get("suppressed", 0)) + else: + segment_groups, suppressed_segment_groups = prepare_segment_report_groups( + segment_groups_raw + ) + + block_groups_report = prepare_block_report_groups(block_groups) + block_group_facts = build_block_group_facts(block_groups_report) + + func_clones_count = len(func_groups) + block_clones_count = len(block_groups) + segment_clones_count = len(segment_groups) + 
files_analyzed_or_cached = processing.files_analyzed + discovery.cache_hits + + project_metrics: ProjectMetrics | None = None + metrics_payload: dict[str, object] | None = None + suggestions: tuple[Suggestion, ...] = () + suppressed_dead_items: tuple[DeadItem, ...] = () + cohort_structural_findings: tuple[StructuralFindingGroup, ...] = () + if _should_collect_structural_findings(boot.output_paths): + cohort_structural_findings = build_clone_cohort_structural_findings( + func_groups=func_groups, + ) + combined_structural_findings = ( + *processing.structural_findings, + *cohort_structural_findings, + ) + + if not boot.args.skip_metrics: + project_metrics, _, _ = compute_project_metrics( + units=processing.units, + class_metrics=processing.class_metrics, + module_deps=processing.module_deps, + dead_candidates=processing.dead_candidates, + referenced_names=processing.referenced_names, + referenced_qualnames=processing.referenced_qualnames, + files_found=discovery.files_found, + files_analyzed_or_cached=files_analyzed_or_cached, + function_clone_groups=func_clones_count, + block_clone_groups=block_clones_count, + skip_dependencies=boot.args.skip_dependencies, + skip_dead_code=boot.args.skip_dead_code, + ) + if not boot.args.skip_dead_code: + suppressed_dead_items = find_suppressed_unused( + definitions=tuple(processing.dead_candidates), + referenced_names=processing.referenced_names, + referenced_qualnames=processing.referenced_qualnames, + ) + suggestions = compute_suggestions( + project_metrics=project_metrics, + units=processing.units, + class_metrics=processing.class_metrics, + func_groups=func_groups, + block_groups=block_groups_report, + segment_groups=segment_groups, + block_group_facts=block_group_facts, + structural_findings=combined_structural_findings, + scan_root=str(boot.root), + ) + metrics_payload = build_metrics_report_payload( + project_metrics=project_metrics, + units=processing.units, + class_metrics=processing.class_metrics, + 
suppressed_dead_code=suppressed_dead_items, + ) + + return AnalysisResult( + func_groups=func_groups, + block_groups=block_groups, + block_groups_report=block_groups_report, + segment_groups=segment_groups, + suppressed_segment_groups=suppressed_segment_groups, + block_group_facts=block_group_facts, + func_clones_count=func_clones_count, + block_clones_count=block_clones_count, + segment_clones_count=segment_clones_count, + files_analyzed_or_cached=files_analyzed_or_cached, + project_metrics=project_metrics, + metrics_payload=metrics_payload, + suggestions=suggestions, + segment_groups_raw_digest=segment_groups_raw_digest, + suppressed_dead_code_items=len(suppressed_dead_items), + structural_findings=combined_structural_findings, + ) + + +def report( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + processing: ProcessingResult, + analysis: AnalysisResult, + report_meta: Mapping[str, object], + new_func: Collection[str], + new_block: Collection[str], + html_builder: Callable[..., str] | None = None, + metrics_diff: object | None = None, +) -> ReportArtifacts: + contents: dict[str, str | None] = { + "html": None, + "json": None, + "md": None, + "sarif": None, + "text": None, + } + + sf = analysis.structural_findings if analysis.structural_findings else None + report_inventory = { + "files": { + "total_found": discovery.files_found, + "analyzed": processing.files_analyzed, + "cached": discovery.cache_hits, + "skipped": processing.files_skipped, + "source_io_skipped": len(processing.source_read_failures), + }, + "code": { + "parsed_lines": processing.analyzed_lines + discovery.cached_lines, + "functions": processing.analyzed_functions + discovery.cached_functions, + "methods": processing.analyzed_methods + discovery.cached_methods, + "classes": processing.analyzed_classes + discovery.cached_classes, + }, + "file_list": list(discovery.all_file_paths), + } + report_document: dict[str, object] | None = None + needs_report_document = boot.output_paths.html is 
not None or any( + path is not None + for path in ( + boot.output_paths.json, + boot.output_paths.md, + boot.output_paths.sarif, + boot.output_paths.text, + ) + ) + + if needs_report_document: + report_document = build_report_document( + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + meta=report_meta, + inventory=report_inventory, + block_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + new_segment_group_keys=set(analysis.segment_groups.keys()), + metrics=analysis.metrics_payload, + suggestions=analysis.suggestions, + structural_findings=sf, + ) + + if boot.output_paths.html and html_builder is not None: + contents["html"] = html_builder( + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + block_group_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + report_meta=report_meta, + metrics=analysis.metrics_payload, + suggestions=analysis.suggestions, + structural_findings=sf, + report_document=report_document, + metrics_diff=metrics_diff, + title="CodeClone Report", + context_lines=3, + max_snippet_lines=220, + ) + + if any( + path is not None + for path in ( + boot.output_paths.json, + boot.output_paths.md, + boot.output_paths.sarif, + boot.output_paths.text, + ) + ): + assert report_document is not None + + if boot.output_paths.json and report_document is not None: + contents["json"] = render_json_report_document(report_document) + + if boot.output_paths.md and report_document is not None: + from .report.markdown import to_markdown_report + + contents["md"] = to_markdown_report( + report_document=report_document, + meta=report_meta, + inventory=report_inventory, + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + 
block_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + new_segment_group_keys=set(analysis.segment_groups.keys()), + metrics=analysis.metrics_payload, + suggestions=analysis.suggestions, + structural_findings=sf, + ) + + if boot.output_paths.sarif and report_document is not None: + from .report.sarif import to_sarif_report + + contents["sarif"] = to_sarif_report( + report_document=report_document, + meta=report_meta, + inventory=report_inventory, + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + block_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + new_segment_group_keys=set(analysis.segment_groups.keys()), + metrics=analysis.metrics_payload, + suggestions=analysis.suggestions, + structural_findings=sf, + ) + + if boot.output_paths.text and report_document is not None: + contents["text"] = render_text_report_document(report_document) + + return ReportArtifacts( + html=contents["html"], + json=contents["json"], + md=contents["md"], + sarif=contents["sarif"], + text=contents["text"], + ) + + +def metric_gate_reasons( + *, + project_metrics: ProjectMetrics, + metrics_diff: MetricsDiff | None, + config: MetricGateConfig, +) -> tuple[str, ...]: + reasons: list[str] = [] + + if ( + config.fail_complexity >= 0 + and project_metrics.complexity_max > config.fail_complexity + ): + reasons.append( + "Complexity threshold exceeded: " + f"max CC={project_metrics.complexity_max}, " + f"threshold={config.fail_complexity}." + ) + if ( + config.fail_coupling >= 0 + and project_metrics.coupling_max > config.fail_coupling + ): + reasons.append( + "Coupling threshold exceeded: " + f"max CBO={project_metrics.coupling_max}, " + f"threshold={config.fail_coupling}." 
+ ) + if ( + config.fail_cohesion >= 0 + and project_metrics.cohesion_max > config.fail_cohesion + ): + reasons.append( + "Cohesion threshold exceeded: " + f"max LCOM4={project_metrics.cohesion_max}, " + f"threshold={config.fail_cohesion}." + ) + if config.fail_cycles and project_metrics.dependency_cycles: + reasons.append( + "Dependency cycles detected: " + f"{len(project_metrics.dependency_cycles)} cycle(s)." + ) + if config.fail_dead_code: + high_conf_dead = [ + item for item in project_metrics.dead_code if item.confidence == "high" + ] + if high_conf_dead: + reasons.append( + f"Dead code detected (high confidence): {len(high_conf_dead)} item(s)." + ) + if config.fail_health >= 0 and project_metrics.health.total < config.fail_health: + reasons.append( + "Health score below threshold: " + f"score={project_metrics.health.total}, threshold={config.fail_health}." + ) + + if config.fail_on_new_metrics and metrics_diff is not None: + if metrics_diff.new_high_risk_functions: + reasons.append( + "New high-risk functions vs metrics baseline: " + f"{len(metrics_diff.new_high_risk_functions)}." + ) + if metrics_diff.new_high_coupling_classes: + reasons.append( + "New high-coupling classes vs metrics baseline: " + f"{len(metrics_diff.new_high_coupling_classes)}." + ) + if metrics_diff.new_cycles: + reasons.append( + "New dependency cycles vs metrics baseline: " + f"{len(metrics_diff.new_cycles)}." + ) + if metrics_diff.new_dead_code: + reasons.append( + "New dead code items vs metrics baseline: " + f"{len(metrics_diff.new_dead_code)}." + ) + if metrics_diff.health_delta < 0: + reasons.append( + "Health score regressed vs metrics baseline: " + f"delta={metrics_diff.health_delta}." 
+ ) + + return tuple(reasons) + + +def gate( + *, + boot: BootstrapResult, + analysis: AnalysisResult, + new_func: Collection[str], + new_block: Collection[str], + metrics_diff: MetricsDiff | None, +) -> GatingResult: + reasons: list[str] = [] + + if analysis.project_metrics is not None: + metric_reasons = metric_gate_reasons( + project_metrics=analysis.project_metrics, + metrics_diff=metrics_diff, + config=MetricGateConfig( + fail_complexity=boot.args.fail_complexity, + fail_coupling=boot.args.fail_coupling, + fail_cohesion=boot.args.fail_cohesion, + fail_cycles=boot.args.fail_cycles, + fail_dead_code=boot.args.fail_dead_code, + fail_health=boot.args.fail_health, + fail_on_new_metrics=boot.args.fail_on_new_metrics, + ), + ) + reasons.extend(f"metric:{reason}" for reason in metric_reasons) + + if boot.args.fail_on_new and (new_func or new_block): + reasons.append("clone:new") + + total_clone_groups = analysis.func_clones_count + analysis.block_clones_count + if 0 <= boot.args.fail_threshold < total_clone_groups: + reasons.append( + f"clone:threshold:{total_clone_groups}:{boot.args.fail_threshold}" + ) + + if reasons: + return GatingResult( + exit_code=int(ExitCode.GATING_FAILURE), + reasons=tuple(reasons), + ) + + return GatingResult(exit_code=int(ExitCode.SUCCESS), reasons=()) diff --git a/codeclone/report.py b/codeclone/report.py deleted file mode 100644 index 29d975d..0000000 --- a/codeclone/report.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. 
-""" - -from __future__ import annotations - -from ._report_blocks import _merge_block_items, prepare_block_report_groups -from ._report_explain import build_block_group_facts -from ._report_grouping import build_block_groups, build_groups, build_segment_groups -from ._report_segments import ( - _CONTROL_FLOW_STMTS, - _FORBIDDEN_STMTS, - SEGMENT_MIN_UNIQUE_STMT_TYPES, - _analyze_segment_statements, - _assign_targets_attribute_only, - _collect_file_functions, - _merge_segment_items, - _QualnameCollector, - _segment_statements, - _SegmentAnalysis, - prepare_segment_report_groups, -) -from ._report_serialize import ( - _format_meta_text_value, - to_json, - to_json_report, - to_text, - to_text_report, -) -from ._report_types import GroupItem, GroupMap - -__all__ = [ - "SEGMENT_MIN_UNIQUE_STMT_TYPES", - "_CONTROL_FLOW_STMTS", - "_FORBIDDEN_STMTS", - "GroupItem", - "GroupMap", - "_QualnameCollector", - "_SegmentAnalysis", - "_analyze_segment_statements", - "_assign_targets_attribute_only", - "_collect_file_functions", - "_format_meta_text_value", - "_merge_block_items", - "_merge_segment_items", - "_segment_statements", - "build_block_group_facts", - "build_block_groups", - "build_groups", - "build_segment_groups", - "prepare_block_report_groups", - "prepare_segment_report_groups", - "to_json", - "to_json_report", - "to_text", - "to_text_report", -] diff --git a/codeclone/report/__init__.py b/codeclone/report/__init__.py new file mode 100644 index 0000000..08f4da3 --- /dev/null +++ b/codeclone/report/__init__.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ..extractor import _QualnameCollector +from ..grouping import build_block_groups, build_groups, build_segment_groups +from .blocks import merge_block_items as _merge_block_items +from .blocks import prepare_block_report_groups +from .explain import build_block_group_facts +from .markdown import render_markdown_report_document, 
to_markdown_report +from .sarif import render_sarif_report_document, to_sarif_report +from .segments import ( + _CONTROL_FLOW_STMTS, + _FORBIDDEN_STMTS, + SEGMENT_MIN_UNIQUE_STMT_TYPES, + _SegmentAnalysis, + prepare_segment_report_groups, +) +from .segments import ( + analyze_segment_statements as _analyze_segment_statements, +) +from .segments import ( + assign_targets_attribute_only as _assign_targets_attribute_only, +) +from .segments import ( + collect_file_functions as _collect_file_functions, +) +from .segments import ( + merge_segment_items as _merge_segment_items, +) +from .segments import ( + segment_statements as _segment_statements, +) +from .serialize import ( + format_meta_text_value as _format_meta_text_value, +) +from .serialize import ( + render_json_report_document, + render_text_report_document, +) +from .suggestions import classify_clone_type, generate_suggestions +from .types import GroupItem, GroupMap + +__all__ = [ + "SEGMENT_MIN_UNIQUE_STMT_TYPES", + "_CONTROL_FLOW_STMTS", + "_FORBIDDEN_STMTS", + "GroupItem", + "GroupMap", + "_QualnameCollector", + "_SegmentAnalysis", + "_analyze_segment_statements", + "_assign_targets_attribute_only", + "_collect_file_functions", + "_format_meta_text_value", + "_merge_block_items", + "_merge_segment_items", + "_segment_statements", + "build_block_group_facts", + "build_block_groups", + "build_groups", + "build_segment_groups", + "classify_clone_type", + "generate_suggestions", + "prepare_block_report_groups", + "prepare_segment_report_groups", + "render_json_report_document", + "render_markdown_report_document", + "render_sarif_report_document", + "render_text_report_document", + "to_markdown_report", + "to_sarif_report", +] diff --git a/codeclone/report/_formatting.py b/codeclone/report/_formatting.py new file mode 100644 index 0000000..9b3cffb --- /dev/null +++ b/codeclone/report/_formatting.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import 
annotations + +__all__ = ["format_spread_text"] + + +def format_spread_text(files: int, functions: int) -> str: + file_word = "file" if files == 1 else "files" + function_word = "function" if functions == 1 else "functions" + return f"{files} {file_word} / {functions} {function_word}" diff --git a/codeclone/report/_source_kinds.py b/codeclone/report/_source_kinds.py new file mode 100644 index 0000000..4e9dee8 --- /dev/null +++ b/codeclone/report/_source_kinds.py @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ..domain.source_scope import ( + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) + +SOURCE_KIND_FILTER_VALUES: tuple[str, ...] = ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, +) + +_SOURCE_KIND_LABELS: dict[str, str] = { + SOURCE_KIND_PRODUCTION: "Production", + SOURCE_KIND_TESTS: "Tests", + SOURCE_KIND_FIXTURES: "Fixtures", + SOURCE_KIND_MIXED: "Mixed", + SOURCE_KIND_OTHER: "Other", +} + +__all__ = [ + "SOURCE_KIND_FILTER_VALUES", + "normalize_source_kind", + "source_kind_label", +] + + +def normalize_source_kind(source_kind: str) -> str: + return source_kind.strip().lower() or SOURCE_KIND_OTHER + + +def source_kind_label(source_kind: str) -> str: + normalized = normalize_source_kind(source_kind) + return _SOURCE_KIND_LABELS.get( + normalized, + normalized.title() or _SOURCE_KIND_LABELS[SOURCE_KIND_OTHER], + ) diff --git a/codeclone/report/blocks.py b/codeclone/report/blocks.py new file mode 100644 index 0000000..7e1b592 --- /dev/null +++ b/codeclone/report/blocks.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .merge import coerce_positive_int, merge_overlapping_items + +if TYPE_CHECKING: + from .types import GroupItem, GroupItemLike, 
GroupItemsLike, GroupMap, GroupMapLike + + +def block_item_sort_key(item: GroupItemLike) -> tuple[str, str, int, int]: + start_line = coerce_positive_int(item.get("start_line")) or 0 + end_line = coerce_positive_int(item.get("end_line")) or 0 + return ( + str(item.get("filepath", "")), + str(item.get("qualname", "")), + start_line, + end_line, + ) + + +def merge_block_items(items: GroupItemsLike) -> list[GroupItem]: + return merge_overlapping_items(items, sort_key=block_item_sort_key) + + +def prepare_block_report_groups(block_groups: GroupMapLike) -> GroupMap: + """ + Convert sliding block windows into maximal merged regions for reporting. + Block hash keys remain unchanged. + """ + prepared: GroupMap = {} + for key, items in block_groups.items(): + merged = merge_block_items(items) + if merged: + prepared[key] = merged + else: + prepared[key] = [ + dict(item) for item in sorted(items, key=block_item_sort_key) + ] + return prepared diff --git a/codeclone/report/derived.py b/codeclone/report/derived.py new file mode 100644 index 0000000..cfa8fd1 --- /dev/null +++ b/codeclone/report/derived.py @@ -0,0 +1,227 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import Counter +from typing import TYPE_CHECKING + +from .. 
import _coerce +from ..domain.source_scope import ( + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) +from ..domain.source_scope import ( + SOURCE_KIND_ORDER as _SOURCE_KIND_ORDER, +) +from ..models import ReportLocation, SourceKind, StructuralFindingOccurrence + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping, Sequence + +__all__ = [ + "SOURCE_KIND_ORDER", + "classify_source_kind", + "combine_source_kinds", + "format_group_location_label", + "format_report_location_label", + "format_spread_location_label", + "group_spread", + "relative_report_path", + "report_location_from_group_item", + "report_location_from_structural_occurrence", + "representative_locations", + "source_kind_breakdown", +] + +SOURCE_KIND_ORDER: dict[SourceKind, int] = { + SOURCE_KIND_PRODUCTION: _SOURCE_KIND_ORDER[SOURCE_KIND_PRODUCTION], + SOURCE_KIND_TESTS: _SOURCE_KIND_ORDER[SOURCE_KIND_TESTS], + SOURCE_KIND_FIXTURES: _SOURCE_KIND_ORDER[SOURCE_KIND_FIXTURES], + SOURCE_KIND_MIXED: _SOURCE_KIND_ORDER[SOURCE_KIND_MIXED], + SOURCE_KIND_OTHER: _SOURCE_KIND_ORDER[SOURCE_KIND_OTHER], +} + +_as_int = _coerce.as_int + + +def _normalize_path(value: str) -> str: + return value.replace("\\", "/").strip() + + +def relative_report_path(filepath: str, *, scan_root: str = "") -> str: + normalized_path = _normalize_path(filepath) + normalized_root = _normalize_path(scan_root).rstrip("/") + if not normalized_path: + return normalized_path + if not normalized_root: + return normalized_path + prefix = f"{normalized_root}/" + if normalized_path.startswith(prefix): + return normalized_path[len(prefix) :] + if normalized_path == normalized_root: + return normalized_path.rsplit("/", maxsplit=1)[-1] + return normalized_path + + +def classify_source_kind(filepath: str, *, scan_root: str = "") -> SourceKind: + rel = relative_report_path(filepath, scan_root=scan_root) + parts = [part for part in rel.lower().split("/") if part and part 
!= "."] + if not parts: + return SOURCE_KIND_OTHER + for idx, part in enumerate(parts): + if part != SOURCE_KIND_TESTS: + continue + if idx + 1 < len(parts) and parts[idx + 1] == SOURCE_KIND_FIXTURES: + return SOURCE_KIND_FIXTURES + return SOURCE_KIND_TESTS + return SOURCE_KIND_PRODUCTION + + +def source_kind_breakdown( + filepaths: Iterable[str], + *, + scan_root: str = "", +) -> tuple[tuple[SourceKind, int], ...]: + counts: Counter[SourceKind] = Counter( + classify_source_kind(filepath, scan_root=scan_root) for filepath in filepaths + ) + return tuple( + (kind, counts[kind]) + for kind in sorted(counts, key=lambda item: SOURCE_KIND_ORDER[item]) + if counts[kind] > 0 + ) + + +def combine_source_kinds( + kinds: Iterable[SourceKind] | Iterable[str], +) -> SourceKind: + normalized = tuple(str(kind).strip().lower() for kind in kinds if str(kind).strip()) + if not normalized: + return SOURCE_KIND_OTHER + allowed: tuple[SourceKind, ...] = ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + ) + unique = tuple(kind for kind in allowed if kind in set(normalized)) + if len(unique) == 1: + return unique[0] + return SOURCE_KIND_MIXED + + +def report_location_from_group_item( + item: Mapping[str, object], + *, + scan_root: str = "", +) -> ReportLocation: + filepath = str(item.get("filepath", "")) + start_line = _as_int(item.get("start_line")) + end_line = _as_int(item.get("end_line")) + qualname = str(item.get("qualname", "")) + return ReportLocation( + filepath=filepath, + relative_path=relative_report_path(filepath, scan_root=scan_root), + start_line=start_line, + end_line=end_line, + qualname=qualname, + source_kind=classify_source_kind(filepath, scan_root=scan_root), + ) + + +def report_location_from_structural_occurrence( + item: StructuralFindingOccurrence, + *, + scan_root: str = "", +) -> ReportLocation: + return ReportLocation( + filepath=item.file_path, + 
relative_path=relative_report_path(item.file_path, scan_root=scan_root), + start_line=item.start, + end_line=item.end, + qualname=item.qualname, + source_kind=classify_source_kind(item.file_path, scan_root=scan_root), + ) + + +def _location_key(location: ReportLocation) -> tuple[str, int, int, str]: + return ( + location.relative_path or location.filepath, + location.start_line, + location.end_line, + location.qualname, + ) + + +def representative_locations( + locations: Sequence[ReportLocation], + *, + limit: int = 3, +) -> tuple[ReportLocation, ...]: + unique: dict[tuple[str, int, int, str], ReportLocation] = {} + for location in sorted(locations, key=_location_key): + key = _location_key(location) + if key not in unique: + unique[key] = location + return tuple(list(unique.values())[:limit]) + + +def group_spread(locations: Sequence[ReportLocation]) -> tuple[int, int]: + file_count = len( + {location.relative_path or location.filepath for location in locations} + ) + function_count = len( + {location.qualname for location in locations if location.qualname} + ) + return file_count, function_count + + +def format_report_location_label(location: ReportLocation) -> str: + line = ( + f"{location.start_line}-{location.end_line}" + if location.end_line > location.start_line + else str(location.start_line) + ) + return f"{location.relative_path}:{line}" + + +def format_spread_location_label( + total_count: int, + *, + files: int, + functions: int, +) -> str: + count_word = "occurrence" if total_count == 1 else "occurrences" + file_word = "file" if files == 1 else "files" + function_word = "function" if functions == 1 else "functions" + return ( + f"{total_count} {count_word} across " + f"{files} {file_word} / {functions} {function_word}" + ) + + +def format_group_location_label( + locations: Sequence[ReportLocation], + *, + total_count: int, + spread_files: int | None = None, + spread_functions: int | None = None, +) -> str: + if total_count <= 0 or not locations: + 
return "(unknown)" + if total_count == 1: + return format_report_location_label(locations[0]) + files = spread_files if spread_files is not None else group_spread(locations)[0] + functions = ( + spread_functions if spread_functions is not None else group_spread(locations)[1] + ) + return format_spread_location_label( + total_count, + files=files, + functions=functions, + ) diff --git a/codeclone/_report_explain.py b/codeclone/report/explain.py similarity index 75% rename from codeclone/_report_explain.py rename to codeclone/report/explain.py index cb22179..5673e84 100644 --- a/codeclone/_report_explain.py +++ b/codeclone/report/explain.py @@ -1,10 +1,5 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -12,8 +7,10 @@ from bisect import bisect_left, bisect_right from dataclasses import dataclass from pathlib import Path +from typing import TYPE_CHECKING -from ._report_explain_contract import ( +from .. 
import _coerce +from .explain_contract import ( BLOCK_HINT_ASSERT_ONLY, BLOCK_HINT_ASSERT_ONLY_LABEL, BLOCK_HINT_ASSERT_ONLY_NOTE, @@ -22,7 +19,9 @@ resolve_group_compare_note, resolve_group_display_name, ) -from ._report_types import GroupItem, GroupMap + +if TYPE_CHECKING: + from .types import GroupItemsLike, GroupMapLike @dataclass(frozen=True, slots=True) @@ -36,13 +35,17 @@ class _StatementRecord: _StatementIndex = tuple[tuple[_StatementRecord, ...], tuple[int, ...]] +_EMPTY_ASSERT_RANGE_STATS = (0, 0, 0) -def _signature_parts(group_key: str) -> list[str]: +def signature_parts(group_key: str) -> list[str]: return [part for part in group_key.split("|") if part] -def _parsed_file_tree( +_as_int = _coerce.as_int + + +def parsed_file_tree( filepath: str, *, ast_cache: dict[str, ast.AST | None] ) -> ast.AST | None: if filepath in ast_cache: @@ -57,6 +60,14 @@ def _parsed_file_tree( return tree +def _cache_empty_assert_range_stats( + range_cache: dict[tuple[str, int, int], tuple[int, int, int]], + cache_key: tuple[str, int, int], +) -> tuple[int, int, int]: + range_cache[cache_key] = _EMPTY_ASSERT_RANGE_STATS + return _EMPTY_ASSERT_RANGE_STATS + + def _build_statement_index(tree: ast.AST) -> _StatementIndex: records = tuple( sorted( @@ -85,7 +96,7 @@ def _build_statement_index(tree: ast.AST) -> _StatementIndex: return records, start_lines -def _parsed_statement_index( +def parsed_statement_index( filepath: str, *, ast_cache: dict[str, ast.AST | None], @@ -94,7 +105,7 @@ def _parsed_statement_index( if filepath in stmt_index_cache: return stmt_index_cache[filepath] - tree = _parsed_file_tree(filepath, ast_cache=ast_cache) + tree = parsed_file_tree(filepath, ast_cache=ast_cache) if tree is None: stmt_index_cache[filepath] = None return None @@ -104,11 +115,11 @@ def _parsed_statement_index( return index -def _is_assert_like_stmt(stmt: ast.stmt) -> bool: - if isinstance(stmt, ast.Assert): +def is_assert_like_stmt(statement: ast.stmt) -> bool: + if isinstance(statement, 
ast.Assert): return True - if isinstance(stmt, ast.Expr): - value = stmt.value + if isinstance(statement, ast.Expr): + value = statement.value if isinstance(value, ast.Constant) and isinstance(value.value, str): return True if isinstance(value, ast.Call): @@ -120,7 +131,7 @@ def _is_assert_like_stmt(stmt: ast.stmt) -> bool: return False -def _assert_range_stats( +def assert_range_stats( *, filepath: str, start_line: int, @@ -133,35 +144,29 @@ def _assert_range_stats( if cache_key in range_cache: return range_cache[cache_key] - statement_index = _parsed_statement_index( + statement_index = parsed_statement_index( filepath, ast_cache=ast_cache, stmt_index_cache=stmt_index_cache, ) if statement_index is None: - range_cache[cache_key] = (0, 0, 0) - return 0, 0, 0 + return _cache_empty_assert_range_stats(range_cache, cache_key) records, start_lines = statement_index if not records: - range_cache[cache_key] = (0, 0, 0) - return 0, 0, 0 + return _cache_empty_assert_range_stats(range_cache, cache_key) left = bisect_left(start_lines, start_line) right = bisect_right(start_lines, end_line) if left >= right: - range_cache[cache_key] = (0, 0, 0) - return 0, 0, 0 + return _cache_empty_assert_range_stats(range_cache, cache_key) - total = 0 - assert_like = 0 - max_consecutive = 0 - current_consecutive = 0 + total, assert_like, max_consecutive, current_consecutive = (0, 0, 0, 0) for record in records[left:right]: if record.end_line > end_line: continue total += 1 - if _is_assert_like_stmt(record.node): + if is_assert_like_stmt(record.node): assert_like += 1 current_consecutive += 1 if current_consecutive > max_consecutive: @@ -170,15 +175,14 @@ def _assert_range_stats( current_consecutive = 0 if total == 0: - range_cache[cache_key] = (0, 0, 0) - return 0, 0, 0 + return _cache_empty_assert_range_stats(range_cache, cache_key) stats = (total, assert_like, max_consecutive) range_cache[cache_key] = stats return stats -def _is_assert_only_range( +def is_assert_only_range( *, filepath: 
str, start_line: int, @@ -187,7 +191,7 @@ def _is_assert_only_range( stmt_index_cache: dict[str, _StatementIndex | None], range_cache: dict[tuple[str, int, int], tuple[int, int, int]], ) -> bool: - total, assert_like, _ = _assert_range_stats( + total, assert_like, _ = assert_range_stats( filepath=filepath, start_line=start_line, end_line=end_line, @@ -198,12 +202,10 @@ def _is_assert_only_range( return total > 0 and total == assert_like -def _base_block_facts(group_key: str) -> dict[str, str]: - signature_parts = _signature_parts(group_key) - window_size = max(1, len(signature_parts)) - repeated_signature = len(signature_parts) > 1 and all( - part == signature_parts[0] for part in signature_parts - ) +def base_block_facts(group_key: str) -> dict[str, str]: + parts = signature_parts(group_key) + window_size = max(1, len(parts)) + repeated_signature = len(parts) > 1 and all(part == parts[0] for part in parts) facts: dict[str, str] = { "match_rule": "normalized_sliding_window", "block_size": str(window_size), @@ -213,36 +215,38 @@ def _base_block_facts(group_key: str) -> dict[str, str]: if repeated_signature: facts["pattern"] = BLOCK_PATTERN_REPEATED_STMT_HASH facts["pattern_label"] = BLOCK_PATTERN_REPEATED_STMT_HASH - facts["pattern_display"] = f"{signature_parts[0][:12]} x{window_size}" + facts["pattern_display"] = f"{parts[0][:12]} x{window_size}" return facts -def _enrich_with_assert_facts( +def enrich_with_assert_facts( *, facts: dict[str, str], - items: list[GroupItem], + items: GroupItemsLike, ast_cache: dict[str, ast.AST | None], stmt_index_cache: dict[str, _StatementIndex | None], range_cache: dict[tuple[str, int, int], tuple[int, int, int]], ) -> None: - assert_only = True - total_statements = 0 - assert_statements = 0 - max_consecutive_asserts = 0 + ( + assert_only, + total_statements, + assert_statements, + max_consecutive_asserts, + ) = _initial_assert_fact_state() if not items: assert_only = False for item in items: filepath = str(item.get("filepath", 
"")) - start_line = int(item.get("start_line", 0)) - end_line = int(item.get("end_line", 0)) + start_line = _as_int(item.get("start_line", 0)) + end_line = _as_int(item.get("end_line", 0)) range_total = 0 range_assert = 0 range_max_consecutive = 0 if filepath and start_line > 0 and end_line > 0: - range_total, range_assert, range_max_consecutive = _assert_range_stats( + range_total, range_assert, range_max_consecutive = assert_range_stats( filepath=filepath, start_line=start_line, end_line=end_line, @@ -253,14 +257,15 @@ def _enrich_with_assert_facts( total_statements += range_total assert_statements += range_assert max_consecutive_asserts = max( - max_consecutive_asserts, range_max_consecutive + max_consecutive_asserts, + range_max_consecutive, ) if ( not filepath or start_line <= 0 or end_line <= 0 - or not _is_assert_only_range( + or not is_assert_only_range( filepath=filepath, start_line=start_line, end_line=end_line, @@ -283,7 +288,11 @@ def _enrich_with_assert_facts( facts["hint_note"] = BLOCK_HINT_ASSERT_ONLY_NOTE -def build_block_group_facts(block_groups: GroupMap) -> dict[str, dict[str, str]]: +def _initial_assert_fact_state() -> tuple[bool, int, int, int]: + return True, 0, 0, 0 + + +def build_block_group_facts(block_groups: GroupMapLike) -> dict[str, dict[str, str]]: """ Build deterministic explainability facts for block clone groups. 
@@ -296,8 +305,8 @@ def build_block_group_facts(block_groups: GroupMap) -> dict[str, dict[str, str]] facts_by_group: dict[str, dict[str, str]] = {} for group_key, items in block_groups.items(): - facts = _base_block_facts(group_key) - _enrich_with_assert_facts( + facts = base_block_facts(group_key) + enrich_with_assert_facts( facts=facts, items=items, ast_cache=ast_cache, @@ -309,7 +318,8 @@ def build_block_group_facts(block_groups: GroupMap) -> dict[str, dict[str, str]] facts["group_arity"] = str(group_arity) facts["instance_peer_count"] = str(peer_count) compare_note = resolve_group_compare_note( - group_arity=group_arity, peer_count=peer_count + group_arity=group_arity, + peer_count=peer_count, ) if compare_note is not None: facts["group_compare_note"] = compare_note diff --git a/codeclone/_report_explain_contract.py b/codeclone/report/explain_contract.py similarity index 66% rename from codeclone/_report_explain_contract.py rename to codeclone/report/explain_contract.py index 543ad02..2169ff2 100644 --- a/codeclone/_report_explain_contract.py +++ b/codeclone/report/explain_contract.py @@ -1,20 +1,10 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations from typing import Final -from .ui_messages import ( - REPORT_BLOCK_GROUP_DISPLAY_NAME_ASSERT_PATTERN, - fmt_report_block_group_compare_note_n_way, -) - BLOCK_PATTERN_REPEATED_STMT_HASH: Final = "repeated_stmt_hash" BLOCK_HINT_ASSERT_ONLY: Final = "assert_only" @@ -25,9 +15,13 @@ "This often occurs in test suites." 
) +GROUP_DISPLAY_NAME_BY_HINT_ID: Final[dict[str, str]] = { + BLOCK_HINT_ASSERT_ONLY: "Assert pattern block", +} + def format_n_way_group_compare_note(*, peer_count: int) -> str: - return fmt_report_block_group_compare_note_n_way(peer_count=peer_count) + return f"N-way group: each block matches {peer_count} peers in this group." def resolve_group_compare_note(*, group_arity: int, peer_count: int) -> str | None: @@ -37,9 +31,9 @@ def resolve_group_compare_note(*, group_arity: int, peer_count: int) -> str | No def resolve_group_display_name(*, hint_id: str | None) -> str | None: - if hint_id == BLOCK_HINT_ASSERT_ONLY: - return REPORT_BLOCK_GROUP_DISPLAY_NAME_ASSERT_PATTERN - return None + if hint_id is None: + return None + return GROUP_DISPLAY_NAME_BY_HINT_ID.get(hint_id) def format_group_instance_compare_meta( diff --git a/codeclone/report/findings.py b/codeclone/report/findings.py new file mode 100644 index 0000000..b8745eb --- /dev/null +++ b/codeclone/report/findings.py @@ -0,0 +1,556 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""CodeClone — structural code quality analysis for Python. + +Serialization and rendering helpers for structural findings (report-only layer). 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._html_badges import _source_kind_badge_html, _tab_empty +from .._html_escape import _escape_attr, _escape_html +from .._html_snippets import _FileCache, _render_code_block +from ..domain.findings import ( + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + STRUCTURAL_KIND_DUPLICATED_BRANCHES, +) +from ..domain.quality import RISK_HIGH, RISK_LOW +from ..structural_findings import normalize_structural_findings +from ._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label +from .derived import ( + combine_source_kinds, + group_spread, + relative_report_path, + report_location_from_structural_occurrence, +) + +if TYPE_CHECKING: + from collections.abc import Sequence + + from ..models import StructuralFindingGroup, StructuralFindingOccurrence + +__all__ = [ + "build_structural_findings_html_panel", +] + +# Human-readable label per finding kind +_KIND_LABEL: dict[str, str] = { + STRUCTURAL_KIND_DUPLICATED_BRANCHES: "Duplicated branches", + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: "Clone guard/exit divergence", + STRUCTURAL_KIND_CLONE_COHORT_DRIFT: "Clone cohort drift", +} + + +def _spread(items: Sequence[StructuralFindingOccurrence]) -> dict[str, int]: + """Compute spread metadata: unique files and functions in a finding group.""" + files: set[str] = set() + functions: set[str] = set() + for item in items: + files.add(item.file_path) + functions.add(item.qualname) + return {"files": len(files), "functions": len(functions)} + + +def _sort_key_group(g: StructuralFindingGroup) -> tuple[str, int, str]: + unique_count = len( + {(item.file_path, item.qualname, item.start, item.end) for item in g.items} + ) + return g.finding_kind, -unique_count, g.finding_key + + +def _sort_key_item(o: StructuralFindingOccurrence) -> tuple[str, str, int, int]: + return o.file_path, o.qualname, o.start, o.end + + +def _dedupe_items( + items: 
Sequence[StructuralFindingOccurrence], +) -> tuple[StructuralFindingOccurrence, ...]: + unique: dict[tuple[str, str, int, int], StructuralFindingOccurrence] = {} + for item in sorted(items, key=_sort_key_item): + key = (item.file_path, item.qualname, item.start, item.end) + if key not in unique: + unique[key] = item + return tuple(unique.values()) + + +# --------------------------------------------------------------------------- +# HTML panel rendering +# --------------------------------------------------------------------------- + + +def _signature_chips_html(sig: dict[str, str]) -> str: + """Render signature key=value pairs as category-badge chips.""" + chips: list[str] = [] + for k, v in sorted(sig.items()): + key = k.replace("_", " ") + chips.append( + f'' + f'{_escape_html(key)}' + f'{_escape_html(v)}' + ) + return " ".join(chips) + + +def _occurrences_table_html( + items: Sequence[StructuralFindingOccurrence], + *, + scan_root: str, + already_deduped: bool = False, + visible_limit: int = 4, +) -> str: + """Render occurrences as a styled table using the existing table CSS.""" + deduped_items = tuple(items) if already_deduped else _dedupe_items(items) + visible_items = deduped_items[:visible_limit] + hidden_items = deduped_items[visible_limit:] + + def _rows_for(entries: Sequence[StructuralFindingOccurrence]) -> str: + rows: list[str] = [] + for item in entries: + location = report_location_from_structural_occurrence( + item, + scan_root=scan_root, + ) + short_path = relative_report_path(item.file_path, scan_root=scan_root) + rows.append( + "" + f'' + f"{_escape_html(short_path)}" + f'{_source_kind_badge_html(location.source_kind)} ' + f"{_escape_html(item.qualname)}" + f'{item.start}-{item.end}' + "" + ) + return "".join(rows) + + colgroup = ( + "" + '' + '' + '' + "" + ) + thead = "FileLocationLines" + + hidden_details = "" + if hidden_items: + hidden_details = ( + '
    ' + f"Show {len(hidden_items)} more occurrences" + f'
    ' + f"{colgroup}{thead}" + f"{_rows_for(hidden_items)}" + "
    " + ) + return ( + f'
    ' + f"{colgroup}{thead}" + f"{_rows_for(visible_items)}" + "
    " + f"{hidden_details}" + ) + + +def _short_path(file_path: str) -> str: + parts = file_path.replace("\\", "/").split("/") + return "/".join(parts[-2:]) if len(parts) > 1 else file_path + + +def _finding_scope_text(items: Sequence[StructuralFindingOccurrence]) -> str: + spread = _spread(items) + if spread["functions"] == 1: + return f"inside {items[0].qualname}" + return ( + f"across {spread['functions']} functions in {spread['files']} " + f"{'file' if spread['files'] == 1 else 'files'}" + ) + + +def _render_reason_list_html(reasons: Sequence[str]) -> str: + return ( + '
      ' + + "".join(f"
    • {_escape_html(reason)}
    • " for reason in reasons) + + "
    " + ) + + +def _finding_reason_list_html( + group: StructuralFindingGroup, + items: Sequence[StructuralFindingOccurrence], +) -> str: + spread = _spread(items) + if group.finding_kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + reasons = [ + ( + f"{len(items)} divergent clone members were detected after " + "stable sorting and deduplication." + ), + ( + "Members were compared by entry-guard count/profile, terminal " + "kind, and side-effect-before-guard marker." + ), + ( + f"Cohort id: {group.signature.get('cohort_id', 'unknown')}; " + "majority guard count: " + f"{group.signature.get('majority_guard_count', '0')}." + ), + ( + f"Spread includes {spread['functions']} " + f"{'function' if spread['functions'] == 1 else 'functions'} in " + f"{spread['files']} {'file' if spread['files'] == 1 else 'files'}." + ), + "This is a report-only finding and does not affect clone gating.", + ] + return _render_reason_list_html(reasons) + if group.finding_kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + reasons = [ + f"{len(items)} clone members diverge from the cohort majority profile.", + f"Drift fields: {group.signature.get('drift_fields', 'n/a')}.", + ( + f"Cohort id: {group.signature.get('cohort_id', 'unknown')} with " + f"arity {group.signature.get('cohort_arity', 'n/a')}." + ), + ("Majority profile is compared deterministically with lexical tie-breaks."), + "This is a report-only finding and does not affect clone gating.", + ] + return _render_reason_list_html(reasons) + + stmt_seq = group.signature.get("stmt_seq", "n/a") + terminal = group.signature.get("terminal", "n/a") + reasons = [ + ( + f"{len(items)} non-overlapping branch bodies remained after " + "deduplication and overlap pruning." + ), + ( + f"All occurrences belong to {spread['functions']} " + f"{'function' if spread['functions'] == 1 else 'functions'} in " + f"{spread['files']} {'file' if spread['files'] == 1 else 'files'}." 
+ ), + ( + f"The detector grouped them by structural signature: " + f"stmt seq: {stmt_seq}, terminal: {terminal}." + ), + ( + "Call/raise buckets and nested control-flow flags must also match " + "for branches to land in the same finding group." + ), + ( + "This is a local, report-only hint. It does not change clone groups " + "or CI verdicts." + ), + ] + return _render_reason_list_html(reasons) + + +def _finding_matters_paragraph(message: str) -> str: + return f'

    {_escape_html(message)}

    ' + + +def _finding_matters_html( + group: StructuralFindingGroup, + items: Sequence[StructuralFindingOccurrence], +) -> str: + spread = _spread(items) + count = len(items) + if group.finding_kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + message = ( + "Members of one function-clone cohort diverged in guard/exit behavior. " + "This often points to a partial fix where one path was updated and " + "other siblings were left unchanged." + ) + return _finding_matters_paragraph(message) + if group.finding_kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + message = ( + "Members of one function-clone cohort drifted from a stable majority " + "profile (terminal, guard, try/finally, side-effect order). Review " + "whether divergence is intentional." + ) + return _finding_matters_paragraph(message) + + terminal = str(group.signature.get("terminal", "")).strip() + stmt_seq = str(group.signature.get("stmt_seq", "")).strip() + if spread["functions"] > 1 or spread["files"] > 1: + message = ( + f"This pattern repeats across {spread['functions']} functions and " + f"{spread['files']} files, so the same branch policy may be copied " + "between multiple code paths." + ) + elif terminal == "raise": + message = ( + "This group points to repeated guard or validation exits inside one " + "function. Consolidating the shared exit policy usually reduces " + "branch noise." + ) + elif terminal == "return": + message = ( + "This group points to repeated return-path logic inside one function. " + "A helper can often keep the branch predicate local while sharing " + "the emitted behavior." + ) + else: + message = ( + f"This group reports {count} branches with the same local shape " + f"({stmt_seq or 'unknown signature'}). Review whether the shared " + "branch body should stay duplicated or become a helper." 
+ ) + return _finding_matters_paragraph(message) + + +def _finding_example_card_html( + item: StructuralFindingOccurrence, + *, + label: str, + file_cache: _FileCache, + context_lines: int, + max_snippet_lines: int, +) -> str: + snippet = _render_code_block( + filepath=item.file_path, + start_line=item.start, + end_line=item.end, + file_cache=file_cache, + context=context_lines, + max_lines=max_snippet_lines, + ) + return ( + '
    ' + '
    ' + f'{_escape_html(label)}' + f'{_escape_html(item.qualname)}' + f'' + f"{_escape_html(_short_path(item.file_path))}:{item.start}\u2013{item.end}" + "
    " + f"{snippet.code_html}" + "
    " + ) + + +def _finding_why_template_html( + group: StructuralFindingGroup, + items: Sequence[StructuralFindingOccurrence], + *, + file_cache: _FileCache, + context_lines: int, + max_snippet_lines: int, +) -> str: + preview_items = list(items[:2]) + examples_html = "".join( + _finding_example_card_html( + item, + label=f"Example {'AB'[idx] if idx < 2 else idx + 1}", + file_cache=file_cache, + context_lines=context_lines, + max_snippet_lines=max_snippet_lines, + ) + for idx, item in enumerate(preview_items) + ) + if group.finding_kind == STRUCTURAL_KIND_DUPLICATED_BRANCHES: + showing_note = ( + f"Showing the first {len(preview_items)} matching branches from " + f"{len(items)} total occurrences." + ) + reported_subject = "structurally matching branch bodies" + elif group.finding_kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + showing_note = ( + f"Showing the first {len(preview_items)} cohort members from " + f"{len(items)} divergent occurrences." + ) + reported_subject = "clone cohort members with guard/exit divergence" + elif group.finding_kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + showing_note = ( + f"Showing the first {len(preview_items)} cohort members from " + f"{len(items)} divergent occurrences." + ) + reported_subject = "clone cohort members that drift from majority profile" + else: + showing_note = ( + f"Showing the first {len(preview_items)} matching branches from " + f"{len(items)} total occurrences." + ) + reported_subject = "structurally matching branch bodies" + return ( + '
    ' + '
    Impact
    ' + f"{_finding_matters_html(group, items)}" + "
    " + '
    ' + '
    Detection Rationale
    ' + f'

    CodeClone reported this group because it found ' + f"{len(items)} {reported_subject} " + f"{_escape_html(_finding_scope_text(items))}.

    " + f"{_finding_reason_list_html(group, items)}" + "
    " + '
    ' + '
    Signature
    ' + f'
    {_signature_chips_html(group.signature)}
    ' + "
    " + '
    ' + '
    Examples
    ' + f'
    {_escape_html(showing_note)}
    ' + f'
    {examples_html}
    ' + "
    " + ) + + +def _render_finding_card( + g: StructuralFindingGroup, + *, + scan_root: str, + file_cache: _FileCache, + context_lines: int, + max_snippet_lines: int, + why_templates: list[str], +) -> tuple[str, str]: + """Render a single finding group as a compact card. Returns (html, source_kind).""" + deduped_items = _dedupe_items(g.items) + spread = _spread(deduped_items) + chips_html = _signature_chips_html(g.signature) + report_locations = tuple( + report_location_from_structural_occurrence(item, scan_root=scan_root) + for item in deduped_items + ) + source_kind = combine_source_kinds( + location.source_kind for location in report_locations + ) + spread_files, spread_functions = group_spread(report_locations) + spread_bucket = RISK_HIGH if spread_files > 1 or spread_functions > 1 else RISK_LOW + table_html = _occurrences_table_html( + deduped_items, scan_root=scan_root, already_deduped=True + ) + count = len(deduped_items) + + why_template_id = f"finding-why-template-{g.finding_key}" + why_template_html = _finding_why_template_html( + g, + deduped_items, + file_cache=file_cache, + context_lines=context_lines, + max_snippet_lines=max_snippet_lines, + ) + why_templates.append( + f'' + ) + + func_word = "function" if spread["functions"] == 1 else "functions" + file_word = "file" if spread["files"] == 1 else "files" + kind_label = _KIND_LABEL.get(g.finding_kind, g.finding_kind) + + # Context chips — source kind + finding kind + source_chip = _escape_html(source_kind_label(source_kind)) + finding_kind_chip = _escape_html(g.finding_kind.replace("_", " ")) + ctx_chips = ( + f'{source_chip}' + f'{finding_kind_chip}' + ) + + # Scope text — concise spread summary + scope_text = _finding_scope_text(deduped_items) + + return ( + f'
    ' + # -- header -- + '
    ' + 'info' + f'{_escape_html(kind_label)}' + '' + f'' + f"{spread['functions']} {func_word} \u00b7 {spread['files']} {file_word}" + f'' + "
    " + # -- body: context + signature chips + scope -- + '
    ' + f'
    {ctx_chips}
    ' + f'
    {chips_html}
    ' + f'
    {_escape_html(scope_text)}
    ' + "
    " + # -- expandable occurrences -- + '
    ' + f"Occurrences ({count})" + f'
    {table_html}
    ' + "
    " + "
    ", + source_kind, + ) + + +def build_structural_findings_html_panel( + groups: Sequence[StructuralFindingGroup], + files: list[str], + *, + scan_root: str = "", + file_cache: _FileCache | None = None, + context_lines: int = 3, + max_snippet_lines: int = 220, +) -> str: + """Build HTML content for the Structural Findings tab panel.""" + from .._html_report._tabs import render_split_tabs + + normalized_groups = normalize_structural_findings(groups) + if not normalized_groups: + return _tab_empty("No structural findings detected.") + + intro = ( + '
    ' + '
    What are structural findings?
    ' + '
    Repeated non-overlapping branch-body shapes ' + "detected inside individual functions. These are local, report-only " + "refactoring hints and do not affect clone detection or CI verdicts.
    " + "
    " + ) + + resolved_file_cache = file_cache if file_cache is not None else _FileCache() + why_templates: list[str] = [] + + # Render all cards and bucket by source_kind + by_source: dict[str, list[str]] = {} + for g in sorted(normalized_groups, key=_sort_key_group): + card_html, source_kind = _render_finding_card( + g, + scan_root=scan_root, + file_cache=resolved_file_cache, + context_lines=context_lines, + max_snippet_lines=max_snippet_lines, + why_templates=why_templates, + ) + by_source.setdefault(source_kind, []).append(card_html) + + # Build sub-tabs: "All" + per source_kind + all_cards = [] + for cards in by_source.values(): + all_cards.extend(cards) + + sub_tabs: list[tuple[str, str, int, str]] = [ + ( + "all", + "All", + len(all_cards), + f'
    {"".join(all_cards)}
    ', + ), + ] + # Stable order matching SOURCE_KIND_FILTER_VALUES + for kind in SOURCE_KIND_FILTER_VALUES: + cards = by_source.get(kind, []) + if cards: + sub_tabs.append( + ( + kind, + source_kind_label(kind), + len(cards), + f'
    {"".join(cards)}
    ', + ) + ) + + return ( + intro + + render_split_tabs(group_id="findings", tabs=sub_tabs) + + "".join(why_templates) + ) diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py new file mode 100644 index 0000000..330f92f --- /dev/null +++ b/codeclone/report/json_contract.py @@ -0,0 +1,2270 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from collections import Counter +from collections.abc import Collection, Iterable, Mapping, Sequence +from hashlib import sha256 +from typing import TYPE_CHECKING, Literal + +from .. import _coerce +from ..contracts import REPORT_SCHEMA_VERSION +from ..domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + CLONE_NOVELTY_KNOWN, + CLONE_NOVELTY_NEW, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, +) +from ..domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_MEDIUM, + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + EFFORT_WEIGHT, + RISK_LOW, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_ORDER, + SEVERITY_RANK, + SEVERITY_WARNING, +) +from ..domain.source_scope import ( + IMPACT_SCOPE_MIXED, + IMPACT_SCOPE_NON_RUNTIME, + IMPACT_SCOPE_RUNTIME, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) +from ..structural_findings import normalize_structural_findings +from ..suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE +from .derived import ( + combine_source_kinds, + group_spread, + relative_report_path, + report_location_from_group_item, + report_location_from_structural_occurrence, +) +from .suggestions import classify_clone_type + +if TYPE_CHECKING: + from ..models import ( + GroupItemLike, + GroupMapLike, + SourceKind, + StructuralFindingGroup, + Suggestion, + ) + 
+__all__ = [ + "build_report_document", + "clone_group_id", + "dead_code_group_id", + "design_group_id", + "structural_group_id", +] + +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + +_SOURCE_BREAKDOWN_KEYS_TYPED: tuple[SourceKind, ...] = ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_OTHER, +) + + +def _optional_str(value: object) -> str | None: + if value is None: + return None + text = str(value).strip() + return text or None + + +def _normalize_path(value: str) -> str: + return value.replace("\\", "/").strip() + + +def _is_absolute_path(value: str) -> bool: + normalized = _normalize_path(value) + if not normalized: + return False + if normalized.startswith("/"): + return True + return len(normalized) > 2 and normalized[1] == ":" and normalized[2] == "/" + + +def _contract_path( + value: object, + *, + scan_root: str, +) -> tuple[str | None, str | None, str | None]: + path_text = _optional_str(value) + if path_text is None: + return None, None, None + normalized_path = _normalize_path(path_text) + relative_path = relative_report_path(normalized_path, scan_root=scan_root) + if relative_path and relative_path != normalized_path: + return relative_path, "in_root", normalized_path + if _is_absolute_path(normalized_path): + return normalized_path.rsplit("/", maxsplit=1)[-1], "external", normalized_path + return normalized_path, "relative", None + + +def _contract_report_location_path(location_path: str, *, scan_root: str) -> str: + contract_path, _scope, _absolute = _contract_path( + location_path, + scan_root=scan_root, + ) + return contract_path or "" + + +def _priority( + severity: str, + effort: str, +) -> float: + severity_rank = SEVERITY_RANK.get(severity, 1) + effort_rank = EFFORT_WEIGHT.get(effort, 1) + return float(severity_rank) / float(effort_rank) + + +def clone_group_id(kind: str, group_key: str) -> str: + return 
f"clone:{kind}:{group_key}" + + +def structural_group_id(finding_kind: str, finding_key: str) -> str: + return f"structural:{finding_kind}:{finding_key}" + + +def dead_code_group_id(subject_key: str) -> str: + return f"dead_code:{subject_key}" + + +def design_group_id(category: str, subject_key: str) -> str: + return f"design:{category}:{subject_key}" + + +def _clone_novelty( + *, + group_key: str, + baseline_trusted: bool, + new_keys: Collection[str] | None, +) -> str: + if not baseline_trusted: + return CLONE_NOVELTY_NEW + if new_keys is None: + return CLONE_NOVELTY_NEW + return CLONE_NOVELTY_NEW if group_key in new_keys else CLONE_NOVELTY_KNOWN + + +def _item_sort_key(item: Mapping[str, object]) -> tuple[str, int, int, str]: + return ( + str(item.get("relative_path", "")), + _as_int(item.get("start_line")), + _as_int(item.get("end_line")), + str(item.get("qualname", "")), + ) + + +def _parse_bool_text(value: object) -> bool: + text = str(value).strip().lower() + return text in {"1", "true", "yes"} + + +def _parse_ratio_percent(value: object) -> float | None: + text = str(value).strip() + if not text: + return None + if text.endswith("%"): + try: + return float(text[:-1]) / 100.0 + except ValueError: + return None + try: + numeric = float(text) + except ValueError: + return None + return numeric if numeric <= 1.0 else numeric / 100.0 + + +def _normalize_block_machine_facts( + *, + group_key: str, + group_arity: int, + block_facts: Mapping[str, str], +) -> tuple[dict[str, object], dict[str, str]]: + facts: dict[str, object] = { + "group_key": group_key, + "group_arity": group_arity, + } + display_facts: dict[str, str] = {} + for key in sorted(block_facts): + value = str(block_facts[key]) + match key: + case "group_arity": + facts[key] = _as_int(value) + case "block_size" | "consecutive_asserts" | "instance_peer_count": + facts[key] = _as_int(value) + case "merged_regions": + facts[key] = _parse_bool_text(value) + case "assert_ratio": + ratio = 
_parse_ratio_percent(value) + if ratio is not None: + facts[key] = ratio + display_facts[key] = value + case ( + "match_rule" | "pattern" | "signature_kind" | "hint" | "hint_confidence" + ): + facts[key] = value + case _: + display_facts[key] = value + return facts, display_facts + + +def _source_scope_from_filepaths( + filepaths: Iterable[str], + *, + scan_root: str, +) -> dict[str, object]: + counts: Counter[SourceKind] = Counter() + for filepath in filepaths: + location = report_location_from_group_item( + {"filepath": filepath, "start_line": 0, "end_line": 0, "qualname": ""}, + scan_root=scan_root, + ) + counts[location.source_kind] += 1 + return _source_scope_from_counts(counts) + + +def _normalized_source_kind(value: object) -> SourceKind: + source_kind_text = str(value).strip().lower() or SOURCE_KIND_OTHER + if source_kind_text == SOURCE_KIND_PRODUCTION: + return SOURCE_KIND_PRODUCTION + if source_kind_text == SOURCE_KIND_TESTS: + return SOURCE_KIND_TESTS + if source_kind_text == SOURCE_KIND_FIXTURES: + return SOURCE_KIND_FIXTURES + return SOURCE_KIND_OTHER + + +def _source_scope_from_counts( + counts: Mapping[SourceKind, int], +) -> dict[str, object]: + breakdown = {kind: counts[kind] for kind in _SOURCE_BREAKDOWN_KEYS_TYPED} + present = tuple( + kind for kind in _SOURCE_BREAKDOWN_KEYS_TYPED if breakdown[kind] > 0 + ) + dominant_kind = ( + present[0] + if len(present) == 1 + else combine_source_kinds(present) + if present + else SOURCE_KIND_OTHER + ) + production_count = breakdown[SOURCE_KIND_PRODUCTION] + non_runtime_count = ( + breakdown[SOURCE_KIND_TESTS] + + breakdown[SOURCE_KIND_FIXTURES] + + breakdown[SOURCE_KIND_OTHER] + ) + match (production_count > 0, non_runtime_count == 0, production_count == 0): + case (True, True, _): + impact_scope = IMPACT_SCOPE_RUNTIME + case (_, _, True): + impact_scope = IMPACT_SCOPE_NON_RUNTIME + case _: + impact_scope = IMPACT_SCOPE_MIXED + return { + "dominant_kind": dominant_kind, + "breakdown": breakdown, + 
"impact_scope": impact_scope, + } + + +def _source_scope_from_locations( + locations: Sequence[Mapping[str, object]], +) -> dict[str, object]: + counts: Counter[SourceKind] = Counter() + for location in locations: + counts[_normalized_source_kind(location.get("source_kind"))] += 1 + return _source_scope_from_counts(counts) + + +def _collect_paths_from_metrics(metrics: Mapping[str, object]) -> set[str]: + paths: set[str] = set() + complexity = _as_mapping(metrics.get(CATEGORY_COMPLEXITY)) + for item in _as_sequence(complexity.get("functions")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + for family_name in (CATEGORY_COUPLING, CATEGORY_COHESION): + family = _as_mapping(metrics.get(family_name)) + for item in _as_sequence(family.get("classes")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + dead_code = _as_mapping(metrics.get(FAMILY_DEAD_CODE)) + for item in _as_sequence(dead_code.get("items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + for item in _as_sequence(dead_code.get("suppressed_items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + return paths + + +def _collect_report_file_list( + *, + inventory: Mapping[str, object] | None, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + metrics: Mapping[str, object] | None, + structural_findings: Sequence[StructuralFindingGroup] | None, +) -> list[str]: + files: set[str] = set() + inventory_map = _as_mapping(inventory) + for filepath in _as_sequence(inventory_map.get("file_list")): + file_text = _optional_str(filepath) + if file_text is not None: + files.add(file_text) + for groups in (func_groups, block_groups, 
segment_groups): + for items in groups.values(): + for item in items: + filepath = _optional_str(item.get("filepath")) + if filepath is not None: + files.add(filepath) + if metrics is not None: + files.update(_collect_paths_from_metrics(metrics)) + if structural_findings: + for group in normalize_structural_findings(structural_findings): + for occurrence in group.items: + filepath = _optional_str(occurrence.file_path) + if filepath is not None: + files.add(filepath) + return sorted(files) + + +def _count_file_lines(filepaths: Sequence[str]) -> int: + total = 0 + for filepath in filepaths: + total += _count_file_lines_for_path(filepath) + return total + + +def _count_file_lines_for_path(filepath: str) -> int: + try: + with open(filepath, encoding="utf-8", errors="surrogateescape") as handle: + return sum(1 for _ in handle) + except OSError: + return 0 + + +def _normalize_nested_string_rows(value: object) -> list[list[str]]: + rows: list[tuple[str, ...]] = [] + for row in _as_sequence(value): + modules = tuple( + str(module) for module in _as_sequence(row) if str(module).strip() + ) + if modules: + rows.append(modules) + rows.sort(key=lambda row: (len(row), row)) + return [list(row) for row in rows] + + +def _normalize_metrics_families( + metrics: Mapping[str, object] | None, + *, + scan_root: str, +) -> dict[str, object]: + metrics_map = _as_mapping(metrics) + complexity = _as_mapping(metrics_map.get(CATEGORY_COMPLEXITY)) + complexity_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "cyclomatic_complexity": _as_int( + item_map.get("cyclomatic_complexity"), + 1, + ), + "nesting_depth": _as_int(item_map.get("nesting_depth")), + "risk": str(item_map.get("risk", RISK_LOW)), + } + for item in _as_sequence(complexity.get("functions")) + for 
item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + + coupling = _as_mapping(metrics_map.get(CATEGORY_COUPLING)) + coupling_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "cbo": _as_int(item_map.get("cbo")), + "risk": str(item_map.get("risk", RISK_LOW)), + "coupled_classes": sorted( + { + str(name) + for name in _as_sequence(item_map.get("coupled_classes")) + if str(name).strip() + } + ), + } + for item in _as_sequence(coupling.get("classes")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + + cohesion = _as_mapping(metrics_map.get(CATEGORY_COHESION)) + cohesion_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "lcom4": _as_int(item_map.get("lcom4")), + "risk": str(item_map.get("risk", RISK_LOW)), + "method_count": _as_int(item_map.get("method_count")), + "instance_var_count": _as_int(item_map.get("instance_var_count")), + } + for item in _as_sequence(cohesion.get("classes")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + + dependencies = _as_mapping(metrics_map.get("dependencies")) + dependency_edges = sorted( + ( + { + "source": str(item_map.get("source", "")), + "target": str(item_map.get("target", "")), + "import_type": str(item_map.get("import_type", "")), + "line": 
_as_int(item_map.get("line")), + } + for item in _as_sequence(dependencies.get("edge_list")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["source"], + item["target"], + item["import_type"], + item["line"], + ), + ) + dependency_cycles = _normalize_nested_string_rows(dependencies.get("cycles")) + longest_chains = _normalize_nested_string_rows(dependencies.get("longest_chains")) + + dead_code = _as_mapping(metrics_map.get(FAMILY_DEAD_CODE)) + + def _normalize_suppressed_by( + raw_bindings: object, + ) -> list[dict[str, str]]: + normalized_bindings = sorted( + { + ( + str(binding_map.get("rule", "")).strip(), + str(binding_map.get("source", "")).strip(), + ) + for binding in _as_sequence(raw_bindings) + for binding_map in (_as_mapping(binding),) + if str(binding_map.get("rule", "")).strip() + }, + key=lambda item: (item[0], item[1]), + ) + if not normalized_bindings: + return [] + return [ + { + "rule": rule, + "source": source or INLINE_CODECLONE_SUPPRESSION_SOURCE, + } + for rule, source in normalized_bindings + ] + + dead_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "kind": str(item_map.get("kind", "")), + "confidence": str(item_map.get("confidence", CONFIDENCE_MEDIUM)), + } + for item in _as_sequence(dead_code.get("items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + item["kind"], + ), + ) + dead_suppressed_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "kind": 
str(item_map.get("kind", "")), + "confidence": str(item_map.get("confidence", CONFIDENCE_MEDIUM)), + "suppressed_by": _normalize_suppressed_by( + item_map.get("suppressed_by") + ), + } + for item in _as_sequence(dead_code.get("suppressed_items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + item["kind"], + item["confidence"], + tuple( + ( + str(_as_mapping(binding).get("rule", "")), + str(_as_mapping(binding).get("source", "")), + ) + for binding in _as_sequence(item.get("suppressed_by")) + ), + ), + ) + for item in dead_suppressed_items: + suppressed_by = _as_sequence(item.get("suppressed_by")) + first_binding = _as_mapping(suppressed_by[0]) if suppressed_by else {} + item["suppression_rule"] = str(first_binding.get("rule", "")) + item["suppression_source"] = str(first_binding.get("source", "")) + + health = _as_mapping(metrics_map.get("health")) + health_dimensions = { + str(key): _as_int(value) + for key, value in sorted(_as_mapping(health.get("dimensions")).items()) + } + + complexity_summary = _as_mapping(complexity.get("summary")) + coupling_summary = _as_mapping(coupling.get("summary")) + cohesion_summary = _as_mapping(cohesion.get("summary")) + dead_code_summary = _as_mapping(dead_code.get("summary")) + dead_high_confidence = sum( + 1 + for item in dead_items + if str(_as_mapping(item).get("confidence", "")).strip().lower() + == CONFIDENCE_HIGH + ) + + normalized: dict[str, object] = { + CATEGORY_COMPLEXITY: { + "summary": { + "total": len(complexity_items), + "average": round(_as_float(complexity_summary.get("average")), 2), + "max": _as_int(complexity_summary.get("max")), + "high_risk": _as_int(complexity_summary.get("high_risk")), + }, + "items": complexity_items, + "items_truncated": False, + }, + CATEGORY_COUPLING: { + "summary": { + "total": len(coupling_items), + "average": round(_as_float(coupling_summary.get("average")), 2), + "max": 
_as_int(coupling_summary.get("max")), + "high_risk": _as_int(coupling_summary.get("high_risk")), + }, + "items": coupling_items, + "items_truncated": False, + }, + CATEGORY_COHESION: { + "summary": { + "total": len(cohesion_items), + "average": round(_as_float(cohesion_summary.get("average")), 2), + "max": _as_int(cohesion_summary.get("max")), + "low_cohesion": _as_int(cohesion_summary.get("low_cohesion")), + }, + "items": cohesion_items, + "items_truncated": False, + }, + "dependencies": { + "summary": { + "modules": _as_int(dependencies.get("modules")), + "edges": _as_int(dependencies.get("edges")), + "cycles": len(dependency_cycles), + "max_depth": _as_int(dependencies.get("max_depth")), + }, + "items": dependency_edges, + "cycles": dependency_cycles, + "longest_chains": longest_chains, + "items_truncated": False, + }, + FAMILY_DEAD_CODE: { + "summary": { + "total": len(dead_items), + "high_confidence": dead_high_confidence + or _as_int( + dead_code_summary.get( + "high_confidence", dead_code_summary.get("critical") + ) + ), + "suppressed": len(dead_suppressed_items) + or _as_int(dead_code_summary.get("suppressed")), + }, + "items": dead_items, + "suppressed_items": dead_suppressed_items, + "items_truncated": False, + }, + "health": { + "summary": { + "score": _as_int(health.get("score")), + "grade": str(health.get("grade", "")), + "dimensions": health_dimensions, + }, + "items": [], + "items_truncated": False, + }, + } + return normalized + + +def _build_metrics_payload( + metrics: Mapping[str, object] | None, + *, + scan_root: str, +) -> dict[str, object]: + families = _normalize_metrics_families(metrics, scan_root=scan_root) + return { + "summary": { + family_name: _as_mapping(_as_mapping(family_payload).get("summary")) + for family_name, family_payload in families.items() + }, + "families": families, + } + + +def _derive_inventory_code_counts( + *, + metrics_payload: Mapping[str, object], + inventory_code: Mapping[str, object], + file_list: Sequence[str], + 
cached_files: int, +) -> dict[str, object]: + complexity = _as_mapping( + _as_mapping(metrics_payload.get("families")).get(CATEGORY_COMPLEXITY) + ) + cohesion = _as_mapping( + _as_mapping(metrics_payload.get("families")).get(CATEGORY_COHESION) + ) + complexity_items = _as_sequence(complexity.get("items")) + cohesion_items = _as_sequence(cohesion.get("items")) + + exact_entities = bool(complexity_items or cohesion_items) + method_count = sum( + _as_int(_as_mapping(item).get("method_count")) for item in cohesion_items + ) + class_count = len(cohesion_items) + function_total = max(len(complexity_items) - method_count, 0) + + if not exact_entities: + function_total = _as_int(inventory_code.get("functions")) + method_count = _as_int(inventory_code.get("methods")) + class_count = _as_int(inventory_code.get("classes")) + + parsed_lines_raw = inventory_code.get("parsed_lines") + if isinstance(parsed_lines_raw, int) and parsed_lines_raw >= 0: + parsed_lines = parsed_lines_raw + elif cached_files > 0 and file_list: + parsed_lines = _count_file_lines(file_list) + else: + parsed_lines = _as_int(parsed_lines_raw) + + if exact_entities and ((cached_files > 0 and file_list) or parsed_lines > 0): + scope = "analysis_root" + elif cached_files > 0 and file_list: + scope = "mixed" + else: + scope = "current_run" + + return { + "scope": scope, + "parsed_lines": parsed_lines, + "functions": function_total, + "methods": method_count, + "classes": class_count, + } + + +def _build_inventory_payload( + *, + inventory: Mapping[str, object] | None, + file_list: Sequence[str], + metrics_payload: Mapping[str, object], + scan_root: str, +) -> dict[str, object]: + inventory_map = _as_mapping(inventory) + files_map = _as_mapping(inventory_map.get("files")) + code_map = _as_mapping(inventory_map.get("code")) + cached_files = _as_int(files_map.get("cached")) + file_registry = [ + path + for path in ( + _contract_path(filepath, scan_root=scan_root)[0] for filepath in file_list + ) + if path is not 
None + ] + return { + "files": { + "total_found": _as_int(files_map.get("total_found"), len(file_list)), + "analyzed": _as_int(files_map.get("analyzed")), + "cached": cached_files, + "skipped": _as_int(files_map.get("skipped")), + "source_io_skipped": _as_int(files_map.get("source_io_skipped")), + }, + "code": _derive_inventory_code_counts( + metrics_payload=metrics_payload, + inventory_code=code_map, + file_list=file_list, + cached_files=cached_files, + ), + "file_registry": { + "encoding": "relative_path", + "items": file_registry, + }, + } + + +def _baseline_is_trusted(meta: Mapping[str, object]) -> bool: + baseline = _as_mapping(meta.get("baseline")) + return ( + baseline.get("loaded") is True + and str(baseline.get("status", "")).strip().lower() == "ok" + ) + + +def _build_meta_payload( + raw_meta: Mapping[str, object] | None, + *, + scan_root: str, +) -> dict[str, object]: + meta = dict(raw_meta or {}) + metrics_computed = sorted( + { + str(item) + for item in _as_sequence(meta.get("metrics_computed")) + if str(item).strip() + } + ) + baseline_path, baseline_path_scope, baseline_abs = _contract_path( + meta.get("baseline_path"), + scan_root=scan_root, + ) + cache_path, cache_path_scope, cache_abs = _contract_path( + meta.get("cache_path"), + scan_root=scan_root, + ) + metrics_baseline_path, metrics_baseline_path_scope, metrics_baseline_abs = ( + _contract_path( + meta.get("metrics_baseline_path"), + scan_root=scan_root, + ) + ) + return { + "codeclone_version": str(meta.get("codeclone_version", "")), + "project_name": str(meta.get("project_name", "")), + "scan_root": ".", + "python_version": str(meta.get("python_version", "")), + "python_tag": str(meta.get("python_tag", "")), + "analysis_mode": str(meta.get("analysis_mode", "full") or "full"), + "report_mode": str(meta.get("report_mode", "full") or "full"), + "computed_metric_families": metrics_computed, + "baseline": { + "path": baseline_path, + "path_scope": baseline_path_scope, + "loaded": 
bool(meta.get("baseline_loaded")), + "status": _optional_str(meta.get("baseline_status")), + "fingerprint_version": _optional_str( + meta.get("baseline_fingerprint_version") + ), + "schema_version": _optional_str(meta.get("baseline_schema_version")), + "python_tag": _optional_str(meta.get("baseline_python_tag")), + "generator_name": _optional_str(meta.get("baseline_generator_name")), + "generator_version": _optional_str(meta.get("baseline_generator_version")), + "payload_sha256": _optional_str(meta.get("baseline_payload_sha256")), + "payload_sha256_verified": bool( + meta.get("baseline_payload_sha256_verified") + ), + }, + "cache": { + "path": cache_path, + "path_scope": cache_path_scope, + "used": bool(meta.get("cache_used")), + "status": _optional_str(meta.get("cache_status")), + "schema_version": _optional_str(meta.get("cache_schema_version")), + }, + "metrics_baseline": { + "path": metrics_baseline_path, + "path_scope": metrics_baseline_path_scope, + "loaded": bool(meta.get("metrics_baseline_loaded")), + "status": _optional_str(meta.get("metrics_baseline_status")), + "schema_version": _optional_str( + meta.get("metrics_baseline_schema_version") + ), + "payload_sha256": _optional_str( + meta.get("metrics_baseline_payload_sha256") + ), + "payload_sha256_verified": bool( + meta.get("metrics_baseline_payload_sha256_verified") + ), + }, + "runtime": { + "report_generated_at_utc": _optional_str( + meta.get("report_generated_at_utc") + ), + "scan_root_absolute": _optional_str(meta.get("scan_root")), + "baseline_path_absolute": baseline_abs, + "cache_path_absolute": cache_abs, + "metrics_baseline_path_absolute": metrics_baseline_abs, + }, + } + + +def _clone_group_assessment( + *, + count: int, + clone_type: str, +) -> tuple[str, float]: + match (count >= 4, clone_type in {"Type-1", "Type-2"}): + case (True, _): + severity = SEVERITY_CRITICAL + case (False, True): + severity = SEVERITY_WARNING + case _: + severity = SEVERITY_INFO + effort = "easy" if clone_type in 
{"Type-1", "Type-2"} else "moderate" + return severity, _priority(severity, effort) + + +def _build_clone_group_facts( + *, + group_key: str, + kind: Literal["function", "block", "segment"], + items: Sequence[GroupItemLike], + block_facts: Mapping[str, Mapping[str, str]], +) -> tuple[dict[str, object], dict[str, str]]: + base: dict[str, object] = { + "group_key": group_key, + "group_arity": len(items), + } + display_facts: dict[str, str] = {} + match kind: + case "function": + loc_buckets = sorted( + { + str(item.get("loc_bucket", "")) + for item in items + if str(item.get("loc_bucket", "")).strip() + } + ) + base["loc_buckets"] = loc_buckets + case "block" if group_key in block_facts: + typed_facts, block_display_facts = _normalize_block_machine_facts( + group_key=group_key, + group_arity=len(items), + block_facts=block_facts[group_key], + ) + base.update(typed_facts) + display_facts.update(block_display_facts) + case _: + pass + return base, display_facts + + +def _clone_item_payload( + item: GroupItemLike, + *, + kind: Literal["function", "block", "segment"], + scan_root: str, +) -> dict[str, object]: + payload: dict[str, object] = { + "relative_path": _contract_report_location_path( + str(item.get("filepath", "")), + scan_root=scan_root, + ), + "qualname": str(item.get("qualname", "")), + "start_line": _as_int(item.get("start_line", 0)), + "end_line": _as_int(item.get("end_line", 0)), + } + match kind: + case "function": + payload.update( + { + "loc": _as_int(item.get("loc", 0)), + "stmt_count": _as_int(item.get("stmt_count", 0)), + "fingerprint": str(item.get("fingerprint", "")), + "loc_bucket": str(item.get("loc_bucket", "")), + "cyclomatic_complexity": _as_int( + item.get("cyclomatic_complexity", 1) + ), + "nesting_depth": _as_int(item.get("nesting_depth", 0)), + "risk": str(item.get("risk", RISK_LOW)), + "raw_hash": str(item.get("raw_hash", "")), + } + ) + case "block": + payload["size"] = _as_int(item.get("size", 0)) + case _: + payload.update( + { + 
"size": _as_int(item.get("size", 0)), + "segment_hash": str(item.get("segment_hash", "")), + "segment_sig": str(item.get("segment_sig", "")), + } + ) + return payload + + +def _build_clone_groups( + *, + groups: GroupMapLike, + kind: Literal["function", "block", "segment"], + baseline_trusted: bool, + new_keys: Collection[str] | None, + block_facts: Mapping[str, Mapping[str, str]], + scan_root: str, +) -> list[dict[str, object]]: + encoded_groups: list[dict[str, object]] = [] + new_key_set = set(new_keys) if new_keys is not None else None + for group_key in sorted(groups): + items = groups[group_key] + clone_type = classify_clone_type(items=items, kind=kind) + severity, priority = _clone_group_assessment( + count=len(items), + clone_type=clone_type, + ) + novelty = _clone_novelty( + group_key=group_key, + baseline_trusted=baseline_trusted, + new_keys=new_key_set, + ) + locations = tuple( + report_location_from_group_item(item, scan_root=scan_root) for item in items + ) + source_scope = _source_scope_from_locations( + [ + { + "source_kind": location.source_kind, + } + for location in locations + ] + ) + spread_files, spread_functions = group_spread(locations) + rows = sorted( + [ + _clone_item_payload( + item, + kind=kind, + scan_root=scan_root, + ) + for item in items + ], + key=_item_sort_key, + ) + facts, display_facts = _build_clone_group_facts( + group_key=group_key, + kind=kind, + items=items, + block_facts=block_facts, + ) + encoded_groups.append( + { + "id": clone_group_id(kind, group_key), + "family": FAMILY_CLONE, + "category": kind, + "kind": "clone_group", + "severity": severity, + "confidence": CONFIDENCE_HIGH, + "priority": priority, + "clone_kind": kind, + "clone_type": clone_type, + "novelty": novelty, + "count": len(items), + "source_scope": source_scope, + "spread": { + "files": spread_files, + "functions": spread_functions, + }, + "items": rows, + "facts": facts, + **({"display_facts": display_facts} if display_facts else {}), + } + ) + 
encoded_groups.sort( + key=lambda group: (-_as_int(group.get("count")), str(group["id"])) + ) + return encoded_groups + + +def _structural_group_assessment( + *, + finding_kind: str, + count: int, + spread_functions: int, +) -> tuple[str, float]: + match finding_kind: + case "clone_guard_exit_divergence" | "clone_cohort_drift": + severity = SEVERITY_WARNING + if count >= 3 or spread_functions > 1: + severity = SEVERITY_CRITICAL + return severity, _priority(severity, "moderate") + case _: + severity = ( + SEVERITY_WARNING + if count >= 4 or spread_functions > 1 + else SEVERITY_INFO + ) + return severity, _priority(severity, "moderate") + + +def _csv_values(value: object) -> list[str]: + raw = str(value).strip() + if not raw: + return [] + return sorted({part.strip() for part in raw.split(",") if part.strip()}) + + +def _build_structural_signature( + finding_kind: str, + signature: Mapping[str, str], +) -> dict[str, object]: + debug = {str(key): str(signature[key]) for key in sorted(signature)} + match finding_kind: + case "clone_guard_exit_divergence": + return { + "version": "1", + "stable": { + "family": "clone_guard_exit_divergence", + "cohort_id": str(signature.get("cohort_id", "")), + "majority_guard_count": _as_int( + signature.get("majority_guard_count") + ), + "majority_guard_terminal_profile": str( + signature.get("majority_guard_terminal_profile", "none") + ), + "majority_terminal_kind": str( + signature.get("majority_terminal_kind", "fallthrough") + ), + "majority_side_effect_before_guard": ( + str(signature.get("majority_side_effect_before_guard", "0")) + == "1" + ), + }, + "debug": debug, + } + case "clone_cohort_drift": + return { + "version": "1", + "stable": { + "family": "clone_cohort_drift", + "cohort_id": str(signature.get("cohort_id", "")), + "drift_fields": _csv_values(signature.get("drift_fields")), + "majority_profile": { + "terminal_kind": str( + signature.get("majority_terminal_kind", "") + ), + "guard_exit_profile": str( + 
signature.get("majority_guard_exit_profile", "") + ), + "try_finally_profile": str( + signature.get("majority_try_finally_profile", "") + ), + "side_effect_order_profile": str( + signature.get("majority_side_effect_order_profile", "") + ), + }, + }, + "debug": debug, + } + case _: + return { + "version": "1", + "stable": { + "family": "duplicated_branches", + "stmt_shape": str(signature.get("stmt_seq", "")), + "terminal_kind": str(signature.get("terminal", "")), + "control_flow": { + "has_loop": str(signature.get("has_loop", "0")) == "1", + "has_try": str(signature.get("has_try", "0")) == "1", + "nested_if": str(signature.get("nested_if", "0")) == "1", + }, + }, + "debug": debug, + } + + +def _build_structural_facts( + finding_kind: str, + signature: Mapping[str, str], + *, + count: int, +) -> dict[str, object]: + match finding_kind: + case "clone_guard_exit_divergence": + return { + "cohort_id": str(signature.get("cohort_id", "")), + "cohort_arity": _as_int(signature.get("cohort_arity")), + "divergent_members": _as_int(signature.get("divergent_members"), count), + "majority_entry_guard_count": _as_int( + signature.get("majority_guard_count"), + ), + "majority_guard_terminal_profile": str( + signature.get("majority_guard_terminal_profile", "none") + ), + "majority_terminal_kind": str( + signature.get("majority_terminal_kind", "fallthrough") + ), + "majority_side_effect_before_guard": ( + str(signature.get("majority_side_effect_before_guard", "0")) == "1" + ), + "guard_count_values": _csv_values(signature.get("guard_count_values")), + "guard_terminal_values": _csv_values( + signature.get("guard_terminal_values"), + ), + "terminal_values": _csv_values(signature.get("terminal_values")), + "side_effect_before_guard_values": _csv_values( + signature.get("side_effect_before_guard_values"), + ), + } + case "clone_cohort_drift": + return { + "cohort_id": str(signature.get("cohort_id", "")), + "cohort_arity": _as_int(signature.get("cohort_arity")), + "divergent_members": 
_as_int(signature.get("divergent_members"), count), + "drift_fields": _csv_values(signature.get("drift_fields")), + "stable_majority_profile": { + "terminal_kind": str(signature.get("majority_terminal_kind", "")), + "guard_exit_profile": str( + signature.get("majority_guard_exit_profile", "") + ), + "try_finally_profile": str( + signature.get("majority_try_finally_profile", "") + ), + "side_effect_order_profile": str( + signature.get("majority_side_effect_order_profile", "") + ), + }, + } + case _: + return { + "occurrence_count": count, + "non_overlapping": True, + "call_bucket": _as_int(signature.get("calls", "0")), + "raise_bucket": _as_int(signature.get("raises", "0")), + } + + +def _build_structural_groups( + groups: Sequence[StructuralFindingGroup] | None, + *, + scan_root: str, +) -> list[dict[str, object]]: + normalized_groups = normalize_structural_findings(groups or ()) + out: list[dict[str, object]] = [] + for group in normalized_groups: + locations = tuple( + report_location_from_structural_occurrence(item, scan_root=scan_root) + for item in group.items + ) + source_scope = _source_scope_from_locations( + [{"source_kind": location.source_kind} for location in locations] + ) + spread_files, spread_functions = group_spread(locations) + severity, priority = _structural_group_assessment( + finding_kind=group.finding_kind, + count=len(group.items), + spread_functions=spread_functions, + ) + out.append( + { + "id": structural_group_id(group.finding_kind, group.finding_key), + "family": FAMILY_STRUCTURAL, + "category": group.finding_kind, + "kind": group.finding_kind, + "severity": severity, + "confidence": ( + CONFIDENCE_HIGH + if group.finding_kind + in {"clone_guard_exit_divergence", "clone_cohort_drift"} + else CONFIDENCE_MEDIUM + ), + "priority": priority, + "count": len(group.items), + "source_scope": source_scope, + "spread": { + "files": spread_files, + "functions": spread_functions, + }, + "signature": _build_structural_signature( + 
group.finding_kind, + group.signature, + ), + "items": sorted( + [ + { + "relative_path": _contract_report_location_path( + item.file_path, + scan_root=scan_root, + ), + "qualname": item.qualname, + "start_line": item.start, + "end_line": item.end, + } + for item in group.items + ], + key=_item_sort_key, + ), + "facts": _build_structural_facts( + group.finding_kind, + group.signature, + count=len(group.items), + ), + } + ) + out.sort(key=lambda group: (-_as_int(group.get("count")), str(group["id"]))) + return out + + +def _single_location_source_scope( + filepath: str, + *, + scan_root: str, +) -> dict[str, object]: + location = report_location_from_group_item( + { + "filepath": filepath, + "qualname": "", + "start_line": 0, + "end_line": 0, + }, + scan_root=scan_root, + ) + return _source_scope_from_locations([{"source_kind": location.source_kind}]) + + +def _build_dead_code_groups( + metrics_payload: Mapping[str, object], + *, + scan_root: str, +) -> list[dict[str, object]]: + families = _as_mapping(metrics_payload.get("families")) + dead_code = _as_mapping(families.get(FAMILY_DEAD_CODE)) + groups: list[dict[str, object]] = [] + for item in _as_sequence(dead_code.get("items")): + item_map = _as_mapping(item) + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + confidence = str(item_map.get("confidence", CONFIDENCE_MEDIUM)) + severity = SEVERITY_WARNING if confidence == CONFIDENCE_HIGH else SEVERITY_INFO + groups.append( + { + "id": dead_code_group_id(qualname), + "family": FAMILY_DEAD_CODE, + "category": str(item_map.get("kind", "unknown")), + "kind": "unused_symbol", + "severity": severity, + "confidence": confidence, + "priority": _priority(severity, EFFORT_EASY), + "count": 1, + "source_scope": _single_location_source_scope( + filepath, + scan_root=scan_root, + ), + "spread": {"files": 1, "functions": 1 if qualname else 0}, + "items": [ + { + "relative_path": _contract_report_location_path( + filepath, + 
scan_root=scan_root, + ), + "qualname": qualname, + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + } + ], + "facts": { + "kind": str(item_map.get("kind", "unknown")), + "confidence": confidence, + }, + } + ) + groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"]))) + return groups + + +def _design_singleton_group( + *, + category: str, + kind: str, + severity: str, + qualname: str, + filepath: str, + start_line: int, + end_line: int, + scan_root: str, + item_data: Mapping[str, object], + facts: Mapping[str, object], +) -> dict[str, object]: + return { + "id": design_group_id(category, qualname), + "family": FAMILY_DESIGN, + "category": category, + "kind": kind, + "severity": severity, + "confidence": CONFIDENCE_HIGH, + "priority": _priority(severity, EFFORT_MODERATE), + "count": 1, + "source_scope": _single_location_source_scope( + filepath, + scan_root=scan_root, + ), + "spread": {"files": 1, "functions": 1}, + "items": [ + { + "relative_path": _contract_report_location_path( + filepath, + scan_root=scan_root, + ), + "qualname": qualname, + "start_line": start_line, + "end_line": end_line, + **item_data, + } + ], + "facts": dict(facts), + } + + +def _complexity_design_group( + item_map: Mapping[str, object], + *, + scan_root: str, +) -> dict[str, object] | None: + cc = _as_int(item_map.get("cyclomatic_complexity"), 1) + if cc <= 20: + return None + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + nesting_depth = _as_int(item_map.get("nesting_depth")) + severity = SEVERITY_CRITICAL if cc > 40 else SEVERITY_WARNING + return _design_singleton_group( + category=CATEGORY_COMPLEXITY, + kind="function_hotspot", + severity=severity, + qualname=qualname, + filepath=filepath, + start_line=_as_int(item_map.get("start_line")), + end_line=_as_int(item_map.get("end_line")), + scan_root=scan_root, + item_data={ + "cyclomatic_complexity": cc, + 
"nesting_depth": nesting_depth, + "risk": str(item_map.get("risk", RISK_LOW)), + }, + facts={ + "cyclomatic_complexity": cc, + "nesting_depth": nesting_depth, + }, + ) + + +def _coupling_design_group( + item_map: Mapping[str, object], + *, + scan_root: str, +) -> dict[str, object] | None: + cbo = _as_int(item_map.get("cbo")) + if cbo <= 10: + return None + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + coupled_classes = list(_as_sequence(item_map.get("coupled_classes"))) + return _design_singleton_group( + category=CATEGORY_COUPLING, + kind="class_hotspot", + severity=SEVERITY_WARNING, + qualname=qualname, + filepath=filepath, + start_line=_as_int(item_map.get("start_line")), + end_line=_as_int(item_map.get("end_line")), + scan_root=scan_root, + item_data={ + "cbo": cbo, + "risk": str(item_map.get("risk", RISK_LOW)), + "coupled_classes": coupled_classes, + }, + facts={ + "cbo": cbo, + "coupled_classes": coupled_classes, + }, + ) + + +def _cohesion_design_group( + item_map: Mapping[str, object], + *, + scan_root: str, +) -> dict[str, object] | None: + lcom4 = _as_int(item_map.get("lcom4")) + if lcom4 <= 3: + return None + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + method_count = _as_int(item_map.get("method_count")) + instance_var_count = _as_int(item_map.get("instance_var_count")) + return _design_singleton_group( + category=CATEGORY_COHESION, + kind="class_hotspot", + severity=SEVERITY_WARNING, + qualname=qualname, + filepath=filepath, + start_line=_as_int(item_map.get("start_line")), + end_line=_as_int(item_map.get("end_line")), + scan_root=scan_root, + item_data={ + "lcom4": lcom4, + "risk": str(item_map.get("risk", RISK_LOW)), + "method_count": method_count, + "instance_var_count": instance_var_count, + }, + facts={ + "lcom4": lcom4, + "method_count": method_count, + "instance_var_count": instance_var_count, + }, + ) + + +def _dependency_design_group( + 
cycle: object, + *, + scan_root: str, +) -> dict[str, object] | None: + modules = [str(module) for module in _as_sequence(cycle) if str(module).strip()] + if not modules: + return None + cycle_key = " -> ".join(modules) + return { + "id": design_group_id(CATEGORY_DEPENDENCY, cycle_key), + "family": FAMILY_DESIGN, + "category": CATEGORY_DEPENDENCY, + "kind": "cycle", + "severity": SEVERITY_CRITICAL, + "confidence": CONFIDENCE_HIGH, + "priority": _priority(SEVERITY_CRITICAL, EFFORT_HARD), + "count": len(modules), + "source_scope": _source_scope_from_filepaths( + (module.replace(".", "/") + ".py" for module in modules), + scan_root=scan_root, + ), + "spread": {"files": len(modules), "functions": 0}, + "items": [ + { + "module": module, + "relative_path": module.replace(".", "/") + ".py", + "source_kind": report_location_from_group_item( + { + "filepath": module.replace(".", "/") + ".py", + "qualname": "", + "start_line": 0, + "end_line": 0, + } + ).source_kind, + } + for module in modules + ], + "facts": { + "cycle_length": len(modules), + }, + } + + +def _build_design_groups( + metrics_payload: Mapping[str, object], + *, + scan_root: str, +) -> list[dict[str, object]]: + families = _as_mapping(metrics_payload.get("families")) + groups: list[dict[str, object]] = [] + + complexity = _as_mapping(families.get(CATEGORY_COMPLEXITY)) + for item in _as_sequence(complexity.get("items")): + group = _complexity_design_group(_as_mapping(item), scan_root=scan_root) + if group is not None: + groups.append(group) + + coupling = _as_mapping(families.get(CATEGORY_COUPLING)) + for item in _as_sequence(coupling.get("items")): + group = _coupling_design_group(_as_mapping(item), scan_root=scan_root) + if group is not None: + groups.append(group) + + cohesion = _as_mapping(families.get(CATEGORY_COHESION)) + for item in _as_sequence(cohesion.get("items")): + group = _cohesion_design_group(_as_mapping(item), scan_root=scan_root) + if group is not None: + groups.append(group) + + 
dependencies = _as_mapping(families.get("dependencies")) + for cycle in _as_sequence(dependencies.get("cycles")): + group = _dependency_design_group(cycle, scan_root=scan_root) + if group is not None: + groups.append(group) + + groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"]))) + return groups + + +def _findings_summary( + *, + clone_functions: Sequence[Mapping[str, object]], + clone_blocks: Sequence[Mapping[str, object]], + clone_segments: Sequence[Mapping[str, object]], + structural_groups: Sequence[Mapping[str, object]], + dead_code_groups: Sequence[Mapping[str, object]], + design_groups: Sequence[Mapping[str, object]], + dead_code_suppressed: int = 0, +) -> dict[str, object]: + flat_groups = [ + *clone_functions, + *clone_blocks, + *clone_segments, + *structural_groups, + *dead_code_groups, + *design_groups, + ] + severity_counts = dict.fromkeys( + (SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO), + 0, + ) + source_scope_counts = dict.fromkeys( + (IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_NON_RUNTIME, IMPACT_SCOPE_MIXED), + 0, + ) + for group in flat_groups: + severity = str(group.get("severity", SEVERITY_INFO)) + if severity in severity_counts: + severity_counts[severity] += 1 + impact_scope = str( + _as_mapping(group.get("source_scope")).get( + "impact_scope", + IMPACT_SCOPE_NON_RUNTIME, + ) + ) + if impact_scope in source_scope_counts: + source_scope_counts[impact_scope] += 1 + clone_groups = [*clone_functions, *clone_blocks, *clone_segments] + return { + "total": len(flat_groups), + "families": { + FAMILY_CLONES: len(clone_groups), + FAMILY_STRUCTURAL: len(structural_groups), + FAMILY_DEAD_CODE: len(dead_code_groups), + "design": len(design_groups), + }, + "severity": severity_counts, + "impact_scope": source_scope_counts, + "clones": { + "functions": len(clone_functions), + "blocks": len(clone_blocks), + "segments": len(clone_segments), + CLONE_NOVELTY_NEW: sum( + 1 + for group in clone_groups + if str(group.get("novelty", "")) 
== CLONE_NOVELTY_NEW + ), + CLONE_NOVELTY_KNOWN: sum( + 1 + for group in clone_groups + if str(group.get("novelty", "")) == CLONE_NOVELTY_KNOWN + ), + }, + "suppressed": { + FAMILY_DEAD_CODE: max(0, dead_code_suppressed), + }, + } + + +def _sort_flat_finding_ids( + groups: Sequence[Mapping[str, object]], +) -> list[str]: + ordered = sorted( + groups, + key=lambda group: ( + -_as_float(group.get("priority")), + SEVERITY_ORDER.get(str(group.get("severity", SEVERITY_INFO)), 9), + -_as_int(_as_mapping(group.get("spread")).get("files")), + -_as_int(_as_mapping(group.get("spread")).get("functions")), + -_as_int(group.get("count")), + str(group.get("id", "")), + ), + ) + return [str(group["id"]) for group in ordered] + + +def _sort_highest_spread_ids( + groups: Sequence[Mapping[str, object]], +) -> list[str]: + ordered = sorted( + groups, + key=lambda group: ( + -_as_int(_as_mapping(group.get("spread")).get("files")), + -_as_int(_as_mapping(group.get("spread")).get("functions")), + -_as_int(group.get("count")), + -_as_float(group.get("priority")), + str(group.get("id", "")), + ), + ) + return [str(group["id"]) for group in ordered] + + +def _health_snapshot(metrics_payload: Mapping[str, object]) -> dict[str, object]: + health = _as_mapping(_as_mapping(metrics_payload.get("families")).get("health")) + summary = _as_mapping(health.get("summary")) + dimensions = { + str(key): _as_int(value) + for key, value in _as_mapping(summary.get("dimensions")).items() + } + strongest = None + weakest = None + if dimensions: + strongest = min( + sorted(dimensions), + key=lambda key: (-dimensions[key], key), + ) + weakest = min( + sorted(dimensions), + key=lambda key: (dimensions[key], key), + ) + return { + "score": _as_int(summary.get("score")), + "grade": str(summary.get("grade", "")), + "strongest_dimension": strongest, + "weakest_dimension": weakest, + } + + +def _combined_impact_scope(groups: Sequence[Mapping[str, object]]) -> str: + impact_scopes = { + str( + 
_as_mapping(group.get("source_scope")).get( + "impact_scope", + IMPACT_SCOPE_NON_RUNTIME, + ) + ) + for group in groups + } + if not impact_scopes: + return IMPACT_SCOPE_NON_RUNTIME + if len(impact_scopes) == 1: + return next(iter(impact_scopes)) + return IMPACT_SCOPE_MIXED + + +def _top_risks( + *, + dead_code_groups: Sequence[Mapping[str, object]], + design_groups: Sequence[Mapping[str, object]], + structural_groups: Sequence[Mapping[str, object]], + clone_groups: Sequence[Mapping[str, object]], +) -> list[dict[str, object]]: + risks: list[dict[str, object]] = [] + + if dead_code_groups: + label = ( + "1 dead code item" + if len(dead_code_groups) == 1 + else f"{len(dead_code_groups)} dead code items" + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_DEAD_CODE, + "count": len(dead_code_groups), + "scope": IMPACT_SCOPE_MIXED + if len( + { + _as_mapping(group.get("source_scope")).get("impact_scope") + for group in dead_code_groups + } + ) + > 1 + else str( + _as_mapping(dead_code_groups[0].get("source_scope")).get( + "impact_scope", + IMPACT_SCOPE_NON_RUNTIME, + ) + ), + "label": label, + } + ) + + low_cohesion = [ + group + for group in design_groups + if str(group.get("category", "")) == CATEGORY_COHESION + ] + if low_cohesion: + label = ( + "1 low cohesion class" + if len(low_cohesion) == 1 + else f"{len(low_cohesion)} low cohesion classes" + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_DESIGN, + "category": CATEGORY_COHESION, + "count": len(low_cohesion), + "scope": _combined_impact_scope(low_cohesion), + "label": label, + } + ) + + production_structural = [ + group + for group in structural_groups + if str(_as_mapping(group.get("source_scope")).get("impact_scope")) + in {IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_MIXED} + ] + if production_structural: + label = ( + "1 structural finding in production code" + if len(production_structural) == 1 + else ( + f"{len(production_structural)} structural findings in production code" + 
) + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_STRUCTURAL, + "count": len(production_structural), + "scope": SOURCE_KIND_PRODUCTION, + "label": label, + } + ) + + fixture_test_clones = [ + group + for group in clone_groups + if _as_mapping(group.get("source_scope")).get("impact_scope") + == IMPACT_SCOPE_NON_RUNTIME + and _as_mapping(group.get("source_scope")).get("dominant_kind") + in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} + ] + if fixture_test_clones: + label = ( + "1 clone group in fixtures/tests" + if len(fixture_test_clones) == 1 + else f"{len(fixture_test_clones)} clone groups in fixtures/tests" + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_CLONE, + "count": len(fixture_test_clones), + "scope": IMPACT_SCOPE_NON_RUNTIME, + "label": label, + } + ) + + return risks[:6] + + +def _build_derived_overview( + *, + findings: Mapping[str, object], + metrics_payload: Mapping[str, object], +) -> tuple[dict[str, object], dict[str, object]]: + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get(FAMILY_CLONES)) + clone_groups = [ + *_as_sequence(clones.get("functions")), + *_as_sequence(clones.get("blocks")), + *_as_sequence(clones.get("segments")), + ] + structural_groups = _as_sequence( + _as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups") + ) + dead_code_groups = _as_sequence( + _as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") + ) + design_groups = _as_sequence(_as_mapping(groups.get("design")).get("groups")) + flat_groups = [ + *clone_groups, + *structural_groups, + *dead_code_groups, + *design_groups, + ] + dominant_kind_counts: Counter[str] = Counter( + str( + _as_mapping(_as_mapping(group).get("source_scope")).get( + "dominant_kind", + SOURCE_KIND_OTHER, + ) + ) + for group in flat_groups + ) + summary = _as_mapping(findings.get("summary")) + overview: dict[str, object] = { + "families": dict(_as_mapping(summary.get("families"))), + "top_risks": _top_risks( + 
dead_code_groups=[_as_mapping(group) for group in dead_code_groups], + design_groups=[_as_mapping(group) for group in design_groups], + structural_groups=[_as_mapping(group) for group in structural_groups], + clone_groups=[_as_mapping(group) for group in clone_groups], + ), + "source_scope_breakdown": { + key: dominant_kind_counts[key] + for key in ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + ) + if dominant_kind_counts[key] > 0 + }, + "health_snapshot": _health_snapshot(metrics_payload), + } + hotlists: dict[str, object] = { + "most_actionable_ids": _sort_flat_finding_ids( + [ + group + for group in map(_as_mapping, flat_groups) + if str(group.get("severity")) != SEVERITY_INFO + ] + )[:5], + "highest_spread_ids": _sort_highest_spread_ids( + list(map(_as_mapping, flat_groups)) + )[:5], + "production_hotspot_ids": _sort_flat_finding_ids( + [ + group + for group in map(_as_mapping, flat_groups) + if str(_as_mapping(group.get("source_scope")).get("impact_scope")) + in {IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_MIXED} + ] + )[:5], + "test_fixture_hotspot_ids": _sort_flat_finding_ids( + [ + group + for group in map(_as_mapping, flat_groups) + if str(_as_mapping(group.get("source_scope")).get("impact_scope")) + == IMPACT_SCOPE_NON_RUNTIME + and str(_as_mapping(group.get("source_scope")).get("dominant_kind")) + in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} + ] + )[:5], + } + return overview, hotlists + + +def _representative_location_rows( + suggestion: Suggestion, +) -> list[dict[str, object]]: + rows = [ + { + "relative_path": ( + location.relative_path + if ( + location.relative_path + and not _is_absolute_path(location.relative_path) + ) + else _contract_report_location_path( + location.filepath, + scan_root="", + ) + ), + "start_line": location.start_line, + "end_line": location.end_line, + "qualname": location.qualname, + "source_kind": location.source_kind, + } + for location in 
suggestion.representative_locations + ] + rows.sort( + key=lambda row: ( + str(row["relative_path"]), + _as_int(row["start_line"]), + _as_int(row["end_line"]), + str(row["qualname"]), + ) + ) + return rows[:3] + + +def _suggestion_finding_id(suggestion: Suggestion) -> str: + if suggestion.finding_family == FAMILY_CLONES: + if suggestion.fact_kind.startswith("Function"): + return clone_group_id(CLONE_KIND_FUNCTION, suggestion.subject_key) + if suggestion.fact_kind.startswith("Block"): + return clone_group_id(CLONE_KIND_BLOCK, suggestion.subject_key) + return clone_group_id(CLONE_KIND_SEGMENT, suggestion.subject_key) + if suggestion.finding_family == FAMILY_STRUCTURAL: + return structural_group_id( + suggestion.finding_kind or "duplicated_branches", + suggestion.subject_key, + ) + if suggestion.category == CATEGORY_DEAD_CODE: + return dead_code_group_id(suggestion.subject_key) + if suggestion.category in { + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COHESION, + CATEGORY_DEPENDENCY, + }: + return design_group_id(suggestion.category, suggestion.subject_key) + return design_group_id( + suggestion.category, + suggestion.subject_key or suggestion.title, + ) + + +def _build_derived_suggestions( + suggestions: Sequence[Suggestion] | None, +) -> list[dict[str, object]]: + suggestion_rows = list(suggestions or ()) + suggestion_rows.sort( + key=lambda suggestion: ( + -suggestion.priority, + SEVERITY_ORDER.get(suggestion.severity, 9), + suggestion.title, + _suggestion_finding_id(suggestion), + ) + ) + return [ + { + "id": f"suggestion:{_suggestion_finding_id(suggestion)}", + "finding_id": _suggestion_finding_id(suggestion), + "title": suggestion.title, + "summary": suggestion.fact_summary, + "location_label": suggestion.location_label or suggestion.location, + "representative_locations": _representative_location_rows(suggestion), + "action": { + "effort": suggestion.effort, + "steps": list(suggestion.steps), + }, + } + for suggestion in suggestion_rows + ] + + +def 
_build_findings_payload( + *, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_facts: Mapping[str, Mapping[str, str]], + structural_findings: Sequence[StructuralFindingGroup] | None, + metrics_payload: Mapping[str, object], + baseline_trusted: bool, + new_function_group_keys: Collection[str] | None, + new_block_group_keys: Collection[str] | None, + new_segment_group_keys: Collection[str] | None, + scan_root: str, +) -> dict[str, object]: + clone_functions = _build_clone_groups( + groups=func_groups, + kind=CLONE_KIND_FUNCTION, + baseline_trusted=baseline_trusted, + new_keys=new_function_group_keys, + block_facts=block_facts, + scan_root=scan_root, + ) + clone_blocks = _build_clone_groups( + groups=block_groups, + kind=CLONE_KIND_BLOCK, + baseline_trusted=baseline_trusted, + new_keys=new_block_group_keys, + block_facts=block_facts, + scan_root=scan_root, + ) + clone_segments = _build_clone_groups( + groups=segment_groups, + kind=CLONE_KIND_SEGMENT, + baseline_trusted=baseline_trusted, + new_keys=new_segment_group_keys, + block_facts={}, + scan_root=scan_root, + ) + structural_groups = _build_structural_groups( + structural_findings, + scan_root=scan_root, + ) + dead_code_groups = _build_dead_code_groups( + metrics_payload, + scan_root=scan_root, + ) + dead_code_family = _as_mapping( + _as_mapping(metrics_payload.get("families")).get(FAMILY_DEAD_CODE) + ) + dead_code_summary = _as_mapping(dead_code_family.get("summary")) + dead_code_suppressed = _as_int( + dead_code_summary.get( + "suppressed", + len(_as_sequence(dead_code_family.get("suppressed_items"))), + ) + ) + design_groups = _build_design_groups( + metrics_payload, + scan_root=scan_root, + ) + return { + "summary": _findings_summary( + clone_functions=clone_functions, + clone_blocks=clone_blocks, + clone_segments=clone_segments, + structural_groups=structural_groups, + dead_code_groups=dead_code_groups, + design_groups=design_groups, + 
dead_code_suppressed=dead_code_suppressed, + ), + "groups": { + FAMILY_CLONES: { + "functions": clone_functions, + "blocks": clone_blocks, + "segments": clone_segments, + }, + FAMILY_STRUCTURAL: { + "groups": structural_groups, + }, + FAMILY_DEAD_CODE: { + "groups": dead_code_groups, + }, + "design": { + "groups": design_groups, + }, + }, + } + + +def _canonical_integrity_payload( + *, + report_schema_version: str, + meta: Mapping[str, object], + inventory: Mapping[str, object], + findings: Mapping[str, object], + metrics: Mapping[str, object], +) -> dict[str, object]: + canonical_meta = { + str(key): value for key, value in meta.items() if str(key) != "runtime" + } + + def _strip_noncanonical(value: object) -> object: + if isinstance(value, Mapping): + return { + str(key): _strip_noncanonical(item) + for key, item in value.items() + if str(key) != "display_facts" + } + if isinstance(value, Sequence) and not isinstance( + value, + (str, bytes, bytearray), + ): + return [_strip_noncanonical(item) for item in value] + return value + + return { + "report_schema_version": report_schema_version, + "meta": canonical_meta, + "inventory": inventory, + "findings": _strip_noncanonical(findings), + "metrics": metrics, + } + + +def _build_integrity_payload( + *, + report_schema_version: str, + meta: Mapping[str, object], + inventory: Mapping[str, object], + findings: Mapping[str, object], + metrics: Mapping[str, object], +) -> dict[str, object]: + canonical_payload = _canonical_integrity_payload( + report_schema_version=report_schema_version, + meta=meta, + inventory=inventory, + findings=findings, + metrics=metrics, + ) + canonical_json = json.dumps( + canonical_payload, + ensure_ascii=False, + separators=(",", ":"), + sort_keys=True, + ).encode("utf-8") + payload_sha = sha256(canonical_json).hexdigest() + return { + "canonicalization": { + "version": "1", + "scope": "canonical_only", + "sections": [ + "report_schema_version", + "meta", + "inventory", + "findings", + 
"metrics", + ], + }, + "digest": { + "verified": True, + "algorithm": "sha256", + "value": payload_sha, + }, + } + + +def build_report_document( + *, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + meta: Mapping[str, object] | None = None, + inventory: Mapping[str, object] | None = None, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Sequence[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> dict[str, object]: + report_schema_version = REPORT_SCHEMA_VERSION + scan_root = str(_as_mapping(meta).get("scan_root", "")) + meta_payload = _build_meta_payload(meta, scan_root=scan_root) + metrics_payload = _build_metrics_payload(metrics, scan_root=scan_root) + file_list = _collect_report_file_list( + inventory=inventory, + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + metrics=metrics, + structural_findings=structural_findings, + ) + inventory_payload = _build_inventory_payload( + inventory=inventory, + file_list=file_list, + metrics_payload=metrics_payload, + scan_root=scan_root, + ) + findings_payload = _build_findings_payload( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + block_facts=block_facts or {}, + structural_findings=structural_findings, + metrics_payload=metrics_payload, + baseline_trusted=_baseline_is_trusted(meta_payload), + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + scan_root=scan_root, + ) + overview_payload, hotlists_payload = _build_derived_overview( + findings=findings_payload, + metrics_payload=metrics_payload, + ) + 
derived_payload = { + "suggestions": _build_derived_suggestions(suggestions), + "overview": overview_payload, + "hotlists": hotlists_payload, + } + integrity_payload = _build_integrity_payload( + report_schema_version=report_schema_version, + meta=meta_payload, + inventory=inventory_payload, + findings=findings_payload, + metrics=metrics_payload, + ) + return { + "report_schema_version": report_schema_version, + "meta": meta_payload, + "inventory": inventory_payload, + "findings": findings_payload, + "metrics": metrics_payload, + "derived": derived_payload, + "integrity": integrity_payload, + } diff --git a/codeclone/report/markdown.py b/codeclone/report/markdown.py new file mode 100644 index 0000000..71e1eef --- /dev/null +++ b/codeclone/report/markdown.py @@ -0,0 +1,524 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Collection, Mapping, Sequence +from typing import TYPE_CHECKING + +from .. import _coerce +from ..domain.findings import FAMILY_CLONE, FAMILY_DEAD_CODE, FAMILY_STRUCTURAL +from ._formatting import format_spread_text +from .json_contract import build_report_document + +if TYPE_CHECKING: + from ..models import StructuralFindingGroup, Suggestion + from .types import GroupMapLike + +MARKDOWN_SCHEMA_VERSION = "1.0" +_MAX_FINDING_LOCATIONS = 5 +_MAX_METRIC_ITEMS = 10 + +_ANCHORS: tuple[tuple[str, str, int], ...] 
= ( + ("overview", "Overview", 2), + ("inventory", "Inventory", 2), + ("findings-summary", "Findings Summary", 2), + ("top-risks", "Top Risks", 2), + ("suggestions", "Suggestions", 2), + ("findings", "Findings", 2), + ("clone-findings", "Clone Findings", 3), + ("structural-findings", "Structural Findings", 3), + ("dead-code-findings", "Dead Code Findings", 3), + ("design-findings", "Design Findings", 3), + ("metrics", "Metrics", 2), + ("health", "Health", 3), + ("complexity", "Complexity", 3), + ("coupling", "Coupling", 3), + ("cohesion", "Cohesion", 3), + ("dependencies", "Dependencies", 3), + ("dead-code-metrics", "Dead Code", 3), + ("dead-code-suppressed", "Suppressed Dead Code", 3), + ("integrity", "Integrity", 2), +) + +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _text(value: object) -> str: + if value is None: + return "(none)" + if isinstance(value, float): + return f"{value:.2f}".rstrip("0").rstrip(".") or "0" + if isinstance(value, bool): + return "true" if value else "false" + text = str(value).strip() + return text or "(none)" + + +def _source_scope_text(scope: Mapping[str, object]) -> str: + dominant = _text(scope.get("dominant_kind")) + impact = _text(scope.get("impact_scope")) + return f"{dominant} / {impact}" + + +def _spread_text(spread: Mapping[str, object]) -> str: + return format_spread_text( + _as_int(spread.get("files")), + _as_int(spread.get("functions")), + ) + + +def _location_text(item: Mapping[str, object]) -> str: + relative_path = _text(item.get("relative_path")) + start_line = _as_int(item.get("start_line")) + end_line = _as_int(item.get("end_line")) + qualname = str(item.get("qualname", "")).strip() + line_part = "" + if start_line > 0: + line_part = f":{start_line}" + if end_line > 0 and end_line != start_line: + line_part += f"-{end_line}" + if qualname: + return f"`{relative_path}{line_part}` :: `{qualname}`" + return 
f"`{relative_path}{line_part}`" + + +def _append_anchor(lines: list[str], anchor_id: str, title: str, level: int) -> None: + lines.append(f'') + lines.append(f"{'#' * level} {title}") + lines.append("") + + +def _append_kv_bullets( + lines: list[str], + rows: Sequence[tuple[str, object]], +) -> None: + for label, value in rows: + lines.append(f"- {label}: {_text(value)}") + lines.append("") + + +def _finding_heading(group: Mapping[str, object]) -> str: + family = str(group.get("family", "")).strip() + category = str(group.get("category", "")).strip() + clone_type = str(group.get("clone_type", "")).strip() + if family == FAMILY_CLONE: + suffix = f" ({clone_type})" if clone_type else "" + return f"{category.title()} clone group{suffix}" + if family == FAMILY_STRUCTURAL: + return f"Structural finding: {category}" + if family == FAMILY_DEAD_CODE: + return f"Dead code: {category}" + return f"Design finding: {category}" + + +def _append_facts_block( + lines: list[str], + *, + title: str, + facts: Mapping[str, object], +) -> None: + if not facts: + return + lines.append(f"- {title}:") + lines.extend(f" - `{key}`: {_text(facts[key])}" for key in sorted(facts)) + + +def _append_findings_section( + lines: list[str], + *, + groups: Sequence[object], +) -> None: + finding_rows = [_as_mapping(group) for group in groups] + if not finding_rows: + lines.append("_None._") + lines.append("") + return + for group in finding_rows: + lines.append(f"#### {_finding_heading(group)}") + lines.append("") + _append_kv_bullets( + lines, + ( + ("Finding ID", f"`{_text(group.get('id'))}`"), + ("Family", group.get("family")), + ("Category", group.get("category")), + ("Kind", group.get("kind")), + ("Severity", group.get("severity")), + ("Confidence", group.get("confidence")), + ("Priority", _as_float(group.get("priority"))), + ("Scope", _source_scope_text(_as_mapping(group.get("source_scope")))), + ("Spread", _spread_text(_as_mapping(group.get("spread")))), + ("Occurrences", group.get("count")), 
+ ), + ) + facts = _as_mapping(group.get("facts")) + display_facts = _as_mapping(group.get("display_facts")) + if facts or display_facts: + _append_facts_block(lines, title="Facts", facts=facts) + _append_facts_block(lines, title="Presentation facts", facts=display_facts) + lines.append("") + items = list(map(_as_mapping, _as_sequence(group.get("items")))) + lines.append("- Locations:") + visible_items = items[:_MAX_FINDING_LOCATIONS] + lines.extend(f" - {_location_text(item)}" for item in visible_items) + if len(items) > len(visible_items): + lines.append( + f" - ... and {len(items) - len(visible_items)} more occurrence(s)" + ) + lines.append("") + + +def _append_metric_items( + lines: list[str], + *, + items: Sequence[object], + key_order: Sequence[str], +) -> None: + metric_rows = [_as_mapping(item) for item in items[:_MAX_METRIC_ITEMS]] + if not metric_rows: + lines.append("_No detailed items._") + lines.append("") + return + for item in metric_rows: + parts = [f"{key}={_text(item[key])}" for key in key_order if key in item] + if "relative_path" in item: + parts.append(_location_text(item)) + lines.append(f"- {'; '.join(parts)}") + if len(items) > len(metric_rows): + lines.append(f"- ... 
and {len(items) - len(metric_rows)} more item(s)") + lines.append("") + + +def render_markdown_report_document(payload: Mapping[str, object]) -> str: + meta = _as_mapping(payload.get("meta")) + inventory = _as_mapping(payload.get("inventory")) + findings = _as_mapping(payload.get("findings")) + metrics = _as_mapping(payload.get("metrics")) + derived = _as_mapping(payload.get("derived")) + integrity = _as_mapping(payload.get("integrity")) + runtime = _as_mapping(meta.get("runtime")) + findings_summary = _as_mapping(findings.get("summary")) + findings_groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(findings_groups.get("clones")) + overview = _as_mapping(derived.get("overview")) + hotlists = _as_mapping(derived.get("hotlists")) + suggestions = _as_sequence(derived.get("suggestions")) + metrics_families = _as_mapping(metrics.get("families")) + health_snapshot = _as_mapping(overview.get("health_snapshot")) + inventory_files = _as_mapping(inventory.get("files")) + inventory_code = _as_mapping(inventory.get("code")) + digest = _as_mapping(integrity.get("digest")) + canonicalization = _as_mapping(integrity.get("canonicalization")) + family_summary = _as_mapping(findings_summary.get("families")) + severity_summary = _as_mapping(findings_summary.get("severity")) + impact_summary = _as_mapping(findings_summary.get("impact_scope")) + source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) + + lines = [ + "# CodeClone Report", + "", + f"- Markdown schema: {MARKDOWN_SCHEMA_VERSION}", + f"- Source report schema: {_text(payload.get('report_schema_version'))}", + f"- Project: {_text(meta.get('project_name'))}", + f"- Analysis mode: {_text(meta.get('analysis_mode'))}", + f"- Report mode: {_text(meta.get('report_mode'))}", + f"- Generated by: codeclone {_text(meta.get('codeclone_version'))}", + f"- Python: {_text(meta.get('python_tag'))}", + f"- Report generated (UTC): {_text(runtime.get('report_generated_at_utc'))}", + "", + ] + + 
_append_anchor(lines, *_ANCHORS[0]) + _append_kv_bullets( + lines, + ( + ("Project", meta.get("project_name")), + ( + "Health", + ( + f"{_text(health_snapshot.get('score'))} " + f"({_text(health_snapshot.get('grade'))})" + ), + ), + ("Total findings", findings_summary.get("total")), + ( + "Families", + ", ".join( + f"{name}={_text(family_summary.get(name))}" + for name in ("clones", "structural", "dead_code", "design") + ), + ), + ("Strongest dimension", health_snapshot.get("strongest_dimension")), + ("Weakest dimension", health_snapshot.get("weakest_dimension")), + ), + ) + + _append_anchor(lines, *_ANCHORS[1]) + _append_kv_bullets( + lines, + ( + ( + "Files", + ", ".join( + f"{name}={_text(inventory_files.get(name))}" + for name in ( + "total_found", + "analyzed", + "cached", + "skipped", + "source_io_skipped", + ) + ), + ), + ( + "Code", + ", ".join( + f"{name}={_text(inventory_code.get(name))}" + for name in ( + "parsed_lines", + "functions", + "methods", + "classes", + ) + ), + ), + ), + ) + + _append_anchor(lines, *_ANCHORS[2]) + _append_kv_bullets( + lines, + ( + ("Total", findings_summary.get("total")), + ( + "By family", + ", ".join( + f"{name}={_text(family_summary.get(name))}" + for name in ("clones", "structural", "dead_code", "design") + ), + ), + ( + "By severity", + ", ".join( + f"{name}={_text(severity_summary.get(name))}" + for name in ("critical", "warning", "info") + ), + ), + ( + "By impact scope", + ", ".join( + f"{name}={_text(impact_summary.get(name))}" + for name in ("runtime", "non_runtime", "mixed") + ), + ), + ( + "Source scope breakdown", + ", ".join( + f"{name}={_text(source_breakdown.get(name))}" + for name in ("production", "tests", "fixtures", "other") + if name in source_breakdown + ) + or "(none)", + ), + ), + ) + + _append_anchor(lines, *_ANCHORS[3]) + top_risks = [_as_mapping(item) for item in _as_sequence(overview.get("top_risks"))] + if top_risks: + for idx, risk in enumerate(top_risks[:10], start=1): + lines.append( + f"{idx}. 
{_text(risk.get('label'))} " + f"(family={_text(risk.get('family'))}, " + f"scope={_text(risk.get('scope'))}, " + f"count={_text(risk.get('count'))})" + ) + else: + lines.append("_None._") + lines.append("") + + if suggestions: + _append_anchor(lines, *_ANCHORS[4]) + for suggestion in map(_as_mapping, suggestions): + action = _as_mapping(suggestion.get("action")) + lines.append(f"### {_text(suggestion.get('title'))}") + lines.append("") + _append_kv_bullets( + lines, + ( + ("Finding", f"`{_text(suggestion.get('finding_id'))}`"), + ("Summary", suggestion.get("summary")), + ("Location", suggestion.get("location_label")), + ("Effort", action.get("effort")), + ), + ) + representative = [ + _as_mapping(item) + for item in _as_sequence(suggestion.get("representative_locations")) + ] + if representative: + lines.append(f"- Example: {_location_text(representative[0])}") + steps = [str(step).strip() for step in _as_sequence(action.get("steps"))] + if steps: + lines.append("- Steps:") + for idx, step in enumerate(steps, start=1): + lines.append(f" {idx}. 
{step}") + lines.append("") + + _append_anchor(lines, *_ANCHORS[5]) + _append_anchor(lines, *_ANCHORS[6]) + _append_findings_section( + lines, + groups=[ + *_as_sequence(clone_groups.get("functions")), + *_as_sequence(clone_groups.get("blocks")), + *_as_sequence(clone_groups.get("segments")), + ], + ) + + _append_anchor(lines, *_ANCHORS[7]) + _append_findings_section( + lines, + groups=_as_sequence( + _as_mapping(findings_groups.get("structural")).get("groups") + ), + ) + + _append_anchor(lines, *_ANCHORS[8]) + _append_findings_section( + lines, + groups=_as_sequence( + _as_mapping(findings_groups.get("dead_code")).get("groups") + ), + ) + + _append_anchor(lines, *_ANCHORS[9]) + _append_findings_section( + lines, + groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), + ) + + _append_anchor(lines, *_ANCHORS[10]) + for anchor_id, title, summary_keys, item_keys in ( + ("health", "Health", ("score", "grade"), ()), + ( + "complexity", + "Complexity", + ("total", "average", "max", "high_risk"), + ("cyclomatic_complexity", "nesting_depth", "risk"), + ), + ( + "coupling", + "Coupling", + ("total", "average", "max", "high_risk"), + ("cbo", "risk"), + ), + ( + "cohesion", + "Cohesion", + ("total", "average", "max", "low_cohesion"), + ("lcom4", "method_count", "instance_var_count", "risk"), + ), + ( + "dependencies", + "Dependencies", + ("modules", "edges", "cycles", "max_depth"), + ("source", "target", "import_type", "line"), + ), + ( + "dead-code-metrics", + "Dead Code", + ("total", "high_confidence", "suppressed"), + ("kind", "confidence"), + ), + ): + family_key = "dead_code" if anchor_id == "dead-code-metrics" else anchor_id + family_payload = _as_mapping(metrics_families.get(family_key)) + family_summary_map = _as_mapping(family_payload.get("summary")) + _append_anchor(lines, anchor_id, title, 3) + _append_kv_bullets( + lines, + tuple((key, family_summary_map.get(key)) for key in summary_keys), + ) + _append_metric_items( + lines, + 
items=_as_sequence(family_payload.get("items")), + key_order=item_keys, + ) + + dead_code_family_payload = _as_mapping(metrics_families.get("dead_code")) + _append_anchor(lines, *_ANCHORS[17]) + _append_metric_items( + lines, + items=_as_sequence(dead_code_family_payload.get("suppressed_items")), + key_order=("kind", "confidence", "suppression_rule", "suppression_source"), + ) + + _append_anchor(lines, *_ANCHORS[18]) + _append_kv_bullets( + lines, + ( + ("Canonicalization version", canonicalization.get("version")), + ("Canonicalization scope", canonicalization.get("scope")), + ( + "Canonical sections", + ", ".join( + str(item) for item in _as_sequence(canonicalization.get("sections")) + ), + ), + ("Digest algorithm", digest.get("algorithm")), + ("Digest verified", digest.get("verified")), + ("Digest value", digest.get("value")), + ( + "Hotlists", + ", ".join( + f"{name}={len(_as_sequence(hotlists.get(name)))}" + for name in ( + "most_actionable_ids", + "highest_spread_ids", + "production_hotspot_ids", + "test_fixture_hotspot_ids", + ) + ), + ), + ), + ) + + return "\n".join(lines).rstrip() + "\n" + + +def to_markdown_report( + *, + report_document: Mapping[str, object] | None = None, + meta: Mapping[str, object], + inventory: Mapping[str, object] | None = None, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Collection[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> str: + payload = report_document or build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, 
+ new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + metrics=metrics, + suggestions=tuple(suggestions or ()), + structural_findings=tuple(structural_findings or ()), + ) + return render_markdown_report_document(payload) diff --git a/codeclone/report/merge.py b/codeclone/report/merge.py new file mode 100644 index 0000000..fc59e9e --- /dev/null +++ b/codeclone/report/merge.py @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable + + from .types import GroupItem, GroupItemLike, GroupItemsLike + + +def coerce_positive_int(value: object) -> int | None: + if isinstance(value, bool): + integer = int(value) + elif isinstance(value, int): + integer = value + elif isinstance(value, str): + try: + integer = int(value) + except ValueError: + return None + else: + return None + return integer if integer > 0 else None + + +def merge_overlapping_items( + items: GroupItemsLike, + *, + sort_key: Callable[[GroupItemLike], tuple[str, str, int, int]], +) -> list[GroupItem]: + """Merge overlapping or adjacent ranges for the same file/function pair.""" + if not items: + return [] + + sorted_items = sorted(items, key=sort_key) + merged: list[GroupItem] = [] + current: GroupItem | None = None + + for item in sorted_items: + start_line = coerce_positive_int(item.get("start_line")) + end_line = coerce_positive_int(item.get("end_line")) + if start_line is None or end_line is None or end_line < start_line: + continue + + if current is None: + current = dict(item) + current["start_line"] = start_line + current["end_line"] = end_line + current["size"] = max(1, end_line - start_line + 1) + continue + + same_owner = str(current.get("filepath", "")) == str( + item.get("filepath", "") + ) and str(current.get("qualname", "")) == 
str(item.get("qualname", "")) + current_end = coerce_positive_int(current.get("end_line")) or 0 + current_start = coerce_positive_int(current.get("start_line")) or current_end + if same_owner and start_line <= current_end + 1: + merged_end = max(current_end, end_line) + current["end_line"] = merged_end + current["size"] = max( + 1, + merged_end - current_start + 1, + ) + continue + + merged.append(current) + current = dict(item) + current["start_line"] = start_line + current["end_line"] = end_line + current["size"] = max(1, end_line - start_line + 1) + + if current is not None: + merged.append(current) + + return merged diff --git a/codeclone/report/overview.py b/codeclone/report/overview.py new file mode 100644 index 0000000..14fac90 --- /dev/null +++ b/codeclone/report/overview.py @@ -0,0 +1,497 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import Counter +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from .. 
import _coerce +from ..domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_METRICS, + FAMILY_STRUCTURAL, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, +) +from ..domain.source_scope import ( + SOURCE_KIND_FIXTURES, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) +from ..domain.source_scope import SOURCE_KIND_ORDER as _SOURCE_KIND_ORDER +from ..report.explain_contract import ( + BLOCK_HINT_ASSERT_ONLY, + BLOCK_PATTERN_REPEATED_STMT_HASH, +) +from .derived import format_spread_location_label + +if TYPE_CHECKING: + from ..models import Suggestion + +__all__ = [ + "build_report_overview", + "materialize_report_overview", + "serialize_suggestion_card", +] + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def serialize_suggestion_card(suggestion: Suggestion) -> dict[str, object]: + return { + "title": suggestion.title, + "family": suggestion.finding_family, + "category": suggestion.category, + "summary": suggestion.fact_summary, + "severity": suggestion.severity, + "priority": suggestion.priority, + "confidence": suggestion.confidence, + "source_kind": suggestion.source_kind, + "location": suggestion.location_label or suggestion.location, + "clone_type": suggestion.clone_type, + "count": suggestion.fact_count, + "spread": { + "files": suggestion.spread_files, + "functions": suggestion.spread_functions, + }, + } + + +def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, object]]: + groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(groups.get(FAMILY_CLONES)) + return [ + *map(_as_mapping, _as_sequence(clone_groups.get("functions"))), + *map(_as_mapping, _as_sequence(clone_groups.get("blocks"))), + 
*map(_as_mapping, _as_sequence(clone_groups.get("segments"))), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups")), + ), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups")), + ), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get(FAMILY_DESIGN)).get("groups")), + ), + ] + + +def _clone_fact_kind(kind: str) -> str: + return { + CLONE_KIND_FUNCTION: "Function clone group", + CLONE_KIND_BLOCK: "Block clone group", + CLONE_KIND_SEGMENT: "Segment clone group", + }.get(kind, "Clone group") + + +def _clone_summary_from_group(group: Mapping[str, object]) -> str: + kind = str(group.get("category", "")).strip() + clone_type = str(group.get("clone_type", "")).strip() + facts = _as_mapping(group.get("facts")) + if kind == CLONE_KIND_FUNCTION: + match clone_type: + case "Type-1": + return "same exact function body" + case "Type-2": + return "same parameterized function body" + case "Type-3": + return "same structural function body with small identifier changes" + case _: + return "same structural function body" + if kind == CLONE_KIND_BLOCK: + hint = str(facts.get("hint", "")).strip() + pattern = str(facts.get("pattern", "")).strip() + if hint == BLOCK_HINT_ASSERT_ONLY: + return "same assertion template" + if pattern == BLOCK_PATTERN_REPEATED_STMT_HASH: + return "same repeated setup/assert pattern" + return "same structural sequence with small value changes" + return "same structural segment sequence" + + +def _structural_summary_from_group(group: Mapping[str, object]) -> tuple[str, str]: + category = str(group.get("category", "")).strip() + if category == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + return ( + "Clone guard/exit divergence", + "clone cohort members differ in entry guards or early-exit behavior", + ) + if category == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + return ( + "Clone cohort drift", + "clone cohort members drift from majority terminal/guard/try profile", + ) + 
+ signature = _as_mapping(group.get("signature")) + debug = _as_mapping(signature.get("debug")) + terminal = str( + _as_mapping(signature.get("stable")).get( + "terminal_kind", + debug.get("terminal", ""), + ) + ).strip() + stmt_seq = str(debug.get("stmt_seq", "")).strip() + raises = str(debug.get("raises", "")).strip() + has_loop = str(debug.get("has_loop", "")).strip() + raise_like = terminal == "raise" or raises not in {"", "0"} + match (raise_like, terminal, has_loop): + case (True, _, _): + return "Repeated branch family", "same repeated guard/validation branch" + case (False, "return", _): + return "Repeated branch family", "same repeated return branch" + case (False, _, "1"): + return "Repeated branch family", "same repeated loop branch" + case _: + if stmt_seq: + return "Repeated branch family", ( + f"same repeated branch shape ({stmt_seq})" + ) + return "Repeated branch family", "same repeated branch shape" + + +def _single_item_location(item: Mapping[str, object]) -> str: + module = str(item.get("module", "")).strip() + if module: + return module + relative_path = str(item.get("relative_path", "")).strip() + if not relative_path: + return "(unknown)" + start_line = _as_int(item.get("start_line")) + end_line = _as_int(item.get("end_line")) + if start_line <= 0: + return relative_path + line = f"{start_line}-{end_line}" if end_line > start_line else str(start_line) + return f"{relative_path}:{line}" + + +def _group_location_label(group: Mapping[str, object]) -> str: + items = tuple(_as_mapping(item) for item in _as_sequence(group.get("items"))) + category = str(group.get("category", "")).strip() + if category == CATEGORY_DEPENDENCY: + modules = [str(item.get("module", "")).strip() for item in items] + joined = " -> ".join(module for module in modules if module) + if joined: + return joined + count = _as_int(group.get("count")) + if count <= 1 and items: + return _single_item_location(items[0]) + spread = _as_mapping(group.get("spread")) + files = 
_as_int(spread.get("files")) + functions = _as_int(spread.get("functions")) + return format_spread_location_label( + count, + files=files, + functions=functions, + ) + + +def serialize_finding_group_card(group: Mapping[str, object]) -> dict[str, object]: + family = str(group.get("family", "")).strip() + category = str(group.get("category", "")).strip() + facts = _as_mapping(group.get("facts")) + source_scope = _as_mapping(group.get("source_scope")) + + title = "Finding" + summary = "" + clone_type = str(group.get("clone_type", "")).strip() + if family == "clone": + title = f"{_clone_fact_kind(category)} ({clone_type or 'Type-4'})" + summary = _clone_summary_from_group(group) + elif family == FAMILY_STRUCTURAL: + title, summary = _structural_summary_from_group(group) + elif family == FAMILY_DEAD_CODE: + title = "Remove or explicitly keep unused code" + confidence = str(group.get("confidence", "medium")).strip() or "medium" + summary = f"{category or 'symbol'} with {confidence} confidence" + elif family == FAMILY_DESIGN: + if category == CATEGORY_COMPLEXITY: + title = "Reduce high-complexity function" + summary = ( + "cyclomatic_complexity=" + f"{_as_int(facts.get('cyclomatic_complexity'))}, " + f"nesting_depth={_as_int(facts.get('nesting_depth'))}" + ) + elif category == CATEGORY_COUPLING: + title = "Split high-coupling class" + summary = f"cbo={_as_int(facts.get('cbo'))}" + elif category == CATEGORY_COHESION: + title = "Split low-cohesion class" + summary = f"lcom4={_as_int(facts.get('lcom4'))}" + elif category == CATEGORY_DEPENDENCY: + title = "Break circular dependency" + cycle_length = _as_int( + facts.get("cycle_length"), + _as_int(group.get("count")), + ) + summary = f"{cycle_length} modules participate in this cycle" + + return { + "title": title, + "family": family, + "category": category, + "summary": summary, + "severity": str(group.get("severity", "info")), + "priority": group.get("priority"), + "confidence": str(group.get("confidence", "medium")), + 
"source_kind": str(source_scope.get("dominant_kind", SOURCE_KIND_OTHER)).strip() + or SOURCE_KIND_OTHER, + "location": _group_location_label(group), + "clone_type": clone_type, + "count": _as_int(group.get("count")), + "spread": { + "files": _as_int(_as_mapping(group.get("spread")).get("files")), + "functions": _as_int(_as_mapping(group.get("spread")).get("functions")), + }, + } + + +def materialize_report_overview( + *, + overview: Mapping[str, object], + hotlists: Mapping[str, object], + findings: Mapping[str, object], +) -> dict[str, object]: + materialized = dict(overview) + if "source_breakdown" not in materialized: + materialized["source_breakdown"] = dict( + _as_mapping(overview.get("source_scope_breakdown")) + ) + + finding_index = { + str(group.get("id")): group for group in _flatten_findings(findings) + } + for overview_key, hotlist_key in ( + ("most_actionable", "most_actionable_ids"), + ("highest_spread", "highest_spread_ids"), + ("production_hotspots", "production_hotspot_ids"), + ("test_fixture_hotspots", "test_fixture_hotspot_ids"), + ): + if _as_sequence(materialized.get(overview_key)): + continue + materialized[overview_key] = [ + serialize_finding_group_card(group) + for finding_id in _as_sequence(hotlists.get(hotlist_key)) + if (group := _as_mapping(finding_index.get(str(finding_id)))) + ] + return materialized + + +def _card_key(suggestion: Suggestion) -> tuple[float, int, int, int, str, str]: + return ( + -suggestion.priority, + -suggestion.spread_files, + -suggestion.spread_functions, + -suggestion.fact_count, + suggestion.location_label or suggestion.location, + suggestion.title, + ) + + +def _spread_key(suggestion: Suggestion) -> tuple[int, int, int, float, str]: + return ( + -suggestion.spread_files, + -suggestion.spread_functions, + -suggestion.fact_count, + -suggestion.priority, + suggestion.title, + ) + + +def _source_counts( + suggestions: Sequence[Suggestion], +) -> dict[str, int]: + counts: Counter[str] = 
Counter(suggestion.source_kind for suggestion in suggestions) + ordered_kinds = tuple( + sorted(_SOURCE_KIND_ORDER, key=lambda kind: _SOURCE_KIND_ORDER[kind]) + ) + return {kind: counts[kind] for kind in ordered_kinds if counts[kind] > 0} | { + kind: counts[kind] + for kind in sorted(counts) + if kind not in ordered_kinds and counts[kind] > 0 + } + + +def _health_snapshot(metrics: Mapping[str, object]) -> dict[str, object]: + health = metrics.get("health") + if not isinstance(health, Mapping): + return {} + dimensions = health.get("dimensions") + if not isinstance(dimensions, Mapping): + return { + "score": health.get("score"), + "grade": health.get("grade"), + "strongest_dimension": None, + "weakest_dimension": None, + } + normalized_dimensions = { + str(key): int(value) + for key, value in dimensions.items() + if isinstance(key, str) and isinstance(value, int) + } + strongest = None + weakest = None + if normalized_dimensions: + strongest = min( + sorted(normalized_dimensions), + key=lambda key: (-normalized_dimensions[key], key), + ) + weakest = min( + sorted(normalized_dimensions), + key=lambda key: (normalized_dimensions[key], key), + ) + return { + "score": health.get("score"), + "grade": health.get("grade"), + "strongest_dimension": strongest, + "weakest_dimension": weakest, + } + + +def _metric_summary_count( + metrics: Mapping[str, object], + metric_name: str, + summary_key: str, + *, + fallback_key: str | None = None, +) -> int: + metric_map = metrics.get(metric_name) + if not isinstance(metric_map, Mapping): + return 0 + summary = metric_map.get("summary") + if not isinstance(summary, Mapping): + return 0 + return int(summary.get(summary_key, summary.get(fallback_key, 0))) + + +def _top_risks( + suggestions: Sequence[Suggestion], + *, + metrics: Mapping[str, object], +) -> list[str]: + risks: list[str] = [] + high_conf = _metric_summary_count( + metrics, + "dead_code", + "high_confidence", + fallback_key="critical", + ) + if high_conf > 0: + noun = 
"item" if high_conf == 1 else "items" + risks.append(f"{high_conf} dead code {noun}") + + low = _metric_summary_count(metrics, "cohesion", "low_cohesion") + if low > 0: + noun = "class" if low == 1 else "classes" + risks.append(f"{low} low cohesion {noun}") + production_structural = sum( + 1 + for suggestion in suggestions + if suggestion.finding_family == FAMILY_STRUCTURAL + and suggestion.source_kind == SOURCE_KIND_PRODUCTION + ) + if production_structural > 0: + noun = "finding" if production_structural == 1 else "findings" + risks.append(f"{production_structural} structural {noun} in production code") + test_clone_groups = sum( + 1 + for suggestion in suggestions + if suggestion.finding_family == FAMILY_CLONES + and suggestion.source_kind in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} + ) + if test_clone_groups > 0: + noun = "group" if test_clone_groups == 1 else "groups" + risks.append(f"{test_clone_groups} clone {noun} in tests/fixtures") + return risks[:6] + + +def build_report_overview( + *, + suggestions: Sequence[Suggestion], + metrics: Mapping[str, object] | None = None, +) -> dict[str, object]: + metrics_map = metrics if isinstance(metrics, Mapping) else {} + metrics_suggestions = tuple( + suggestion + for suggestion in suggestions + if suggestion.finding_family == FAMILY_METRICS + and suggestion.category != CATEGORY_DEAD_CODE + ) + actionable = tuple( + suggestion for suggestion in suggestions if suggestion.severity != "info" + ) + highest_spread = tuple(sorted(suggestions, key=_spread_key))[:5] + production_hotspots = tuple( + sorted( + ( + suggestion + for suggestion in suggestions + if suggestion.source_kind == SOURCE_KIND_PRODUCTION + ), + key=_card_key, + ) + )[:5] + test_fixture_hotspots = tuple( + sorted( + ( + suggestion + for suggestion in suggestions + if suggestion.source_kind in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} + ), + key=_card_key, + ) + )[:5] + return { + "families": { + "clone_groups": sum( + 1 + for suggestion in suggestions + if 
suggestion.finding_family == FAMILY_CLONES + ), + "structural_findings": sum( + 1 + for suggestion in suggestions + if suggestion.finding_family == FAMILY_STRUCTURAL + ), + "dead_code": sum( + 1 + for suggestion in suggestions + if suggestion.category == CATEGORY_DEAD_CODE + ), + "metric_hotspots": len(metrics_suggestions), + }, + "top_risks": _top_risks(suggestions, metrics=metrics_map), + "health": _health_snapshot(metrics_map), + "source_breakdown": _source_counts(suggestions), + "most_actionable": [ + serialize_suggestion_card(suggestion) + for suggestion in tuple(sorted(actionable, key=_card_key))[:5] + ], + "highest_spread": [ + serialize_suggestion_card(suggestion) for suggestion in highest_spread + ], + "production_hotspots": [ + serialize_suggestion_card(suggestion) for suggestion in production_hotspots + ], + "test_fixture_hotspots": [ + serialize_suggestion_card(suggestion) + for suggestion in test_fixture_hotspots + ], + } diff --git a/codeclone/report/sarif.py b/codeclone/report/sarif.py new file mode 100644 index 0000000..c6bd6ff --- /dev/null +++ b/codeclone/report/sarif.py @@ -0,0 +1,944 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +from collections.abc import Collection, Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, cast + +from .. 
import _coerce +from ..contracts import DOCS_URL, REPOSITORY_URL +from ..domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, + FINDING_KIND_CLASS_HOTSPOT, + FINDING_KIND_CLONE_GROUP, + FINDING_KIND_CYCLE, + FINDING_KIND_FUNCTION_HOTSPOT, + FINDING_KIND_UNUSED_SYMBOL, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + STRUCTURAL_KIND_DUPLICATED_BRANCHES, + SYMBOL_KIND_CLASS, + SYMBOL_KIND_METHOD, +) +from ..domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_MEDIUM, + SEVERITY_CRITICAL, + SEVERITY_WARNING, +) +from .json_contract import build_report_document + +if TYPE_CHECKING: + from ..models import StructuralFindingGroup, Suggestion + from .types import GroupMapLike + +SARIF_VERSION = "2.1.0" +SARIF_PROFILE_VERSION = "1.0" +SARIF_SCHEMA_URL = "https://json.schemastore.org/sarif-2.1.0.json" +SARIF_SRCROOT_BASE_ID = "%SRCROOT%" + + +@dataclass(frozen=True, slots=True) +class _RuleSpec: + rule_id: str + short_description: str + full_description: str + default_level: str + category: str + kind: str + precision: str + + +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _text(value: object) -> str: + if value is None: + return "" + return str(value).strip() + + +def _severity_to_level(severity: str) -> str: + if severity == SEVERITY_CRITICAL: + return "error" + if severity == SEVERITY_WARNING: + return SEVERITY_WARNING + return "note" + + +def _slug(text: str) -> str: + slug_chars: list[str] = [] + prev_dash = False + for char in text.lower(): + if char.isalnum(): + slug_chars.append(char) + prev_dash = False + continue + if not prev_dash: + slug_chars.append("-") + prev_dash = True + return "".join(slug_chars).strip("-") or "finding" + + +def _rule_name(spec: _RuleSpec) -> 
str: + return f"codeclone.{_slug(spec.short_description)}" + + +def _rule_remediation(spec: _RuleSpec) -> str: + rule_id = spec.rule_id + if rule_id.startswith("CCLONE"): + return ( + "Review the representative occurrence and related occurrences, " + "then extract shared behavior or keep accepted debt in the baseline." + ) + if rule_id == "CSTRUCT001": + return ( + "Collapse repeated branch shapes into a shared helper, validator, " + "or control-flow abstraction where the behavior is intentionally shared." + ) + if rule_id == "CSTRUCT002": + return ( + "Review the clone cohort and reconcile guard or early-exit behavior " + "if those members are expected to stay aligned." + ) + if rule_id == "CSTRUCT003": + return ( + "Review the clone cohort and reconcile terminal, guard, or try/finally " + "profiles if the drift is not intentional." + ) + if rule_id.startswith("CDEAD"): + return ( + "Remove the unused symbol or keep it explicitly documented/suppressed " + "when runtime dynamics call it intentionally." + ) + if rule_id == "CDESIGN001": + return ( + "Split the class or regroup behavior so responsibilities become cohesive." + ) + if rule_id == "CDESIGN002": + return "Split the function or simplify control flow to reduce complexity." + if rule_id == "CDESIGN003": + return "Reduce dependencies or split responsibilities to lower coupling." + return ( + "Break the cycle or invert dependencies so modules no longer depend " + "on each other circularly." + ) + + +def _rule_help(spec: _RuleSpec) -> dict[str, str]: + remediation = _rule_remediation(spec) + return { + "text": f"{spec.full_description} {remediation}", + "markdown": ( + f"{spec.full_description}\n\n" + f"{remediation}\n\n" + f"See [CodeClone docs]({DOCS_URL})." 
+ ), + } + + +def _scan_root_uri(payload: Mapping[str, object]) -> str: + meta = _as_mapping(payload.get("meta")) + runtime = _as_mapping(meta.get("runtime")) + scan_root_absolute = _text(runtime.get("scan_root_absolute")) + if not scan_root_absolute: + return "" + scan_root_path = Path(scan_root_absolute) + if not scan_root_path.is_absolute(): + return "" + try: + uri = scan_root_path.as_uri() + except ValueError: + return "" + return uri if uri.endswith("/") else f"{uri}/" + + +def _flatten_findings(payload: Mapping[str, object]) -> list[Mapping[str, object]]: + findings = _as_mapping(payload.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get(FAMILY_CLONES)) + structural = _as_mapping(groups.get(FAMILY_STRUCTURAL)) + dead_code = _as_mapping(groups.get(FAMILY_DEAD_CODE)) + design = _as_mapping(groups.get(FAMILY_DESIGN)) + return [ + *map(_as_mapping, _as_sequence(clones.get("functions"))), + *map(_as_mapping, _as_sequence(clones.get("blocks"))), + *map(_as_mapping, _as_sequence(clones.get("segments"))), + *map(_as_mapping, _as_sequence(structural.get("groups"))), + *map(_as_mapping, _as_sequence(dead_code.get("groups"))), + *map(_as_mapping, _as_sequence(design.get("groups"))), + ] + + +def _artifact_catalog( + findings: Sequence[Mapping[str, object]], + *, + use_uri_base_id: bool, +) -> tuple[list[dict[str, object]], dict[str, int]]: + artifact_paths = sorted( + { + relative_path + for group in findings + for item in map(_as_mapping, _as_sequence(group.get("items"))) + for relative_path in (_text(item.get("relative_path")),) + if relative_path + } + ) + artifact_index_map = {path: index for index, path in enumerate(artifact_paths)} + artifacts = [ + { + "location": { + "uri": path, + **({"uriBaseId": SARIF_SRCROOT_BASE_ID} if use_uri_base_id else {}), + } + } + for path in artifact_paths + ] + return cast(list[dict[str, object]], artifacts), artifact_index_map + + +def _clone_rule_spec(category: str) -> _RuleSpec: + 
if category == CLONE_KIND_FUNCTION: + return _RuleSpec( + "CCLONE001", + "Function clone group", + "Multiple functions share the same normalized function body.", + SEVERITY_WARNING, + FAMILY_CLONE, + FINDING_KIND_CLONE_GROUP, + CONFIDENCE_HIGH, + ) + if category == CLONE_KIND_BLOCK: + return _RuleSpec( + "CCLONE002", + "Block clone group", + "Repeated normalized statement blocks were detected across occurrences.", + SEVERITY_WARNING, + FAMILY_CLONE, + FINDING_KIND_CLONE_GROUP, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CCLONE003", + "Segment clone group", + "Repeated normalized statement segments were detected across occurrences.", + "note", + FAMILY_CLONE, + FINDING_KIND_CLONE_GROUP, + CONFIDENCE_MEDIUM, + ) + + +def _structural_rule_spec(kind: str) -> _RuleSpec: + if kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + return _RuleSpec( + "CSTRUCT002", + "Clone guard/exit divergence", + ( + "Members of the same function-clone cohort diverged in " + "entry guards or early-exit behavior." + ), + SEVERITY_WARNING, + FAMILY_STRUCTURAL, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + CONFIDENCE_HIGH, + ) + if kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + return _RuleSpec( + "CSTRUCT003", + "Clone cohort drift", + ( + "Members of the same function-clone cohort drifted from " + "the majority terminal/guard/try profile." 
+ ), + SEVERITY_WARNING, + FAMILY_STRUCTURAL, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CSTRUCT001", + "Duplicated branches", + "Repeated branch families with matching structural signatures were detected.", + SEVERITY_WARNING, + FAMILY_STRUCTURAL, + kind or STRUCTURAL_KIND_DUPLICATED_BRANCHES, + CONFIDENCE_MEDIUM, + ) + + +def _dead_code_rule_spec(category: str) -> _RuleSpec: + if category == CLONE_KIND_FUNCTION: + return _RuleSpec( + "CDEAD001", + "Unused function", + "Function appears to be unused with high confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_HIGH, + ) + if category == SYMBOL_KIND_CLASS: + return _RuleSpec( + "CDEAD002", + "Unused class", + "Class appears to be unused with high confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_HIGH, + ) + if category == SYMBOL_KIND_METHOD: + return _RuleSpec( + "CDEAD003", + "Unused method", + "Method appears to be unused with high confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CDEAD004", + "Unused symbol", + "Symbol appears to be unused with reported confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_MEDIUM, + ) + + +def _design_rule_spec(category: str, kind: str) -> _RuleSpec: + if category == CATEGORY_COHESION: + return _RuleSpec( + "CDESIGN001", + "Low cohesion class", + "Class cohesion is low according to LCOM4 hotspot thresholds.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_CLASS_HOTSPOT, + CONFIDENCE_HIGH, + ) + if category == CATEGORY_COMPLEXITY: + return _RuleSpec( + "CDESIGN002", + "Complexity hotspot", + "Function exceeds the project complexity hotspot threshold.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_FUNCTION_HOTSPOT, + CONFIDENCE_HIGH, + ) + if category == CATEGORY_COUPLING: + return _RuleSpec( + 
"CDESIGN003", + "Coupling hotspot", + "Class exceeds the project coupling hotspot threshold.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_CLASS_HOTSPOT, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CDESIGN004", + "Dependency cycle", + "A dependency cycle was detected between project modules.", + "error", + FAMILY_DESIGN, + kind or FINDING_KIND_CYCLE, + CONFIDENCE_HIGH, + ) + + +def _rule_spec(group: Mapping[str, object]) -> _RuleSpec: + family = _text(group.get("family")) + category = _text(group.get("category")) + kind = _text(group.get("kind")) + if family == FAMILY_CLONE: + return _clone_rule_spec(category) + if family == FAMILY_STRUCTURAL: + return _structural_rule_spec(kind) + if family == FAMILY_DEAD_CODE: + return _dead_code_rule_spec(category) + return _design_rule_spec(category, kind) + + +def _structural_signature(group: Mapping[str, object]) -> Mapping[str, object]: + return _as_mapping(_as_mapping(group.get("signature")).get("stable")) + + +def _clone_result_message( + group: Mapping[str, object], + *, + category: str, + count: int, + spread: Mapping[str, object], +) -> str: + clone_type = _text(group.get("clone_type")) + return ( + f"{category.title()} clone group ({clone_type}), {count} occurrences " + f"across {_as_int(spread.get('files'))} files." + ) + + +def _structural_result_message( + group: Mapping[str, object], + *, + count: int, + qualname: str, +) -> str: + signature = _structural_signature(group) + signature_family = _text(signature.get("family")) + if signature_family == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + cohort_id = _text(signature.get("cohort_id")) + return ( + "Clone guard/exit divergence" + f" ({count} divergent members) in cohort " + f"{cohort_id or 'unknown'}." 
+ ) + if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + drift_fields = _as_sequence(signature.get("drift_fields")) + drift_label = ",".join(_text(item) for item in drift_fields) or "profile" + cohort_id = _text(signature.get("cohort_id")) + return ( + f"Clone cohort drift ({drift_label}), " + f"{count} divergent members in cohort {cohort_id or 'unknown'}." + ) + stmt_shape = _text(signature.get("stmt_shape")) + if qualname: + return ( + f"Repeated branch family ({stmt_shape}), {count} occurrences in {qualname}." + ) + return f"Repeated branch family ({stmt_shape}), {count} occurrences." + + +def _dead_code_result_message( + group: Mapping[str, object], + *, + category: str, + qualname: str, + relative_path: str, +) -> str: + confidence = _text(group.get("confidence")) or "reported" + target = qualname or relative_path + return f"Unused {category} with {confidence} confidence: {target}" + + +def _design_result_message( + *, + category: str, + facts: Mapping[str, object], + qualname: str, + items: Sequence[Mapping[str, object]], +) -> str: + if category == CATEGORY_COHESION: + lcom4 = _as_int(facts.get("lcom4")) + return f"Low cohesion class (LCOM4={lcom4}): {qualname}" + if category == CATEGORY_COMPLEXITY: + cc = _as_int(facts.get("cyclomatic_complexity")) + return f"High complexity function (CC={cc}): {qualname}" + if category == CATEGORY_COUPLING: + cbo = _as_int(facts.get("cbo")) + return f"High coupling class (CBO={cbo}): {qualname}" + modules = [_text(item.get("module")) for item in items if _text(item.get("module"))] + return f"Dependency cycle ({len(modules)} modules): {' -> '.join(modules)}" + + +def _result_message(group: Mapping[str, object]) -> str: + family = _text(group.get("family")) + category = _text(group.get("category")) + count = _as_int(group.get("count")) + spread = _as_mapping(group.get("spread")) + items = [_as_mapping(item) for item in _as_sequence(group.get("items"))] + first_item = items[0] if items else {} + qualname = 
_text(first_item.get("qualname")) + if family == FAMILY_CLONE: + return _clone_result_message( + group, + category=category, + count=count, + spread=spread, + ) + if family == FAMILY_STRUCTURAL: + return _structural_result_message( + group, + count=count, + qualname=qualname, + ) + if family == FAMILY_DEAD_CODE: + return _dead_code_result_message( + group, + category=category, + qualname=qualname, + relative_path=_text(first_item.get("relative_path")), + ) + return _design_result_message( + category=category, + facts=_as_mapping(group.get("facts")), + qualname=qualname, + items=items, + ) + + +def _logical_locations(item: Mapping[str, object]) -> list[dict[str, object]]: + qualname = _text(item.get("qualname")) + if qualname: + return [{"fullyQualifiedName": qualname}] + module = _text(item.get("module")) + if module: + return [{"fullyQualifiedName": module}] + return [] + + +def _location_message( + group: Mapping[str, object], + *, + related_id: int | None = None, +) -> str: + family = _text(group.get("family")) + category = _text(group.get("category")) + if family == FAMILY_CLONE: + return ( + "Representative occurrence" + if related_id is None + else f"Related occurrence #{related_id}" + ) + if family == FAMILY_STRUCTURAL: + return ( + "Representative occurrence" + if related_id is None + else f"Related occurrence #{related_id}" + ) + if family == FAMILY_DEAD_CODE: + return ( + "Unused symbol declaration" + if related_id is None + else f"Related declaration #{related_id}" + ) + if category == "dependency": + return ( + "Cycle member" + if related_id is None + else f"Related cycle member #{related_id}" + ) + return ( + "Primary location" if related_id is None else f"Related location #{related_id}" + ) + + +def _location_entry( + item: Mapping[str, object], + *, + related_id: int | None = None, + artifact_index_map: Mapping[str, int] | None = None, + use_uri_base_id: bool = False, + message_text: str = "", +) -> dict[str, object]: + relative_path = 
_text(item.get("relative_path")) + location: dict[str, object] = {} + if relative_path: + artifact_location: dict[str, object] = { + "uri": relative_path, + } + if use_uri_base_id: + artifact_location["uriBaseId"] = SARIF_SRCROOT_BASE_ID + if artifact_index_map and relative_path in artifact_index_map: + artifact_location["index"] = artifact_index_map[relative_path] + physical_location: dict[str, object] = { + "artifactLocation": artifact_location, + } + else: + physical_location = {} + start_line = _as_int(item.get("start_line")) + end_line = _as_int(item.get("end_line")) + if physical_location and start_line > 0: + region: dict[str, object] = {"startLine": start_line} + if end_line > 0: + region["endLine"] = end_line + physical_location["region"] = region + if physical_location: + location["physicalLocation"] = physical_location + logical_locations = _logical_locations(item) + if logical_locations: + location["logicalLocations"] = logical_locations + if message_text: + location["message"] = {"text": message_text} + if related_id is not None: + location["id"] = related_id + return location + + +def _generic_properties(group: Mapping[str, object]) -> dict[str, object]: + source_scope = _as_mapping(group.get("source_scope")) + spread = _as_mapping(group.get("spread")) + properties: dict[str, object] = { + "findingId": _text(group.get("id")), + "family": _text(group.get("family")), + "category": _text(group.get("category")), + "kind": _text(group.get("kind")), + "confidence": _text(group.get("confidence")), + "priority": round(_as_float(group.get("priority")), 2), + "impactScope": _text(source_scope.get("impact_scope")), + "sourceKind": _text(source_scope.get("dominant_kind")), + "spreadFiles": _as_int(spread.get("files")), + "spreadFunctions": _as_int(spread.get("functions")), + "helpUri": DOCS_URL, + } + return properties + + +def _clone_result_properties( + props: dict[str, object], + group: Mapping[str, object], +) -> dict[str, object]: + props.update( + { + 
"novelty": _text(group.get("novelty")), + "cloneKind": _text(group.get("clone_kind")), + "cloneType": _text(group.get("clone_type")), + "groupArity": _as_int(group.get("count")), + } + ) + return props + + +def _structural_signature_properties( + signature: Mapping[str, object], +) -> dict[str, object]: + signature_family = _text(signature.get("family")) + if signature_family == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + return { + "cohortId": _text(signature.get("cohort_id")), + "majorityGuardCount": _as_int( + signature.get("majority_guard_count"), + ), + "majorityTerminalKind": _text( + signature.get("majority_terminal_kind"), + ), + } + if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + return { + "cohortId": _text(signature.get("cohort_id")), + "driftFields": [ + _text(field) for field in _as_sequence(signature.get("drift_fields")) + ], + } + return { + "statementShape": _text(signature.get("stmt_shape")), + "terminalKind": _text(signature.get("terminal_kind")), + } + + +def _structural_result_properties( + props: dict[str, object], + group: Mapping[str, object], +) -> dict[str, object]: + signature = _structural_signature(group) + props["occurrenceCount"] = _as_int(group.get("count")) + props.update(_structural_signature_properties(signature)) + return props + + +def _design_result_properties( + props: dict[str, object], + *, + facts: Mapping[str, object], +) -> dict[str, object]: + for key in ( + "lcom4", + "method_count", + "instance_var_count", + "cbo", + "cyclomatic_complexity", + "nesting_depth", + "cycle_length", + ): + if key in facts: + props[key] = facts[key] + return props + + +def _result_properties(group: Mapping[str, object]) -> dict[str, object]: + props = _generic_properties(group) + family = _text(group.get("family")) + if family == FAMILY_CLONE: + return _clone_result_properties(props, group) + if family == FAMILY_STRUCTURAL: + return _structural_result_properties(props, group) + if family == FAMILY_DESIGN: + return 
_design_result_properties( + props, + facts=_as_mapping(group.get("facts")), + ) + if family == FAMILY_DEAD_CODE: + props["confidence"] = _text(group.get("confidence")) + return props + + +def _partial_fingerprints( + *, + rule_id: str, + group: Mapping[str, object], + primary_item: Mapping[str, object], +) -> dict[str, str]: + finding_id = _text(group.get("id")) + path = _text(primary_item.get("relative_path")) + qualname = _text(primary_item.get("qualname")) + start_line = _as_int(primary_item.get("start_line")) + end_line = _as_int(primary_item.get("end_line")) + fingerprints = { + "rule": rule_id, + "path": path, + } + if qualname: + fingerprints["qualname"] = qualname + if start_line > 0: + fingerprints["region"] = f"{start_line}-{end_line or start_line}" + if path and start_line > 0: + fingerprint_material = "\0".join( + ( + rule_id, + finding_id, + path, + qualname, + str(start_line), + str(end_line or start_line), + ) + ) + fingerprints["primaryLocationLineHash"] = ( + f"{hashlib.sha256(fingerprint_material.encode('utf-8')).hexdigest()[:16]}" + f":{start_line}" + ) + fingerprints["finding"] = finding_id + return fingerprints + + +def _baseline_state(group: Mapping[str, object]) -> str: + novelty = _text(group.get("novelty")) + if novelty == "new": + return "new" + if novelty == "known": + return "unchanged" + return "" + + +def _result_entry( + *, + group: Mapping[str, object], + rule_id: str, + rule_index: int, + artifact_index_map: Mapping[str, int], + use_uri_base_id: bool, +) -> dict[str, object]: + items = [_as_mapping(item) for item in _as_sequence(group.get("items"))] + primary_item = items[0] if items else {} + primary_location = ( + _location_entry( + primary_item, + artifact_index_map=artifact_index_map, + use_uri_base_id=use_uri_base_id, + message_text=_location_message(group), + ) + if primary_item + else {} + ) + result: dict[str, object] = { + "ruleId": rule_id, + "ruleIndex": rule_index, + "level": 
_severity_to_level(_text(group.get("severity"))), + "message": { + "text": _result_message(group), + }, + "locations": [primary_location] if primary_location else [], + "fingerprints": { + "codecloneFindingId": _text(group.get("id")), + }, + "partialFingerprints": _partial_fingerprints( + rule_id=rule_id, + group=group, + primary_item=primary_item, + ), + "properties": _result_properties(group), + } + baseline_state = _baseline_state(group) + if baseline_state: + result["baselineState"] = baseline_state + related_items = items[1:] + if related_items: + related_locations = [ + _location_entry( + item, + related_id=index, + artifact_index_map=artifact_index_map, + use_uri_base_id=use_uri_base_id, + message_text=_location_message(group, related_id=index), + ) + for index, item in enumerate(related_items, start=1) + ] + result["relatedLocations"] = [ + location for location in related_locations if location + ] + return result + + +def render_sarif_report_document(payload: Mapping[str, object]) -> str: + meta = _as_mapping(payload.get("meta")) + runtime = _as_mapping(meta.get("runtime")) + generated_at = _text(runtime.get("report_generated_at_utc")) + analysis_mode = _text(meta.get("analysis_mode")) or "full" + findings = sorted( + _flatten_findings(payload), + key=lambda group: ( + _rule_spec(group).rule_id, + _text(group.get("id")), + ), + ) + scan_root_uri = _scan_root_uri(payload) + use_uri_base_id = bool(scan_root_uri) + artifacts, artifact_index_map = _artifact_catalog( + findings, + use_uri_base_id=use_uri_base_id, + ) + used_rule_specs = { + spec.rule_id: spec for spec in (_rule_spec(group) for group in findings) + } + ordered_rule_specs = [used_rule_specs[key] for key in sorted(used_rule_specs)] + rule_index_map = { + spec.rule_id: index for index, spec in enumerate(ordered_rule_specs) + } + results = [ + _result_entry( + group=group, + rule_id=rule.rule_id, + rule_index=rule_index_map[rule.rule_id], + artifact_index_map=artifact_index_map, + 
use_uri_base_id=use_uri_base_id, + ) + for group in findings + for rule in (_rule_spec(group),) + ] + invocation: dict[str, object] = { + "executionSuccessful": True, + **({"endTimeUtc": generated_at} if generated_at else {}), + } + if scan_root_uri: + invocation["workingDirectory"] = {"uri": scan_root_uri} + run: dict[str, object] = { + "tool": { + "driver": { + "name": "codeclone", + "version": _text(meta.get("codeclone_version")), + "semanticVersion": _text(meta.get("codeclone_version")), + "informationUri": REPOSITORY_URL, + "rules": [ + { + "id": spec.rule_id, + "name": _rule_name(spec), + "shortDescription": {"text": spec.short_description}, + "fullDescription": {"text": spec.full_description}, + "help": _rule_help(spec), + "defaultConfiguration": {"level": spec.default_level}, + "helpUri": DOCS_URL, + "properties": { + "category": spec.category, + "kind": spec.kind, + "precision": spec.precision, + "tags": [spec.category, spec.kind, spec.precision], + }, + } + for spec in ordered_rule_specs + ], + } + }, + "automationDetails": { + "id": f"codeclone/{analysis_mode}", + }, + **( + { + "originalUriBaseIds": { + SARIF_SRCROOT_BASE_ID: { + "uri": scan_root_uri, + "description": {"text": "The root of the scanned source tree."}, + } + } + } + if scan_root_uri + else {} + ), + "artifacts": artifacts, + "results": results, + "invocations": [invocation], + "properties": { + "profileVersion": SARIF_PROFILE_VERSION, + "reportSchemaVersion": _text(payload.get("report_schema_version")), + "analysisMode": analysis_mode, + "reportMode": _text(meta.get("report_mode")), + "canonicalDigestSha256": _text( + _as_mapping(_as_mapping(payload.get("integrity")).get("digest")).get( + "value" + ) + ), + **({"reportGeneratedAtUtc": generated_at} if generated_at else {}), + }, + "columnKind": "utf16CodeUnits", + } + return json.dumps( + { + "$schema": SARIF_SCHEMA_URL, + "version": SARIF_VERSION, + "runs": [run], + }, + ensure_ascii=False, + indent=2, + ) + + +def to_sarif_report( + *, 
+ report_document: Mapping[str, object] | None = None, + meta: Mapping[str, object], + inventory: Mapping[str, object] | None = None, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Collection[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> str: + payload = report_document or build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + metrics=metrics, + suggestions=tuple(suggestions or ()), + structural_findings=tuple(structural_findings or ()), + ) + return render_sarif_report_document(payload) diff --git a/codeclone/report/segments.py b/codeclone/report/segments.py new file mode 100644 index 0000000..ba5ec9a --- /dev/null +++ b/codeclone/report/segments.py @@ -0,0 +1,209 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +from collections.abc import Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING + +from ..extractor import _QualnameCollector +from .merge import coerce_positive_int, merge_overlapping_items + +if TYPE_CHECKING: + from .types import GroupItem, GroupItemLike, GroupItemsLike, GroupMap, GroupMapLike + +SEGMENT_MIN_UNIQUE_STMT_TYPES = 2 + +_CONTROL_FLOW_STMTS = ( + ast.If, + ast.For, + ast.While, + ast.Try, + ast.With, + ast.Match, + ast.AsyncFor, + ast.AsyncWith, +) 
+_FORBIDDEN_STMTS = (ast.Return, ast.Raise, ast.Assert) + + +@dataclass(frozen=True, slots=True) +class _SegmentAnalysis: + unique_stmt_types: int + has_control_flow: bool + is_boilerplate: bool + + +def segment_item_sort_key(item: GroupItemLike) -> tuple[str, str, int, int]: + return ( + str(item.get("filepath", "")), + str(item.get("qualname", "")), + coerce_positive_int(item.get("start_line")) or 0, + coerce_positive_int(item.get("end_line")) or 0, + ) + + +def merge_segment_items(items: GroupItemsLike) -> list[GroupItem]: + return merge_overlapping_items(items, sort_key=segment_item_sort_key) + + +def collect_file_functions( + filepath: str, +) -> dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None: + try: + source = Path(filepath).read_text("utf-8") + except OSError: + return None + try: + tree = ast.parse(source) + except SyntaxError: + return None + + collector = _QualnameCollector() + collector.visit(tree) + return collector.funcs + + +def segment_statements( + func_node: ast.FunctionDef | ast.AsyncFunctionDef, start_line: int, end_line: int +) -> list[ast.stmt]: + body = getattr(func_node, "body", None) + if not isinstance(body, list): + return [] + + statements: list[ast.stmt] = [] + for statement in body: + lineno = getattr(statement, "lineno", None) + end_lineno = getattr(statement, "end_lineno", None) + if lineno is None or end_lineno is None: + continue + if lineno >= start_line and end_lineno <= end_line: + statements.append(statement) + return statements + + +def assign_targets_attribute_only(statement: ast.stmt) -> bool: + if isinstance(statement, ast.Assign): + return all(isinstance(target, ast.Attribute) for target in statement.targets) + if isinstance(statement, ast.AnnAssign): + return isinstance(statement.target, ast.Attribute) + return False + + +def analyze_segment_statements(statements: list[ast.stmt]) -> _SegmentAnalysis | None: + if not statements: + return None + + unique_types = {type(statement) for statement in statements} + 
has_control_flow = any( + isinstance(statement, _CONTROL_FLOW_STMTS) for statement in statements + ) + has_forbidden = any( + isinstance(statement, _FORBIDDEN_STMTS) for statement in statements + ) + has_call_statement = any( + isinstance(statement, ast.Expr) and isinstance(statement.value, ast.Call) + for statement in statements + ) + + assign_statements = [ + statement + for statement in statements + if isinstance(statement, (ast.Assign, ast.AnnAssign)) + ] + assign_ratio = len(assign_statements) / len(statements) + assign_attr_only = all( + assign_targets_attribute_only(statement) for statement in assign_statements + ) + + is_boilerplate = ( + assign_ratio >= 0.8 + and assign_attr_only + and not has_control_flow + and not has_forbidden + and not has_call_statement + ) + + return _SegmentAnalysis( + unique_stmt_types=len(unique_types), + has_control_flow=has_control_flow, + is_boilerplate=is_boilerplate, + ) + + +def _analyze_segment_item( + item: GroupItemLike, + *, + file_cache: dict[str, dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None], +) -> _SegmentAnalysis | None: + filepath = str(item.get("filepath", "")) + qualname = str(item.get("qualname", "")) + start_line = coerce_positive_int(item.get("start_line")) or 0 + end_line = coerce_positive_int(item.get("end_line")) or 0 + if not filepath or not qualname or start_line <= 0 or end_line <= 0: + return None + + if filepath not in file_cache: + file_cache[filepath] = collect_file_functions(filepath) + functions_by_qualname = file_cache[filepath] + if not functions_by_qualname: + return None + + local_name = qualname.split(":", 1)[1] if ":" in qualname else qualname + func_node = functions_by_qualname.get(local_name) + if func_node is None: + return None + + statements = segment_statements(func_node, start_line, end_line) + return analyze_segment_statements(statements) + + +def _analyze_segment_group( + items: Sequence[GroupItemLike], + *, + file_cache: dict[str, dict[str, ast.FunctionDef | 
ast.AsyncFunctionDef] | None], +) -> list[_SegmentAnalysis] | None: + analyses: list[_SegmentAnalysis] = [] + for item in items: + analysis = _analyze_segment_item(item, file_cache=file_cache) + if analysis is None: + return None + analyses.append(analysis) + return analyses + + +def prepare_segment_report_groups(segment_groups: GroupMapLike) -> tuple[GroupMap, int]: + """ + Merge overlapping segment windows and suppress low-value boilerplate groups + for reporting. Detection hashes remain unchanged. + """ + suppressed = 0 + filtered: GroupMap = {} + file_cache: dict[str, dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None] = {} + + for key, items in segment_groups.items(): + merged_items = merge_segment_items(items) + if not merged_items: + continue + + analyses = _analyze_segment_group(merged_items, file_cache=file_cache) + if analyses is None: + filtered[key] = merged_items + continue + + all_boilerplate = all(analysis.is_boilerplate for analysis in analyses) + all_too_simple = all( + (not analysis.has_control_flow) + and (analysis.unique_stmt_types < SEGMENT_MIN_UNIQUE_STMT_TYPES) + for analysis in analyses + ) + if all_boilerplate or all_too_simple: + suppressed += 1 + continue + + filtered[key] = merged_items + + return filtered, suppressed diff --git a/codeclone/report/serialize.py b/codeclone/report/serialize.py new file mode 100644 index 0000000..f074cd3 --- /dev/null +++ b/codeclone/report/serialize.py @@ -0,0 +1,717 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from collections.abc import Mapping, Sequence + +from .. 
import _coerce +from ..domain.source_scope import IMPACT_SCOPE_NON_RUNTIME, SOURCE_KIND_OTHER +from ._formatting import format_spread_text + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def render_json_report_document(payload: Mapping[str, object]) -> str: + return json.dumps( + payload, + ensure_ascii=False, + indent=2, + ) + + +def format_meta_text_value(value: object) -> str: + if isinstance(value, bool): + return "true" if value else "false" + if value is None: + return "(none)" + if isinstance(value, float): + return f"{value:.2f}".rstrip("0").rstrip(".") or "0" + if isinstance(value, Sequence) and not isinstance( + value, + (str, bytes, bytearray), + ): + formatted = [format_meta_text_value(item) for item in value] + return ", ".join(formatted) if formatted else "(none)" + text = str(value).strip() + return text if text else "(none)" + + +def _format_key_values( + mapping: Mapping[str, object], + keys: Sequence[str], + *, + skip_empty: bool = False, +) -> str: + parts: list[str] = [] + for key in keys: + if key not in mapping: + continue + formatted = format_meta_text_value(mapping.get(key)) + if skip_empty and formatted == "(none)": + continue + parts.append(f"{key}={formatted}") + return " ".join(parts) if parts else "(none)" + + +def _spread_text(spread: Mapping[str, object]) -> str: + return format_spread_text( + _as_int(spread.get("files")), + _as_int(spread.get("functions")), + ) + + +def _scope_text(source_scope: Mapping[str, object]) -> str: + dominant = str(source_scope.get("dominant_kind", "")).strip() or SOURCE_KIND_OTHER + impact = ( + str(source_scope.get("impact_scope", "")).strip() or IMPACT_SCOPE_NON_RUNTIME + ) + return f"{dominant}/{impact}" + + +def _structural_kind_label(kind: object) -> str: + kind_text = str(kind).strip() + match kind_text: + case "duplicated_branches": + return "Duplicated branches" + case "clone_guard_exit_divergence": + return "Clone guard/exit divergence" + case 
"clone_cohort_drift": + return "Clone cohort drift" + case _: + return kind_text or "(none)" + + +def _location_line( + item: Mapping[str, object], + *, + metric_name: str | None = None, +) -> str: + metric_suffix = "" + if metric_name is not None and metric_name in item: + metric_suffix = ( + f" {metric_name}={format_meta_text_value(item.get(metric_name))}" + ) + return ( + f"- {format_meta_text_value(item.get('qualname'))} " + f"{format_meta_text_value(item.get('relative_path'))}:" + f"{format_meta_text_value(item.get('start_line'))}-" + f"{format_meta_text_value(item.get('end_line'))}" + f"{metric_suffix}" + ) + + +def _append_clone_section( + lines: list[str], + *, + title: str, + groups: Sequence[object], + novelty: str, + metric_name: str, +) -> None: + section_groups = [ + _as_mapping(group) + for group in groups + if str(_as_mapping(group).get("novelty", "")) == novelty + ] + lines.append(f"{title} ({novelty.upper()}) (groups={len(section_groups)})") + if not section_groups: + lines.append("(none)") + return + for idx, group in enumerate(section_groups, start=1): + lines.append(f"=== Clone group #{idx} ===") + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"clone_type={format_meta_text_value(group.get('clone_type'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"count={format_meta_text_value(group.get('count'))} " + f"spread={_spread_text(_as_mapping(group.get('spread')))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + "facts: " + + _format_key_values( + facts, + tuple(sorted(str(key) for key in facts)), + skip_empty=True, + ) + ) + display_facts = _as_mapping(group.get("display_facts")) + if display_facts: + lines.append( + "display_facts: " + + _format_key_values( + display_facts, + tuple(sorted(str(key) for key in display_facts)), + skip_empty=True, + ) + ) + lines.extend( + _location_line(item, 
metric_name=metric_name) + for item in map(_as_mapping, _as_sequence(group.get("items"))) + ) + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _append_structural_findings(lines: list[str], groups: Sequence[object]) -> None: + structural_groups = [_as_mapping(group) for group in groups] + lines.append(f"STRUCTURAL FINDINGS (groups={len(structural_groups)})") + if not structural_groups: + lines.append("(none)") + return + for idx, group in enumerate(structural_groups, start=1): + lines.append(f"=== Structural finding #{idx} ===") + signature = _as_mapping(group.get("signature")) + stable = _as_mapping(signature.get("stable")) + control_flow = _as_mapping(stable.get("control_flow")) + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"kind={format_meta_text_value(group.get('kind'))} " + f"label={_structural_kind_label(group.get('kind'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"confidence={format_meta_text_value(group.get('confidence'))} " + f"count={format_meta_text_value(group.get('count'))} " + f"spread={_spread_text(_as_mapping(group.get('spread')))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" + ) + stable_family = str(stable.get("family", "")).strip() + match stable_family: + case "clone_guard_exit_divergence": + lines.append( + "signature: " + f"cohort_id={format_meta_text_value(stable.get('cohort_id'))} " + f"majority_guard_count=" + f"{format_meta_text_value(stable.get('majority_guard_count'))} " + f"majority_terminal_kind=" + f"{format_meta_text_value(stable.get('majority_terminal_kind'))}" + ) + case "clone_cohort_drift": + majority_profile = _as_mapping(stable.get("majority_profile")) + lines.append( + "signature: " + f"cohort_id={format_meta_text_value(stable.get('cohort_id'))} " + f"drift_fields=" + f"{format_meta_text_value(stable.get('drift_fields'))} " + f"majority_terminal_kind=" + f"{format_meta_text_value(majority_profile.get('terminal_kind'))}" + ) + case _: + 
lines.append( + "signature: " + f"stmt_shape={format_meta_text_value(stable.get('stmt_shape'))} " + f"terminal_kind=" + f"{format_meta_text_value(stable.get('terminal_kind'))} " + f"has_loop={format_meta_text_value(control_flow.get('has_loop'))} " + f"has_try={format_meta_text_value(control_flow.get('has_try'))} " + f"nested_if={format_meta_text_value(control_flow.get('nested_if'))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + "facts: " + + _format_key_values( + facts, + tuple(sorted(str(key) for key in facts)), + skip_empty=True, + ) + ) + items = list(map(_as_mapping, _as_sequence(group.get("items")))) + visible_items = items[:3] + lines.extend(_location_line(item) for item in visible_items) + if len(items) > len(visible_items): + lines.append(f"... and {len(items) - len(visible_items)} more occurrences") + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _append_single_item_findings( + lines: list[str], + *, + title: str, + groups: Sequence[object], + fact_keys: Sequence[str], +) -> None: + finding_groups = [_as_mapping(group) for group in groups] + lines.append(f"{title} (groups={len(finding_groups)})") + if not finding_groups: + lines.append("(none)") + return + for idx, group in enumerate(finding_groups, start=1): + lines.append(f"=== Finding #{idx} ===") + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"category={format_meta_text_value(group.get('category'))} " + f"kind={format_meta_text_value(group.get('kind'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"confidence={format_meta_text_value(group.get('confidence'))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + f"facts: {_format_key_values(facts, fact_keys, skip_empty=True)}" + ) + lines.extend( + _location_line(item) + for item in map(_as_mapping, _as_sequence(group.get("items"))) + ) + lines.append("") + if lines[-1] 
== "": + lines.pop() + + +def _suppression_bindings_text(item: Mapping[str, object]) -> str: + bindings = [ + _as_mapping(binding) + for binding in _as_sequence(item.get("suppressed_by")) + if isinstance(binding, Mapping) + ] + if bindings: + parts = [] + for binding in bindings: + rule = str(binding.get("rule", "")).strip() or "unknown" + source = str(binding.get("source", "")).strip() or "unknown" + parts.append(f"{rule}@{source}") + return ",".join(parts) + rule = str(item.get("suppression_rule", "")).strip() + source = str(item.get("suppression_source", "")).strip() + if rule or source: + return f"{rule or 'unknown'}@{source or 'unknown'}" + return "(none)" + + +def _append_suppressed_dead_code_items( + lines: list[str], + *, + items: Sequence[object], +) -> None: + suppressed_items = [_as_mapping(item) for item in items] + lines.append(f"SUPPRESSED DEAD CODE (items={len(suppressed_items)})") + if not suppressed_items: + lines.append("(none)") + return + for idx, item in enumerate(suppressed_items, start=1): + lines.append(f"=== Suppressed dead-code item #{idx} ===") + lines.append( + "kind=" + f"{format_meta_text_value(item.get('kind'))} " + f"confidence={format_meta_text_value(item.get('confidence'))} " + f"suppressed_by={_suppression_bindings_text(item)}" + ) + lines.append(_location_line(item)) + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, object]]: + groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(groups.get("clones")) + flat_groups = [ + *map(_as_mapping, _as_sequence(clone_groups.get("functions"))), + *map(_as_mapping, _as_sequence(clone_groups.get("blocks"))), + *map(_as_mapping, _as_sequence(clone_groups.get("segments"))), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get("structural")).get("groups")), + ), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get("dead_code")).get("groups")), + ), + *map( + _as_mapping, + 
_as_sequence(_as_mapping(groups.get("design")).get("groups")), + ), + ] + return flat_groups + + +def _append_suggestions( + lines: list[str], + *, + suggestions: Sequence[object], + findings: Mapping[str, object], +) -> None: + suggestion_rows = [_as_mapping(item) for item in suggestions] + finding_index = { + str(group.get("id")): group for group in _flatten_findings(findings) + } + lines.append(f"SUGGESTIONS (count={len(suggestion_rows)})") + if not suggestion_rows: + lines.append("(none)") + return + for idx, suggestion in enumerate(suggestion_rows, start=1): + finding = finding_index.get(str(suggestion.get("finding_id")), {}) + lines.append( + f"{idx}. " + f"[{format_meta_text_value(finding.get('severity'))}] " + f"{format_meta_text_value(suggestion.get('title'))}" + ) + lines.append( + " " + f"finding_id={format_meta_text_value(suggestion.get('finding_id'))} " + f"effort={format_meta_text_value(_as_mapping(suggestion.get('action')).get('effort'))}" + ) + summary = str(suggestion.get("summary", "")).strip() + if summary: + lines.append(f" summary: {summary}") + lines.append( + f" location: {format_meta_text_value(suggestion.get('location_label'))}" + ) + representative = list( + map(_as_mapping, _as_sequence(suggestion.get("representative_locations"))) + ) + if representative: + lines.append(f" example: {_location_line(representative[0])[2:]}") + steps = [ + str(step).strip() + for step in _as_sequence(_as_mapping(suggestion.get("action")).get("steps")) + if str(step).strip() + ] + lines.extend(f" - {step}" for step in steps[:2]) + + +def _append_overview( + lines: list[str], + overview: Mapping[str, object], + hotlists: Mapping[str, object], +) -> None: + lines.append("DERIVED OVERVIEW") + families = _as_mapping(overview.get("families")) + lines.append( + "Families: " + + _format_key_values( + families, + ("clones", "structural", "dead_code", "design"), + ) + ) + source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) + lines.append( + "Source 
scope breakdown: " + + _format_key_values( + source_breakdown, + ("production", "tests", "fixtures", "other"), + ) + ) + health_snapshot = _as_mapping(overview.get("health_snapshot")) + lines.append( + "Health snapshot: " + + _format_key_values( + health_snapshot, + ("score", "grade", "strongest_dimension", "weakest_dimension"), + ) + ) + hotlist_counts = { + "most_actionable": len(_as_sequence(hotlists.get("most_actionable_ids"))), + "highest_spread": len(_as_sequence(hotlists.get("highest_spread_ids"))), + "production_hotspots": len( + _as_sequence(hotlists.get("production_hotspot_ids")) + ), + "test_fixture_hotspots": len( + _as_sequence(hotlists.get("test_fixture_hotspot_ids")) + ), + } + lines.append( + "Hotlists: " + + _format_key_values( + hotlist_counts, + ( + "most_actionable", + "highest_spread", + "production_hotspots", + "test_fixture_hotspots", + ), + ) + ) + top_risks = list(map(_as_mapping, _as_sequence(overview.get("top_risks")))) + if not top_risks: + lines.append("Top risks: (none)") + return + lines.append("Top risks:") + lines.extend( + ( + "- " + f"{format_meta_text_value(risk.get('family'))} " + f"count={format_meta_text_value(risk.get('count'))} " + f"scope={format_meta_text_value(risk.get('scope'))} " + f"label={format_meta_text_value(risk.get('label'))}" + ) + for risk in top_risks + ) + + +def render_text_report_document(payload: Mapping[str, object]) -> str: + meta_payload = _as_mapping(payload.get("meta")) + baseline = _as_mapping(meta_payload.get("baseline")) + cache = _as_mapping(meta_payload.get("cache")) + metrics_baseline = _as_mapping(meta_payload.get("metrics_baseline")) + inventory_payload = _as_mapping(payload.get("inventory")) + inventory_files = _as_mapping(inventory_payload.get("files")) + inventory_code = _as_mapping(inventory_payload.get("code")) + file_registry = _as_mapping(inventory_payload.get("file_registry")) + findings = _as_mapping(payload.get("findings")) + findings_summary = _as_mapping(findings.get("summary")) + 
findings_families = _as_mapping(findings_summary.get("families")) + findings_severity = _as_mapping(findings_summary.get("severity")) + findings_impact_scope = _as_mapping(findings_summary.get("impact_scope")) + findings_clones = _as_mapping(findings_summary.get("clones")) + findings_suppressed = _as_mapping(findings_summary.get("suppressed")) + metrics_payload = _as_mapping(payload.get("metrics")) + metrics_summary = _as_mapping(metrics_payload.get("summary")) + metrics_families = _as_mapping(metrics_payload.get("families")) + derived = _as_mapping(payload.get("derived")) + overview = _as_mapping(derived.get("overview")) + hotlists = _as_mapping(derived.get("hotlists")) + suggestions_payload = _as_sequence(derived.get("suggestions")) + integrity = _as_mapping(payload.get("integrity")) + canonicalization = _as_mapping(integrity.get("canonicalization")) + digest = _as_mapping(integrity.get("digest")) + findings_groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(findings_groups.get("clones")) + runtime_meta = _as_mapping(meta_payload.get("runtime")) + + lines = [ + "REPORT METADATA", + "Report schema version: " + f"{format_meta_text_value(payload.get('report_schema_version'))}", + "CodeClone version: " + f"{format_meta_text_value(meta_payload.get('codeclone_version'))}", + f"Project name: {format_meta_text_value(meta_payload.get('project_name'))}", + f"Scan root: {format_meta_text_value(meta_payload.get('scan_root'))}", + f"Python version: {format_meta_text_value(meta_payload.get('python_version'))}", + f"Python tag: {format_meta_text_value(meta_payload.get('python_tag'))}", + f"Analysis mode: {format_meta_text_value(meta_payload.get('analysis_mode'))}", + f"Report mode: {format_meta_text_value(meta_payload.get('report_mode'))}", + "Report generated (UTC): " + f"{format_meta_text_value(runtime_meta.get('report_generated_at_utc'))}", + "Computed metric families: " + f"{format_meta_text_value(meta_payload.get('computed_metric_families'))}", + 
f"Baseline path: {format_meta_text_value(baseline.get('path'))}", + "Baseline fingerprint version: " + f"{format_meta_text_value(baseline.get('fingerprint_version'))}", + "Baseline schema version: " + f"{format_meta_text_value(baseline.get('schema_version'))}", + f"Baseline Python tag: {format_meta_text_value(baseline.get('python_tag'))}", + "Baseline generator name: " + f"{format_meta_text_value(baseline.get('generator_name'))}", + "Baseline generator version: " + f"{format_meta_text_value(baseline.get('generator_version'))}", + "Baseline payload sha256: " + f"{format_meta_text_value(baseline.get('payload_sha256'))}", + "Baseline payload verified: " + f"{format_meta_text_value(baseline.get('payload_sha256_verified'))}", + f"Baseline loaded: {format_meta_text_value(baseline.get('loaded'))}", + f"Baseline status: {format_meta_text_value(baseline.get('status'))}", + f"Cache path: {format_meta_text_value(cache.get('path'))}", + f"Cache schema version: {format_meta_text_value(cache.get('schema_version'))}", + f"Cache status: {format_meta_text_value(cache.get('status'))}", + f"Cache used: {format_meta_text_value(cache.get('used'))}", + "Metrics baseline path: " + f"{format_meta_text_value(metrics_baseline.get('path'))}", + "Metrics baseline loaded: " + f"{format_meta_text_value(metrics_baseline.get('loaded'))}", + "Metrics baseline status: " + f"{format_meta_text_value(metrics_baseline.get('status'))}", + "Metrics baseline schema version: " + f"{format_meta_text_value(metrics_baseline.get('schema_version'))}", + "Metrics baseline payload sha256: " + f"{format_meta_text_value(metrics_baseline.get('payload_sha256'))}", + "Metrics baseline payload verified: " + f"{format_meta_text_value(metrics_baseline.get('payload_sha256_verified'))}", + ] + + if ( + baseline.get("loaded") is not True + or str(baseline.get("status", "")).strip().lower() != "ok" + ): + lines.append("Note: baseline is untrusted; all groups are treated as NEW.") + + lines.extend( + [ + "", + "INVENTORY", + 
"Files: " + + _format_key_values( + inventory_files, + ( + "total_found", + "analyzed", + "cached", + "skipped", + "source_io_skipped", + ), + ), + "Code: " + + _format_key_values( + inventory_code, + ("scope", "parsed_lines", "functions", "methods", "classes"), + ), + "File registry: " + f"encoding={format_meta_text_value(file_registry.get('encoding'))} " + f"count={len(_as_sequence(file_registry.get('items')))}", + "", + "FINDINGS SUMMARY", + f"Total groups: {format_meta_text_value(findings_summary.get('total'))}", + "Families: " + + _format_key_values( + findings_families, + ("clones", "structural", "dead_code", "design"), + ), + "Severity: " + + _format_key_values( + findings_severity, + ("critical", "warning", "info"), + ), + "Impact scope: " + + _format_key_values( + findings_impact_scope, + ("runtime", "non_runtime", "mixed"), + ), + "Clones: " + + _format_key_values( + findings_clones, + ("functions", "blocks", "segments", "new", "known"), + ), + "Suppressed: " + + _format_key_values( + findings_suppressed, + ("dead_code",), + ), + "", + "METRICS SUMMARY", + ] + ) + for family_name in ( + "complexity", + "coupling", + "cohesion", + "dependencies", + "dead_code", + "health", + ): + family_summary = _as_mapping(metrics_summary.get(family_name)) + keys: Sequence[str] + match family_name: + case "complexity" | "coupling": + keys = ("total", "average", "max", "high_risk") + case "cohesion": + keys = ("total", "average", "max", "low_cohesion") + case "dependencies": + keys = ("modules", "edges", "cycles", "max_depth") + case "dead_code": + keys = ("total", "high_confidence", "suppressed") + case _: + keys = ("score", "grade") + lines.append(f"{family_name}: {_format_key_values(family_summary, keys)}") + + lines.append("") + _append_overview(lines, overview, hotlists) + + lines.append("") + _append_suggestions(lines, suggestions=suggestions_payload, findings=findings) + + lines.append("") + _append_clone_section( + lines, + title="FUNCTION CLONES", + 
groups=_as_sequence(clone_groups.get("functions")), + novelty="new", + metric_name="loc", + ) + lines.append("") + _append_clone_section( + lines, + title="FUNCTION CLONES", + groups=_as_sequence(clone_groups.get("functions")), + novelty="known", + metric_name="loc", + ) + lines.append("") + _append_clone_section( + lines, + title="BLOCK CLONES", + groups=_as_sequence(clone_groups.get("blocks")), + novelty="new", + metric_name="size", + ) + lines.append("") + _append_clone_section( + lines, + title="BLOCK CLONES", + groups=_as_sequence(clone_groups.get("blocks")), + novelty="known", + metric_name="size", + ) + lines.append("") + _append_clone_section( + lines, + title="SEGMENT CLONES", + groups=_as_sequence(clone_groups.get("segments")), + novelty="new", + metric_name="size", + ) + lines.append("") + _append_clone_section( + lines, + title="SEGMENT CLONES", + groups=_as_sequence(clone_groups.get("segments")), + novelty="known", + metric_name="size", + ) + lines.append("") + _append_structural_findings( + lines, + _as_sequence(_as_mapping(findings_groups.get("structural")).get("groups")), + ) + lines.append("") + _append_single_item_findings( + lines, + title="DEAD CODE FINDINGS", + groups=_as_sequence( + _as_mapping(findings_groups.get("dead_code")).get("groups") + ), + fact_keys=("kind", "confidence"), + ) + lines.append("") + dead_code_family = _as_mapping(metrics_families.get("dead_code")) + _append_suppressed_dead_code_items( + lines, + items=_as_sequence(dead_code_family.get("suppressed_items")), + ) + lines.append("") + _append_single_item_findings( + lines, + title="DESIGN FINDINGS", + groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), + fact_keys=("lcom4", "method_count", "instance_var_count", "fan_out", "risk"), + ) + lines.extend( + [ + "", + "INTEGRITY", + "Canonicalization: " + + _format_key_values( + canonicalization, + ("version", "scope", "sections"), + ), + "Digest: " + + _format_key_values( + digest, + ("algorithm", 
"verified", "value"), + ), + ] + ) + + return "\n".join(lines).rstrip() + "\n" diff --git a/codeclone/report/suggestions.py b/codeclone/report/suggestions.py new file mode 100644 index 0000000..f1277e2 --- /dev/null +++ b/codeclone/report/suggestions.py @@ -0,0 +1,731 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from .. import _coerce +from ..domain.findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + FAMILY_CLONES, + FAMILY_METRICS, + FAMILY_STRUCTURAL, +) +from ..domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_MEDIUM, + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + EFFORT_WEIGHT, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_RANK, + SEVERITY_WARNING, +) +from ..models import ( + ClassMetrics, + GroupItemLike, + ProjectMetrics, + ReportLocation, + SourceKind, + StructuralFindingGroup, + Suggestion, +) +from ..report.explain_contract import ( + BLOCK_HINT_ASSERT_ONLY, + BLOCK_PATTERN_REPEATED_STMT_HASH, +) +from ..structural_findings import normalize_structural_findings +from .derived import ( + combine_source_kinds, + format_group_location_label, + format_report_location_label, + group_spread, + relative_report_path, + report_location_from_group_item, + report_location_from_structural_occurrence, + representative_locations, + source_kind_breakdown, +) + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + +Severity = Literal["critical", "warning", "info"] +Effort = Literal["easy", "moderate", "hard"] +CloneType = Literal["Type-1", "Type-2", "Type-3", "Type-4"] +SuggestionCategory = Literal[ + "clone", + "structural", + "complexity", + "coupling", + "cohesion", + "dead_code", + "dependency", +] + +_as_int = _coerce.as_int +_as_str = 
_coerce.as_str + + +def _priority(severity: Severity, effort: Effort) -> float: + return float(SEVERITY_RANK[severity]) / float(EFFORT_WEIGHT[effort]) + + +def classify_clone_type( + *, + items: Sequence[GroupItemLike], + kind: Literal["function", "block", "segment"], +) -> CloneType: + if kind in {CLONE_KIND_BLOCK, CLONE_KIND_SEGMENT}: + return "Type-4" + + raw_hashes = sorted( + { + _as_str(item.get("raw_hash")) + for item in items + if _as_str(item.get("raw_hash")) + } + ) + fingerprints = sorted( + { + _as_str(item.get("fingerprint")) + for item in items + if _as_str(item.get("fingerprint")) + } + ) + if raw_hashes and len(raw_hashes) == 1: + return "Type-1" + if len(fingerprints) == 1: + return "Type-2" + if fingerprints: + return "Type-3" + return "Type-4" + + +def _source_context( + locations: Sequence[ReportLocation], + *, + scan_root: str, +) -> tuple[SourceKind, tuple[tuple[SourceKind, int], ...]]: + breakdown = source_kind_breakdown( + (location.filepath for location in locations), + scan_root=scan_root, + ) + source_kind = combine_source_kinds(kind for kind, _count in breakdown) + return source_kind, breakdown + + +def _clone_fact_kind(kind: Literal["function", "block", "segment"]) -> str: + return { + CLONE_KIND_FUNCTION: "Function clone group", + CLONE_KIND_BLOCK: "Block clone group", + CLONE_KIND_SEGMENT: "Segment clone group", + }[kind] + + +def _clone_summary( + *, + kind: Literal["function", "block", "segment"], + clone_type: CloneType, + facts: Mapping[str, str], +) -> str: + if kind == CLONE_KIND_FUNCTION: + match clone_type: + case "Type-1": + return "same exact function body" + case "Type-2": + return "same parameterized function body" + case "Type-3": + return "same structural function body with small identifier changes" + case _: + return "same structural function body" + if kind == CLONE_KIND_BLOCK: + hint = str(facts.get("hint", "")).strip() + pattern = str(facts.get("pattern", "")).strip() + if hint == BLOCK_HINT_ASSERT_ONLY: + return 
"same assertion template" + if pattern == BLOCK_PATTERN_REPEATED_STMT_HASH: + return "same repeated setup/assert pattern" + return "same structural sequence with small value changes" + return "same structural segment sequence" + + +def _clone_steps( + *, + kind: Literal["function", "block", "segment"], + clone_type: CloneType, + facts: Mapping[str, str], +) -> tuple[str, ...]: + hint = str(facts.get("hint", "")).strip() + if kind == CLONE_KIND_FUNCTION and clone_type == "Type-1": + return ( + "Keep one canonical implementation and remove the exact duplicates.", + "Route the remaining call sites to the shared implementation.", + ) + if kind == CLONE_KIND_FUNCTION and clone_type == "Type-2": + return ( + "Extract a shared implementation with explicit parameters.", + "Replace identifier-only variations with arguments.", + ) + if kind == CLONE_KIND_BLOCK and hint == BLOCK_HINT_ASSERT_ONLY: + return ( + "Collapse the repeated assertion template into a helper or loop.", + "Keep the asserted values as data instead of copy-pasted statements.", + ) + if kind == CLONE_KIND_BLOCK: + return ( + "Extract the repeated statement sequence into a helper.", + "Keep setup data close to the call site and move shared logic out.", + ) + if kind == CLONE_KIND_SEGMENT: + return ( + "Review whether the repeated segment should become shared utility code.", + "Keep this as a report hint only if the duplication is intentional.", + ) + return ( + "Extract the repeated logic into a shared abstraction.", + "Replace the duplicated bodies with calls to the shared code.", + ) + + +def _clone_suggestion( + *, + group_key: str, + items: Sequence[GroupItemLike], + kind: Literal["function", "block", "segment"], + facts: Mapping[str, str], + scan_root: str, +) -> Suggestion: + locations = tuple( + report_location_from_group_item(item, scan_root=scan_root) for item in items + ) + representative = representative_locations(locations) + spread_files, spread_functions = group_spread(locations) + clone_type = 
classify_clone_type(items=items, kind=kind) + source_kind, breakdown = _source_context(locations, scan_root=scan_root) + count = len(items) + severity: Severity + if count >= 4: + severity = SEVERITY_CRITICAL + elif clone_type in {"Type-1", "Type-2"}: + severity = SEVERITY_WARNING + else: + severity = SEVERITY_INFO + effort: Effort = ( + EFFORT_EASY if clone_type in {"Type-1", "Type-2"} else EFFORT_MODERATE + ) + summary = _clone_summary(kind=kind, clone_type=clone_type, facts=facts) + location_label = format_group_location_label( + representative, + total_count=count, + spread_files=spread_files, + spread_functions=spread_functions, + ) + return Suggestion( + severity=severity, + category=CATEGORY_CLONE, + title=f"{_clone_fact_kind(kind)} ({clone_type})", + location=location_label, + steps=_clone_steps(kind=kind, clone_type=clone_type, facts=facts), + effort=effort, + priority=_priority(severity, effort), + finding_family=FAMILY_CLONES, + finding_kind=kind, + subject_key=group_key, + fact_kind=_clone_fact_kind(kind), + fact_summary=summary, + fact_count=count, + spread_files=spread_files, + spread_functions=spread_functions, + clone_type=clone_type, + confidence=CONFIDENCE_HIGH, + source_kind=source_kind, + source_breakdown=breakdown, + representative_locations=representative, + location_label=location_label, + ) + + +def _clone_suggestions( + *, + func_groups: Mapping[str, Sequence[GroupItemLike]], + block_groups: Mapping[str, Sequence[GroupItemLike]], + segment_groups: Mapping[str, Sequence[GroupItemLike]], + block_group_facts: Mapping[str, Mapping[str, str]], + scan_root: str, +) -> list[Suggestion]: + suggestions: list[Suggestion] = [] + for group_key, items in sorted(func_groups.items()): + suggestions.append( + _clone_suggestion( + group_key=group_key, + items=items, + kind=CLONE_KIND_FUNCTION, + facts={}, + scan_root=scan_root, + ) + ) + for group_key, items in sorted(block_groups.items()): + suggestions.append( + _clone_suggestion( + group_key=group_key, + 
items=items, + kind=CLONE_KIND_BLOCK, + facts=block_group_facts.get(group_key, {}), + scan_root=scan_root, + ) + ) + for group_key, items in sorted(segment_groups.items()): + suggestions.append( + _clone_suggestion( + group_key=group_key, + items=items, + kind=CLONE_KIND_SEGMENT, + facts={}, + scan_root=scan_root, + ) + ) + return suggestions + + +def _single_location_suggestion( + *, + severity: Severity, + category: SuggestionCategory, + title: str, + steps: tuple[str, ...], + effort: Effort, + fact_kind: str, + fact_summary: str, + filepath: str, + start_line: int, + end_line: int, + qualname: str, + subject_key: str, + finding_kind: str, + confidence: Literal["high", "medium", "low"], + scan_root: str, +) -> Suggestion: + source_kind = report_location_from_group_item( + { + "filepath": filepath, + "start_line": start_line, + "end_line": end_line, + "qualname": qualname, + }, + scan_root=scan_root, + ).source_kind + location = ReportLocation( + filepath=filepath, + relative_path=relative_report_path(filepath, scan_root=scan_root), + start_line=start_line, + end_line=end_line, + qualname=qualname, + source_kind=source_kind, + ) + location_label = format_report_location_label(location) + return Suggestion( + severity=severity, + category=category, + title=title, + location=location_label, + steps=steps, + effort=effort, + priority=_priority(severity, effort), + finding_family=FAMILY_METRICS, + finding_kind=finding_kind, + subject_key=subject_key, + fact_kind=fact_kind, + fact_summary=fact_summary, + fact_count=1, + spread_files=1, + spread_functions=1, + confidence=confidence, + source_kind=location.source_kind, + source_breakdown=((location.source_kind, 1),), + representative_locations=(location,), + location_label=location_label, + ) + + +def _complexity_suggestions( + units: Sequence[GroupItemLike], + *, + scan_root: str, +) -> list[Suggestion]: + suggestions: list[Suggestion] = [] + for unit in sorted( + units, + key=lambda item: ( + 
_as_int(item.get("cyclomatic_complexity")), + _as_int(item.get("nesting_depth")), + _as_str(item.get("qualname")), + ), + reverse=True, + ): + cc = _as_int(unit.get("cyclomatic_complexity"), 1) + if cc <= 20: + continue + severity: Severity = SEVERITY_CRITICAL if cc > 40 else SEVERITY_WARNING + nesting = _as_int(unit.get("nesting_depth")) + qualname = _as_str(unit.get("qualname")) + suggestions.append( + _single_location_suggestion( + severity=severity, + category=CATEGORY_COMPLEXITY, + title="Reduce function complexity", + steps=( + "Split the function into smaller deterministic stages.", + "Extract helper functions for nested branches.", + ), + effort=EFFORT_MODERATE, + fact_kind="Function complexity hotspot", + fact_summary=f"cyclomatic_complexity={cc}, nesting_depth={nesting}", + filepath=_as_str(unit.get("filepath")), + start_line=_as_int(unit.get("start_line")), + end_line=_as_int(unit.get("end_line")), + qualname=qualname, + subject_key=qualname, + finding_kind="function_hotspot", + confidence=CONFIDENCE_HIGH, + scan_root=scan_root, + ) + ) + return suggestions + + +def _coupling_and_cohesion_suggestions( + class_metrics: Sequence[ClassMetrics], + *, + scan_root: str, +) -> list[Suggestion]: + suggestions: list[Suggestion] = [] + for metric in sorted( + class_metrics, + key=lambda item: (item.filepath, item.start_line, item.end_line, item.qualname), + ): + if metric.cbo > 10: + suggestions.append( + _single_location_suggestion( + severity=SEVERITY_WARNING, + category=CATEGORY_COUPLING, + title="Reduce class coupling", + steps=( + "Reduce external dependencies of this class.", + "Move unrelated responsibilities to collaborator classes.", + ), + effort=EFFORT_MODERATE, + fact_kind="Class coupling hotspot", + fact_summary=f"cbo={metric.cbo}", + filepath=metric.filepath, + start_line=metric.start_line, + end_line=metric.end_line, + qualname=metric.qualname, + subject_key=metric.qualname, + finding_kind="class_hotspot", + confidence=CONFIDENCE_HIGH, + 
scan_root=scan_root, + ) + ) + if metric.lcom4 > 3: + suggestions.append( + _single_location_suggestion( + severity=SEVERITY_WARNING, + category=CATEGORY_COHESION, + title="Split low-cohesion class", + steps=( + "Split class by responsibility boundaries.", + "Group methods by shared state and extract subcomponents.", + ), + effort=EFFORT_MODERATE, + fact_kind="Low cohesion class", + fact_summary=f"lcom4={metric.lcom4}", + filepath=metric.filepath, + start_line=metric.start_line, + end_line=metric.end_line, + qualname=metric.qualname, + subject_key=metric.qualname, + finding_kind="class_hotspot", + confidence=CONFIDENCE_HIGH, + scan_root=scan_root, + ) + ) + return suggestions + + +def _dead_code_suggestions( + project_metrics: ProjectMetrics, + *, + scan_root: str, +) -> list[Suggestion]: + suggestions: list[Suggestion] = [] + for item in project_metrics.dead_code: + if item.confidence != CONFIDENCE_HIGH: + continue + suggestions.append( + _single_location_suggestion( + severity=SEVERITY_WARNING, + category=CATEGORY_DEAD_CODE, + title="Remove or explicitly keep unused code", + steps=( + "Remove or deprecate the unused symbol.", + "If intentionally reserved, add explicit keep marker and test.", + ), + effort=EFFORT_EASY, + fact_kind="Dead code item", + fact_summary=f"{item.kind} with {item.confidence} confidence", + filepath=item.filepath, + start_line=item.start_line, + end_line=item.end_line, + qualname=item.qualname, + subject_key=item.qualname, + finding_kind="unused_symbol", + confidence=CONFIDENCE_HIGH, + scan_root=scan_root, + ) + ) + return suggestions + + +def _module_source_kind(modules: Sequence[str]) -> SourceKind: + pseudo_paths = tuple(module.replace(".", "/") + ".py" for module in modules) + return combine_source_kinds( + source_kind for source_kind, _count in source_kind_breakdown(pseudo_paths) + ) + + +def _dependency_suggestions(project_metrics: ProjectMetrics) -> list[Suggestion]: + suggestions: list[Suggestion] = [] + for cycle in 
project_metrics.dependency_cycles: + location = " -> ".join(cycle) + source_kind = _module_source_kind(list(cycle)) + suggestions.append( + Suggestion( + severity=SEVERITY_CRITICAL, + category=CATEGORY_DEPENDENCY, + title="Break circular dependency", + location=location, + steps=( + "Break the cycle by extracting a shared abstraction.", + "Invert one dependency edge through an interface or protocol.", + ), + effort=EFFORT_HARD, + priority=_priority(SEVERITY_CRITICAL, EFFORT_HARD), + finding_family=FAMILY_METRICS, + finding_kind="cycle", + subject_key=location, + fact_kind="Dependency cycle", + fact_summary=f"{len(cycle)} modules participate in this cycle", + fact_count=len(cycle), + spread_files=len(cycle), + spread_functions=0, + confidence=CONFIDENCE_HIGH, + source_kind=source_kind, + source_breakdown=((source_kind, len(cycle)),), + location_label=location, + ) + ) + return suggestions + + +def _structural_summary(group: StructuralFindingGroup) -> tuple[str, str]: + match group.finding_kind: + case "clone_guard_exit_divergence": + return ( + "Clone guard/exit divergence", + "clone cohort members differ in entry guards or early-exit behavior", + ) + case "clone_cohort_drift": + return ( + "Clone cohort drift", + "clone cohort members drift from majority terminal/guard/try profile", + ) + case _: + pass + + terminal = str(group.signature.get("terminal", "")).strip() + stmt_seq = str(group.signature.get("stmt_seq", "")).strip() + raises = str(group.signature.get("raises", "")).strip() + has_loop = str(group.signature.get("has_loop", "")).strip() + raise_like = terminal == "raise" or raises not in {"", "0"} + match (raise_like, terminal, has_loop): + case (True, _, _): + return "Repeated branch family", "same repeated guard/validation branch" + case (False, "return", _): + return "Repeated branch family", "same repeated return branch" + case (False, _, "1"): + return "Repeated branch family", "same repeated loop branch" + case _: + if stmt_seq: + return "Repeated 
branch family", ( + f"same repeated branch shape ({stmt_seq})" + ) + return "Repeated branch family", "same repeated branch shape" + + +def _structural_steps(group: StructuralFindingGroup) -> tuple[str, ...]: + match group.finding_kind: + case "clone_guard_exit_divergence": + return ( + ( + "Compare divergent clone members against the majority " + "guard/exit profile." + ), + "If divergence is accidental, align guard exits across the cohort.", + ) + case "clone_cohort_drift": + return ( + "Review whether cohort drift is intentional for this clone family.", + ( + "If not intentional, reconcile terminal/guard/try profiles " + "across members." + ), + ) + case _: + pass + + terminal = str(group.signature.get("terminal", "")).strip() + match terminal: + case "raise": + return ( + "Factor the repeated validation/guard path into a shared helper.", + ( + "Keep the branch-specific inputs at the call site and share " + "the exit policy." + ), + ) + case "return": + return ( + "Consolidate the repeated return-path logic into a shared helper.", + "Keep the branch predicate local and share the emitted behavior.", + ) + case _: + return ( + "Review whether the repeated branch family should become a helper.", + ( + "Keep this as a report-only hint if the local duplication is " + "intentional." 
+ ), + ) + + +def _structural_suggestions( + structural_findings: Sequence[StructuralFindingGroup], + *, + scan_root: str, +) -> list[Suggestion]: + suggestions: list[Suggestion] = [] + for group in normalize_structural_findings(structural_findings): + locations = tuple( + report_location_from_structural_occurrence(item, scan_root=scan_root) + for item in group.items + ) + representative = representative_locations(locations) + spread_files, spread_functions = group_spread(locations) + source_kind, breakdown = _source_context(locations, scan_root=scan_root) + count = len(locations) + severity: Severity = ( + SEVERITY_WARNING if count >= 4 or spread_functions > 1 else SEVERITY_INFO + ) + if group.finding_kind in { + "clone_guard_exit_divergence", + "clone_cohort_drift", + }: + severity = SEVERITY_WARNING + title, summary = _structural_summary(group) + location_label = format_group_location_label( + representative, + total_count=count, + spread_files=spread_files, + spread_functions=spread_functions, + ) + suggestions.append( + Suggestion( + severity=severity, + category=CATEGORY_STRUCTURAL, + title=title, + location=location_label, + steps=_structural_steps(group), + effort=EFFORT_MODERATE, + priority=_priority(severity, EFFORT_MODERATE), + finding_family=FAMILY_STRUCTURAL, + finding_kind=group.finding_kind, + subject_key=group.finding_key, + fact_kind="Structural finding", + fact_summary=summary, + fact_count=count, + spread_files=spread_files, + spread_functions=spread_functions, + confidence=( + CONFIDENCE_HIGH + if group.finding_kind + in {"clone_guard_exit_divergence", "clone_cohort_drift"} + else CONFIDENCE_MEDIUM + ), + source_kind=source_kind, + source_breakdown=breakdown, + representative_locations=representative, + location_label=location_label, + ) + ) + return suggestions + + +def generate_suggestions( + *, + project_metrics: ProjectMetrics, + units: Sequence[GroupItemLike], + class_metrics: Sequence[ClassMetrics], + func_groups: Mapping[str, 
Sequence[GroupItemLike]], + block_groups: Mapping[str, Sequence[GroupItemLike]], + segment_groups: Mapping[str, Sequence[GroupItemLike]], + block_group_facts: Mapping[str, Mapping[str, str]] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, + scan_root: str = "", +) -> tuple[Suggestion, ...]: + suggestions = [ + *_clone_suggestions( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + block_group_facts=block_group_facts or {}, + scan_root=scan_root, + ), + *_structural_suggestions(structural_findings or (), scan_root=scan_root), + *_complexity_suggestions(units, scan_root=scan_root), + *_coupling_and_cohesion_suggestions(class_metrics, scan_root=scan_root), + *_dead_code_suggestions(project_metrics, scan_root=scan_root), + *_dependency_suggestions(project_metrics), + ] + return tuple( + sorted( + suggestions, + key=lambda item: ( + -item.priority, + item.severity, + item.category, + item.source_kind, + item.location_label or item.location, + item.title, + item.subject_key, + ), + ) + ) + + +__all__ = [ + "classify_clone_type", + "generate_suggestions", +] diff --git a/codeclone/report/types.py b/codeclone/report/types.py new file mode 100644 index 0000000..42bd16d --- /dev/null +++ b/codeclone/report/types.py @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ..models import ( + BlockGroupItem, + FunctionGroupItem, + GroupItem, + GroupItemLike, + GroupItemsLike, + GroupMap, + GroupMapLike, + SegmentGroupItem, +) + +__all__ = [ + "BlockGroupItem", + "FunctionGroupItem", + "GroupItem", + "GroupItemLike", + "GroupItemsLike", + "GroupMap", + "GroupMapLike", + "SegmentGroupItem", +] diff --git a/codeclone/scanner.py b/codeclone/scanner.py index 0588701..42ed7f7 100644 --- a/codeclone/scanner.py +++ b/codeclone/scanner.py @@ -1,19 +1,18 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on 
architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations +import os import tempfile -from collections.abc import Iterable from pathlib import Path +from typing import TYPE_CHECKING from .errors import ValidationError +if TYPE_CHECKING: + from collections.abc import Iterable + DEFAULT_EXCLUDES = ( ".git", ".venv", @@ -46,6 +45,69 @@ def _get_tempdir() -> Path: return Path(tempfile.gettempdir()).resolve() +def _is_under_root(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + return True + except ValueError: + return False + + +def _ensure_not_sensitive_root(*, rootp: Path, root_arg: str) -> None: + root_str = str(rootp) + temp_root = _get_tempdir() + try: + rootp.relative_to(temp_root) + return + except ValueError: + pass + + if root_str in SENSITIVE_DIRS: + raise ValidationError(f"Cannot scan sensitive directory: {root_arg}") + + for sensitive in SENSITIVE_DIRS: + if root_str.startswith(sensitive + "/"): + raise ValidationError(f"Cannot scan under sensitive directory: {root_arg}") + + +def _is_included_python_file( + *, + file_path: Path, + excludes_set: set[str], + rootp: Path, +) -> bool: + if not file_path.name.endswith(".py"): + return False + if any(part in excludes_set for part in file_path.parts): + return False + if not file_path.is_symlink(): + return True + try: + resolved = file_path.resolve() + except OSError: + return False + return _is_under_root(resolved, rootp) + + +def _walk_file_candidate( + *, + dirpath: str, + filename: str, + excludes_set: set[str], + rootp: Path, +) -> str | None: + if not filename.endswith(".py"): + return None + file_path = os.path.join(dirpath, filename) + if os.path.islink(file_path) and not _is_included_python_file( + file_path=Path(file_path), + excludes_set=excludes_set, + rootp=rootp, + ): + return None + return file_path + + def iter_py_files( root: str, 
excludes: tuple[str, ...] = DEFAULT_EXCLUDES, @@ -60,47 +122,41 @@ def iter_py_files( if not rootp.is_dir(): raise ValidationError(f"Root must be a directory: {root}") - root_str = str(rootp) - temp_root = _get_tempdir() - in_temp = False - try: - rootp.relative_to(temp_root) - in_temp = True - except ValueError: - in_temp = False - - if not in_temp: - if root_str in SENSITIVE_DIRS: - raise ValidationError(f"Cannot scan sensitive directory: {root}") - - for sensitive in SENSITIVE_DIRS: - if root_str.startswith(sensitive + "/"): - raise ValidationError(f"Cannot scan under sensitive directory: {root}") - - # Collect and filter first, then sort — avoids sorting excluded paths - candidates: list[Path] = [] - for p in rootp.rglob("*.py"): - # Verify path is actually under root (prevent symlink attacks) - try: - p.resolve().relative_to(rootp) - except ValueError: - # Skipping file outside root (possible symlink traversal) - continue - - parts = set(p.parts) - if any(ex in parts for ex in excludes): - continue - - candidates.append(p) - - if len(candidates) > max_files: - raise ValidationError( - f"File count exceeds limit of {max_files}. " - "Use more specific root or increase limit." - ) - - for p in sorted(candidates, key=lambda path: str(path)): - yield str(p) + _ensure_not_sensitive_root(rootp=rootp, root_arg=root) + + excludes_set = set(excludes) + + # Keep legacy behavior only when the requested root directory itself is excluded + # (e.g. scanning "/__pycache__"). Parent directories must not suppress + # scanning, otherwise valid roots like ".../build/project" become empty. + if rootp.name in excludes_set: + return + + # Collect and filter first, then sort for deterministic output. 
+ candidates: list[str] = [] + for dirpath, dirnames, filenames in os.walk( + rootp, + topdown=True, + followlinks=False, + ): + dirnames[:] = [name for name in dirnames if name not in excludes_set] + for filename in filenames: + candidate = _walk_file_candidate( + dirpath=dirpath, + filename=filename, + excludes_set=excludes_set, + rootp=rootp, + ) + if candidate is None: + continue + candidates.append(candidate) + if len(candidates) > max_files: + raise ValidationError( + f"File count exceeds limit of {max_files}. " + "Use more specific root or increase limit." + ) + + yield from sorted(candidates) def module_name_from_path(root: str, filepath: str) -> str: diff --git a/codeclone/structural_findings.py b/codeclone/structural_findings.py new file mode 100644 index 0000000..a6aaaf1 --- /dev/null +++ b/codeclone/structural_findings.py @@ -0,0 +1,1047 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""CodeClone — structural code quality analysis for Python. + +Structural findings extraction layer (Phase 1: duplicated_branches). + +This module is report-only: findings do not affect clone detection, +fingerprints, baseline semantics, exit codes, or health scores. 
+""" + +from __future__ import annotations + +import ast +import sys +from collections import Counter, defaultdict +from dataclasses import dataclass +from hashlib import sha1 +from typing import TYPE_CHECKING + +from .domain.findings import ( + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + STRUCTURAL_KIND_DUPLICATED_BRANCHES, +) +from .models import GroupItemLike, StructuralFindingGroup, StructuralFindingOccurrence + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + +__all__ = [ + "build_clone_cohort_structural_findings", + "is_reportable_structural_signature", + "normalize_structural_finding_group", + "normalize_structural_findings", + "scan_function_structure", +] + +_FINDING_KIND_BRANCHES = STRUCTURAL_KIND_DUPLICATED_BRANCHES +_FINDING_KIND_CLONE_GUARD_EXIT_DIVERGENCE = STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE +_FINDING_KIND_CLONE_COHORT_DRIFT = STRUCTURAL_KIND_CLONE_COHORT_DRIFT +_TRIVIAL_STMT_TYPES = frozenset( + { + "AnnAssign", + "Assert", + "Assign", + "AugAssign", + "Expr", + "Raise", + "Return", + } +) + + +@dataclass(frozen=True, slots=True) +class _BranchWalkStats: + call_count: int + raise_count: int + has_nested_if: bool + has_loop: bool + has_try: bool + + +@dataclass(frozen=True, slots=True) +class FunctionStructureFacts: + nesting_depth: int + structural_findings: tuple[StructuralFindingGroup, ...] 
+ entry_guard_count: int + entry_guard_terminal_profile: str + entry_guard_has_side_effect_before: bool + terminal_kind: str + try_finally_profile: str + side_effect_order_profile: str + + +# --------------------------------------------------------------------------- +# Branch signature helpers +# --------------------------------------------------------------------------- + + +def _stmt_type_sequence(body: list[ast.stmt]) -> str: + """Comma-joined AST node type names for a statement list.""" + return ",".join(type(s).__name__ for s in body) + + +def _terminal_kind(body: list[ast.stmt]) -> str: + """Classify the terminal (last) statement of a branch body.""" + if not body: + return "fallthrough" + last = body[-1] + if isinstance(last, ast.Return): + val = last.value + if val is None: + return "return_none" + if isinstance(val, ast.Constant): + return "return_const" + if isinstance(val, ast.Name): + return "return_name" + return "return_expr" + if isinstance(last, ast.Raise): + return "raise" + if isinstance(last, (ast.Assign, ast.AugAssign, ast.AnnAssign)): + return "assign" + if isinstance(last, ast.Expr): + return "expr" + return "fallthrough" + + +def _bucket_calls(call_count: int) -> str: + """Bucketed count of ast.Call nodes inside a branch body.""" + match call_count: + case 0: + return "0" + case 1: + return "1" + case _: + return "2+" + + +def _stmt_names_from_signature(signature: Mapping[str, str]) -> tuple[str, ...]: + stmt_seq = signature.get("stmt_seq", "").strip() + if not stmt_seq: + return () + return tuple(part for part in stmt_seq.split(",") if part) + + +def _has_non_trivial_stmt_names(stmt_names: Sequence[str]) -> bool: + return any(name not in _TRIVIAL_STMT_TYPES for name in stmt_names) + + +def is_reportable_structural_signature(signature: Mapping[str, str]) -> bool: + """Return whether a structural signature is meaningful enough to report. 
+ + Current policy intentionally suppresses single-statement boilerplate + families built from trivial statement kinds such as Expr / Assign / Raise / + Return. Multi-statement bodies are kept when they carry either structural + control-flow mass or an explicit terminal exit (`return` / `raise`) that + makes the branch family meaningfully distinct. + """ + stmt_names = _stmt_names_from_signature(signature) + if not stmt_names: + return False + if ( + signature.get("nested_if") == "1" + or signature.get("has_loop") == "1" + or signature.get("has_try") == "1" + ): + return True + if len(stmt_names) == 1: + return _has_non_trivial_stmt_names(stmt_names) + if _has_non_trivial_stmt_names(stmt_names): + return True + return "Return" in stmt_names or "Raise" in stmt_names + + +def _kind_requires_branch_signature(finding_kind: str) -> bool: + return finding_kind == _FINDING_KIND_BRANCHES + + +def _kind_min_occurrence_count(finding_kind: str) -> int: + match finding_kind: + case kind if kind in { + _FINDING_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + _FINDING_KIND_CLONE_COHORT_DRIFT, + }: + return 1 + case _: + return 2 + + +def _normalize_occurrences( + items: Sequence[StructuralFindingOccurrence], +) -> tuple[StructuralFindingOccurrence, ...]: + deduped_items = { + (item.file_path, item.qualname, item.start, item.end): item + for item in sorted( + items, + key=lambda occ: (occ.file_path, occ.qualname, occ.start, -occ.end), + ) + } + kept: list[StructuralFindingOccurrence] = [] + for item in deduped_items.values(): + if not kept: + kept.append(item) + continue + previous = kept[-1] + same_scope = ( + previous.file_path == item.file_path and previous.qualname == item.qualname + ) + overlaps = item.start <= previous.end + if same_scope and overlaps: + # Prefer the earlier / outer range so nested branches do not inflate + # one finding group with overlapping occurrences. 
+ continue + kept.append(item) + return tuple(kept) + + +def normalize_structural_finding_group( + group: StructuralFindingGroup, +) -> StructuralFindingGroup | None: + """Normalize one structural finding group for stable report/cache output.""" + if _kind_requires_branch_signature( + group.finding_kind + ) and not is_reportable_structural_signature(group.signature): + return None + normalized_items = _normalize_occurrences(group.items) + if len(normalized_items) < _kind_min_occurrence_count(group.finding_kind): + return None + return StructuralFindingGroup( + finding_kind=group.finding_kind, + finding_key=group.finding_key, + signature=dict(group.signature), + items=normalized_items, + ) + + +def normalize_structural_findings( + groups: Sequence[StructuralFindingGroup], +) -> tuple[StructuralFindingGroup, ...]: + """Normalize and sort structural findings for deterministic consumers.""" + normalized = [ + candidate + for candidate in (normalize_structural_finding_group(group) for group in groups) + if candidate is not None + ] + normalized.sort(key=lambda group: (-len(group.items), group.finding_key)) + return tuple(normalized) + + +def _summarize_branch(body: list[ast.stmt]) -> dict[str, str] | None: + """Build deterministic structural signature for a meaningful branch body.""" + if not body or all(isinstance(stmt, ast.Pass) for stmt in body): + return None + + call_count = raise_count = 0 + has_nested_if, has_loop, has_try = False, False, False + try_star = getattr(ast, "TryStar", None) + for node in ast.walk(ast.Module(body=body, type_ignores=[])): + if isinstance(node, ast.Call): + call_count += 1 + elif isinstance(node, ast.Raise): + raise_count += 1 + elif isinstance(node, ast.If): + has_nested_if = True + elif isinstance(node, (ast.For, ast.While, ast.AsyncFor)): + has_loop = True + elif isinstance(node, ast.Try) or ( + try_star is not None and isinstance(node, try_star) + ): + has_try = True + + stats = _BranchWalkStats( + call_count=call_count, + 
raise_count=raise_count, + has_nested_if=has_nested_if, + has_loop=has_loop, + has_try=has_try, + ) + signature = { + "stmt_seq": _stmt_type_sequence(body), + "terminal": _terminal_kind(body), + "calls": _bucket_calls(stats.call_count), + "raises": "0" if stats.raise_count == 0 else "1+", + "nested_if": "1" if stats.has_nested_if else "0", + "has_loop": "1" if stats.has_loop else "0", + "has_try": "1" if stats.has_try else "0", + } + if not is_reportable_structural_signature(signature): + return None + return signature + + +def _sig_canonical(sig: dict[str, str]) -> str: + """Canonical string representation of a signature (sorted keys).""" + return "|".join(f"{k}={v}" for k, v in sorted(sig.items())) + + +def _finding_key(qualname: str, sig_canonical: str) -> str: + """SHA1-based deterministic finding key.""" + raw = f"duplicated_branches|qualname={qualname}|sig={sig_canonical}" + return sha1(raw.encode("utf-8")).hexdigest() + + +# --------------------------------------------------------------------------- +# Branch body collection from ast.If chains +# --------------------------------------------------------------------------- + + +def _collect_if_branch_bodies(if_node: ast.If) -> list[tuple[list[ast.stmt], int, int]]: + """Collect all branch bodies from an if/elif/else chain. + + Returns list of (body, start_line, end_line) tuples. + Traverses elif chains without recursing into nested ifs inside bodies. 
+ """ + results: list[tuple[list[ast.stmt], int, int]] = [] + + current: ast.If | None = if_node + while current is not None: + body = current.body + if body and not all(isinstance(stmt, ast.Pass) for stmt in body): + start = body[0].lineno + end = getattr(body[-1], "end_lineno", body[-1].lineno) + results.append((body, start, end)) + + orelse = current.orelse + if not orelse: + break + # elif: orelse contains exactly one ast.If + if len(orelse) == 1 and isinstance(orelse[0], ast.If): + current = orelse[0] + else: + # else block + if orelse and not all(isinstance(stmt, ast.Pass) for stmt in orelse): + start = orelse[0].lineno + end = getattr(orelse[-1], "end_lineno", orelse[-1].lineno) + results.append((orelse, start, end)) + break + + return results + + +# --------------------------------------------------------------------------- +# Branch body collection from ast.Match (Python 3.10+) +# --------------------------------------------------------------------------- + + +def _collect_match_branch_bodies( + match_node: object, +) -> list[tuple[list[ast.stmt], int, int]]: + """Collect branch bodies from a match/case statement (Python 3.10+).""" + results: list[tuple[list[ast.stmt], int, int]] = [] + cases = getattr(match_node, "cases", []) + for case in cases: + body: list[ast.stmt] = getattr(case, "body", []) + if body and not all(isinstance(stmt, ast.Pass) for stmt in body): + start = body[0].lineno + end = getattr(body[-1], "end_lineno", body[-1].lineno) + results.append((body, start, end)) + return results + + +def _is_ignorable_entry_statement(statement: ast.stmt) -> bool: + if isinstance(statement, ast.Pass): + return True + if isinstance(statement, ast.Expr): + value = statement.value + return isinstance(value, ast.Constant) and isinstance(value.value, str) + return False + + +def _expr_has_side_effect(expr: ast.AST) -> bool: + return any( + isinstance(node, (ast.Call, ast.Await, ast.Yield, ast.YieldFrom)) + for node in ast.walk(expr) + ) + + +def 
_statement_has_side_effect(statement: ast.stmt) -> bool: + if isinstance( + statement, + ( + ast.Assign, + ast.AnnAssign, + ast.AugAssign, + ast.Delete, + ast.Import, + ast.ImportFrom, + ast.With, + ast.AsyncWith, + ast.Raise, + ast.Yield, + ast.Return, + ast.Break, + ast.Continue, + ), + ): + return True + if isinstance(statement, ast.Expr): + return _expr_has_side_effect(statement.value) + return False + + +def _is_guard_exit_if(statement: ast.stmt) -> tuple[bool, str]: + if not isinstance(statement, ast.If): + return False, "none" + if statement.orelse: + return False, "none" + terminal = _terminal_kind(statement.body) + if terminal.startswith("return") or terminal == "raise": + return True, terminal + return False, "none" + + +def _entry_guard_facts( + statements: Sequence[ast.stmt], +) -> tuple[int, tuple[str, ...], bool]: + guard_terminals: list[str] = [] + side_effect_before_first_guard = False + seen_guard = False + + for statement in statements: + if _is_ignorable_entry_statement(statement): + continue + is_guard, terminal = _is_guard_exit_if(statement) + if is_guard: + seen_guard = True + guard_terminals.append(terminal) + continue + if seen_guard: + break + if _statement_has_side_effect(statement): + side_effect_before_first_guard = True + + return ( + len(guard_terminals), + tuple(guard_terminals), + side_effect_before_first_guard if guard_terminals else False, + ) + + +def _guard_profile_text( + *, + count: int, + terminal_profile: str, +) -> str: + if count <= 0: + return "none" + return f"{count}x:{terminal_profile}" + + +class _FunctionStructureScanner: + __slots__ = ( + "_collect_findings", + "_filepath", + "_has_finally", + "_has_match", + "_has_side_effect_any", + "_has_try", + "_match_type", + "_qualname", + "_sig_to_branches", + "max_depth", + ) + + def __init__( + self, + *, + filepath: str, + qualname: str, + collect_findings: bool, + ) -> None: + self._filepath = filepath + self._qualname = qualname + self._collect_findings = 
collect_findings + self._sig_to_branches: dict[str, list[tuple[dict[str, str], int, int]]] = ( + defaultdict(list) + ) + self.max_depth = 0 + self._has_try = False + self._has_finally = False + self._has_side_effect_any = False + self._match_type = getattr(ast, "Match", None) + self._has_match = self._match_type is not None and sys.version_info >= (3, 10) + + def scan( + self, + node: ast.FunctionDef | ast.AsyncFunctionDef, + ) -> FunctionStructureFacts: + statements = list(node.body) + self._visit_statements(statements, depth=0) + guard_count, guard_terminals, side_effect_before_first_guard = ( + _entry_guard_facts(statements) + ) + guard_terminal_profile = ( + ",".join(guard_terminals) if guard_terminals else "none" + ) + terminal_kind = _terminal_kind(statements) + try_finally_profile = ( + "try_finally" + if self._has_finally + else ("try_no_finally" if self._has_try else "none") + ) + if guard_count > 0: + side_effect_order_profile = ( + "effect_before_guard" + if side_effect_before_first_guard + else "guard_then_effect" + ) + elif self._has_side_effect_any: + side_effect_order_profile = "effect_only" + else: + side_effect_order_profile = "none" + + return FunctionStructureFacts( + nesting_depth=self.max_depth, + structural_findings=tuple(self._build_groups()), + entry_guard_count=guard_count, + entry_guard_terminal_profile=guard_terminal_profile, + entry_guard_has_side_effect_before=side_effect_before_first_guard, + terminal_kind=terminal_kind, + try_finally_profile=try_finally_profile, + side_effect_order_profile=side_effect_order_profile, + ) + + def _visit_statements( + self, + statements: list[ast.stmt], + *, + depth: int, + suppress_if_chain_head: bool = False, + ) -> None: + for idx, statement in enumerate(statements): + suppress_group = ( + suppress_if_chain_head + and idx == 0 + and len(statements) == 1 + and isinstance(statement, ast.If) + ) + self._visit_statement( + statement, + depth=depth, + suppress_if_chain_head=suppress_group, + ) + + def 
_visit_statement( + self, + statement: ast.stmt, + *, + depth: int, + suppress_if_chain_head: bool, + ) -> None: + if _statement_has_side_effect(statement): + self._has_side_effect_any = True + + if isinstance(statement, ast.If): + next_depth = depth + 1 + self.max_depth = max(self.max_depth, next_depth) + if not suppress_if_chain_head and self._collect_findings: + self._record_if_chain(statement) + self._visit_statements(statement.body, depth=next_depth) + if statement.orelse: + self._visit_statements( + statement.orelse, + depth=next_depth, + suppress_if_chain_head=( + len(statement.orelse) == 1 + and isinstance(statement.orelse[0], ast.If) + ), + ) + return + + if ( + self._has_match + and self._match_type is not None + and isinstance(statement, self._match_type) + ): + next_depth = depth + 1 + self.max_depth = max(self.max_depth, next_depth) + if self._collect_findings: + self._record_match(statement) + for case in getattr(statement, "cases", []): + body: list[ast.stmt] = getattr(case, "body", []) + self._visit_statements(body, depth=next_depth) + return + + if isinstance( + statement, + (ast.For, ast.While, ast.AsyncFor, ast.Try, ast.With, ast.AsyncWith), + ): + next_depth = depth + 1 + if isinstance(statement, ast.Try): + self._has_try = True + if statement.finalbody: + self._has_finally = True + self.max_depth = max(self.max_depth, next_depth) + for nested in self._iter_nested_statement_lists(statement): + self._visit_statements(nested, depth=next_depth) + return + + nested_body = getattr(statement, "body", None) + if isinstance(nested_body, list): + self._visit_statements(nested_body, depth=depth) + + def _iter_nested_statement_lists(self, node: ast.AST) -> tuple[list[ast.stmt], ...]: + if isinstance(node, (ast.For, ast.While, ast.AsyncFor)): + result = [node.body] + if node.orelse: + result.append(node.orelse) + return tuple(result) + if isinstance(node, (ast.With, ast.AsyncWith)): + return (node.body,) + if isinstance(node, ast.Try): + result = 
[node.body] + result.extend(handler.body for handler in node.handlers) + if node.orelse: + result.append(node.orelse) + if node.finalbody: + result.append(node.finalbody) + return tuple(result) + return () + + def _record_if_chain(self, if_node: ast.If) -> None: + for body, start, end in _collect_if_branch_bodies(if_node): + sig = _summarize_branch(body) + if sig is None: + continue + self._sig_to_branches[_sig_canonical(sig)].append((sig, start, end)) + + def _record_match(self, match_node: object) -> None: + for body, start, end in _collect_match_branch_bodies(match_node): + sig = _summarize_branch(body) + if sig is None: + continue + self._sig_to_branches[_sig_canonical(sig)].append((sig, start, end)) + + def _build_groups(self) -> list[StructuralFindingGroup]: + if not self._collect_findings: + return [] + + groups: list[StructuralFindingGroup] = [] + for sig_key, occurrences in self._sig_to_branches.items(): + deduped_occurrences = { + (start, end): (sig, start, end) for sig, start, end in occurrences + } + if len(deduped_occurrences) < 2: + continue + + sorted_occurrences = sorted( + deduped_occurrences.values(), + key=lambda item: (item[1], item[2]), + ) + sig_dict = sorted_occurrences[0][0] + fkey = _finding_key(self._qualname, sig_key) + raw_group = StructuralFindingGroup( + finding_kind=_FINDING_KIND_BRANCHES, + finding_key=fkey, + signature=sig_dict, + items=tuple( + StructuralFindingOccurrence( + finding_kind=_FINDING_KIND_BRANCHES, + finding_key=fkey, + file_path=self._filepath, + qualname=self._qualname, + start=start, + end=end, + signature=sig_dict, + ) + for _, start, end in sorted_occurrences + ), + ) + normalized_group = normalize_structural_finding_group(raw_group) + if normalized_group is None: + continue + groups.append(normalized_group) + + groups.sort(key=lambda g: (-len(g.items), g.finding_key)) + return groups + + +def scan_function_structure( + node: ast.FunctionDef | ast.AsyncFunctionDef, + filepath: str, + qualname: str, + *, + 
collect_findings: bool = True, +) -> FunctionStructureFacts: + """Collect per-function structural facts in one recursive traversal.""" + scanner = _FunctionStructureScanner( + filepath=filepath, + qualname=qualname, + collect_findings=collect_findings, + ) + return scanner.scan(node) + + +@dataclass(frozen=True, slots=True) +class _CloneCohortMember: + file_path: str + qualname: str + start: int + end: int + entry_guard_count: int + entry_guard_terminal_profile: str + entry_guard_has_side_effect_before: bool + terminal_kind: str + try_finally_profile: str + side_effect_order_profile: str + + @property + def guard_exit_profile(self) -> str: + return _guard_profile_text( + count=self.entry_guard_count, + terminal_profile=self.entry_guard_terminal_profile, + ) + + +def _as_item_str(value: object, default: str = "") -> str: + return value if isinstance(value, str) else default + + +def _as_item_int(value: object, default: int = 0) -> int: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, str): + try: + return int(value) + except ValueError: + return default + return default + + +def _as_item_bool(value: object, default: bool = False) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, int): + return value != 0 + if isinstance(value, str): + normalized = value.strip().lower() + match normalized: + case "1" | "true" | "yes": + return True + case "0" | "false" | "no": + return False + case _: + pass + return default + + +def _group_item_sort_key(item: GroupItemLike) -> tuple[str, str, int, int]: + return ( + _as_item_str(item.get("filepath")), + _as_item_str(item.get("qualname")), + _as_item_int(item.get("start_line")), + _as_item_int(item.get("end_line")), + ) + + +def _clone_member_sort_key( + member: _CloneCohortMember, +) -> tuple[str, str, int, int]: + return ( + member.file_path, + member.qualname, + member.start, + member.end, + ) + + +def _clone_member_from_item(item: 
GroupItemLike) -> _CloneCohortMember | None: + file_path = _as_item_str(item.get("filepath")).strip() + qualname = _as_item_str(item.get("qualname")).strip() + start = _as_item_int(item.get("start_line")) + end = _as_item_int(item.get("end_line")) + if not file_path or not qualname or start <= 0 or end <= 0: + return None + terminal_kind = _as_item_str(item.get("terminal_kind"), "fallthrough").strip() + try_finally_profile = _as_item_str(item.get("try_finally_profile"), "none").strip() + side_effect_order_profile = _as_item_str( + item.get("side_effect_order_profile"), + "none", + ).strip() + entry_guard_terminal_profile = _as_item_str( + item.get("entry_guard_terminal_profile"), + "none", + ).strip() + return _CloneCohortMember( + file_path=file_path, + qualname=qualname, + start=start, + end=end, + entry_guard_count=max(0, _as_item_int(item.get("entry_guard_count"))), + entry_guard_terminal_profile=( + entry_guard_terminal_profile if entry_guard_terminal_profile else "none" + ), + entry_guard_has_side_effect_before=_as_item_bool( + item.get("entry_guard_has_side_effect_before"), + default=False, + ), + terminal_kind=terminal_kind if terminal_kind else "fallthrough", + try_finally_profile=try_finally_profile if try_finally_profile else "none", + side_effect_order_profile=( + side_effect_order_profile if side_effect_order_profile else "none" + ), + ) + + +def _majority_str(values: Sequence[str], *, default: str) -> str: + if not values: + return default + counts = Counter(values) + top = max(counts.values()) + winners = sorted(value for value, count in counts.items() if count == top) + return winners[0] if winners else default + + +def _majority_int(values: Sequence[int], *, default: int) -> int: + if not values: + return default + counts = Counter(values) + top = max(counts.values()) + winners = sorted(value for value, count in counts.items() if count == top) + return winners[0] if winners else default + + +def _majority_bool(values: Sequence[bool], *, default: 
bool) -> bool: + if not values: + return default + counts = Counter(values) + top = max(counts.values()) + winners = sorted(value for value, count in counts.items() if count == top) + return winners[0] if winners else default + + +def _cohort_finding_key(kind: str, cohort_id: str) -> str: + return sha1(f"{kind}|cohort={cohort_id}".encode()).hexdigest() + + +def _cohort_group_items( + *, + finding_kind: str, + finding_key: str, + signature: dict[str, str], + members: Sequence[_CloneCohortMember], +) -> tuple[StructuralFindingOccurrence, ...]: + return tuple( + StructuralFindingOccurrence( + finding_kind=finding_kind, + finding_key=finding_key, + file_path=member.file_path, + qualname=member.qualname, + start=member.start, + end=member.end, + signature=signature, + ) + for member in sorted(members, key=_clone_member_sort_key) + ) + + +def _clone_guard_exit_divergence( + cohort_id: str, + members: Sequence[_CloneCohortMember], +) -> StructuralFindingGroup | None: + if len(members) < 3: + return None + guard_counts = [member.entry_guard_count for member in members] + if not any(count > 0 for count in guard_counts): + return None + + guard_terminal_profiles = [ + member.entry_guard_terminal_profile for member in members + ] + terminal_kinds = [member.terminal_kind for member in members] + side_effect_before_guard_values = [ + member.entry_guard_has_side_effect_before + for member in members + if member.entry_guard_count > 0 + ] + + unique_guard_counts = sorted({str(value) for value in guard_counts}) + unique_guard_terminals = sorted(set(guard_terminal_profiles)) + unique_terminal_kinds = sorted(set(terminal_kinds)) + unique_side_effect_before_guard = sorted( + {"1" if value else "0" for value in side_effect_before_guard_values} + ) + if ( + len(unique_guard_counts) <= 1 + and len(unique_guard_terminals) <= 1 + and len(unique_terminal_kinds) <= 1 + and len(unique_side_effect_before_guard) <= 1 + ): + return None + + majority_guard_count = _majority_int(guard_counts, 
default=0) + majority_guard_terminal_profile = _majority_str( + guard_terminal_profiles, + default="none", + ) + majority_terminal_kind = _majority_str(terminal_kinds, default="fallthrough") + majority_side_effect_before_guard = _majority_bool( + side_effect_before_guard_values, + default=False, + ) + + divergent_members = [ + member + for member in members + if ( + member.entry_guard_count != majority_guard_count + or member.entry_guard_terminal_profile != majority_guard_terminal_profile + or member.terminal_kind != majority_terminal_kind + or ( + member.entry_guard_count > 0 + and member.entry_guard_has_side_effect_before + != majority_side_effect_before_guard + ) + ) + ] + if not divergent_members: + return None + + finding_key = _cohort_finding_key( + _FINDING_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + cohort_id, + ) + signature = { + "cohort_id": cohort_id, + "cohort_arity": str(len(members)), + "divergent_members": str(len(divergent_members)), + "majority_guard_count": str(majority_guard_count), + "majority_guard_terminal_profile": majority_guard_terminal_profile, + "majority_terminal_kind": majority_terminal_kind, + "majority_side_effect_before_guard": ( + "1" if majority_side_effect_before_guard else "0" + ), + "guard_count_values": ",".join(unique_guard_counts) + if unique_guard_counts + else "0", + "guard_terminal_values": ( + ",".join(unique_guard_terminals) if unique_guard_terminals else "none" + ), + "terminal_values": ( + ",".join(unique_terminal_kinds) if unique_terminal_kinds else "fallthrough" + ), + "side_effect_before_guard_values": ( + ",".join(unique_side_effect_before_guard) + if unique_side_effect_before_guard + else "0" + ), + } + return StructuralFindingGroup( + finding_kind=_FINDING_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + finding_key=finding_key, + signature=signature, + items=_cohort_group_items( + finding_kind=_FINDING_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + finding_key=finding_key, + signature=signature, + members=divergent_members, + ), + ) + + +def 
_clone_cohort_drift( + cohort_id: str, + members: Sequence[_CloneCohortMember], +) -> StructuralFindingGroup | None: + if len(members) < 3: + return None + + value_space: dict[str, list[str]] = { + "terminal_kind": [member.terminal_kind for member in members], + "guard_exit_profile": [member.guard_exit_profile for member in members], + "try_finally_profile": [member.try_finally_profile for member in members], + "side_effect_order_profile": [ + member.side_effect_order_profile for member in members + ], + } + drift_fields = sorted( + field for field, values in value_space.items() if len(set(values)) > 1 + ) + if not drift_fields: + return None + + majority_profile = { + field: _majority_str(values, default="none") + for field, values in value_space.items() + } + divergent_members = [ + member + for member in members + if any( + _member_profile_value(member, field) != majority_profile[field] + for field in drift_fields + ) + ] + if not divergent_members: + return None + + finding_key = _cohort_finding_key(_FINDING_KIND_CLONE_COHORT_DRIFT, cohort_id) + signature = { + "cohort_id": cohort_id, + "cohort_arity": str(len(members)), + "divergent_members": str(len(divergent_members)), + "drift_fields": ",".join(drift_fields), + "majority_terminal_kind": majority_profile["terminal_kind"], + "majority_guard_exit_profile": majority_profile["guard_exit_profile"], + "majority_try_finally_profile": majority_profile["try_finally_profile"], + "majority_side_effect_order_profile": majority_profile[ + "side_effect_order_profile" + ], + } + return StructuralFindingGroup( + finding_kind=_FINDING_KIND_CLONE_COHORT_DRIFT, + finding_key=finding_key, + signature=signature, + items=_cohort_group_items( + finding_kind=_FINDING_KIND_CLONE_COHORT_DRIFT, + finding_key=finding_key, + signature=signature, + members=divergent_members, + ), + ) + + +def _member_profile_value(member: _CloneCohortMember, field: str) -> str: + match field: + case "terminal_kind": + return member.terminal_kind + case 
"guard_exit_profile": + return member.guard_exit_profile + case "try_finally_profile": + return member.try_finally_profile + case "side_effect_order_profile": + return member.side_effect_order_profile + case _: + return "" + + +def build_clone_cohort_structural_findings( + *, + func_groups: Mapping[str, Sequence[GroupItemLike]], +) -> tuple[StructuralFindingGroup, ...]: + groups: list[StructuralFindingGroup] = [] + for cohort_id in sorted(func_groups): + rows = func_groups[cohort_id] + if len(rows) < 3: + continue + members = [ + member + for member in (_clone_member_from_item(row) for row in rows) + if member is not None + ] + if len(members) < 3: + continue + + guard_exit_group = _clone_guard_exit_divergence(cohort_id, members) + if guard_exit_group is not None: + groups.append(guard_exit_group) + + cohort_drift_group = _clone_cohort_drift(cohort_id, members) + if cohort_drift_group is not None: + groups.append(cohort_drift_group) + + return normalize_structural_findings(groups) diff --git a/codeclone/suppressions.py b/codeclone/suppressions.py new file mode 100644 index 0000000..2a9984d --- /dev/null +++ b/codeclone/suppressions.py @@ -0,0 +1,263 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import io +import re +import tokenize +from dataclasses import dataclass +from typing import TYPE_CHECKING, Final, Literal + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + +DEAD_CODE_RULE_ID: Final[str] = "dead-code" +SUPPORTED_RULE_IDS: Final[frozenset[str]] = frozenset( + { + DEAD_CODE_RULE_ID, + "clone-cohort-drift", + "clone-guard-exit-divergence", + } +) + +DirectiveBindingKind = Literal["inline", "leading"] +DeclarationKind = Literal["function", "method", "class"] +SuppressionSource = Literal["inline_codeclone"] +INLINE_CODECLONE_SUPPRESSION_SOURCE: Final[SuppressionSource] = "inline_codeclone" +SuppressionTargetKey = tuple[str, str, int, int, DeclarationKind] + 
+_SUPPRESSION_DIRECTIVE_PATTERN: Final[re.Pattern[str]] = re.compile( + r"^\s*#\s*codeclone\s*:\s*ignore\s*\[(?P[^\]]+)\]\s*$" +) +_RULE_ID_PATTERN: Final[re.Pattern[str]] = re.compile(r"^[a-z0-9][a-z0-9-]*$") + +__all__ = [ + "DEAD_CODE_RULE_ID", + "INLINE_CODECLONE_SUPPRESSION_SOURCE", + "SUPPORTED_RULE_IDS", + "DeclarationKind", + "DeclarationTarget", + "DirectiveBindingKind", + "SuppressionBinding", + "SuppressionDirective", + "SuppressionTargetKey", + "bind_suppressions_to_declarations", + "build_suppression_index", + "extract_suppression_directives", + "suppression_target_key", +] + + +@dataclass(frozen=True, slots=True) +class SuppressionDirective: + line: int + binding: DirectiveBindingKind + rules: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class DeclarationTarget: + filepath: str + qualname: str + start_line: int + end_line: int + kind: DeclarationKind + declaration_end_line: int | None = None + + +@dataclass(frozen=True, slots=True) +class SuppressionBinding: + filepath: str + qualname: str + start_line: int + end_line: int + kind: DeclarationKind + rules: tuple[str, ...] + source: SuppressionSource = "inline_codeclone" + + +def _merge_rules( + base: tuple[str, ...], + incoming: Sequence[str], +) -> tuple[str, ...]: + if not incoming: + return base + seen = set(base) + merged = list(base) + for rule_id in incoming: + if rule_id in seen: + continue + seen.add(rule_id) + merged.append(rule_id) + return tuple(merged) + + +def _parse_rule_ids( + raw: str, + *, + supported_rules: frozenset[str], +) -> tuple[str, ...]: + parsed: tuple[str, ...] 
= () + for token in raw.split(","): + rule_id = token.strip() + if not rule_id: + continue + if _RULE_ID_PATTERN.fullmatch(rule_id) is None: + continue + if rule_id not in supported_rules: + continue + parsed = _merge_rules(parsed, (rule_id,)) + return parsed + + +def extract_suppression_directives( + source: str, + *, + supported_rules: frozenset[str] = SUPPORTED_RULE_IDS, +) -> tuple[SuppressionDirective, ...]: + # Fast-path: skip tokenization when no directive marker exists. + # Every valid directive contains the literal "codeclone:" — if absent, + # no comment can match _SUPPRESSION_DIRECTIVE_PATTERN. + if "codeclone:" not in source: + return () + lines = source.splitlines() + directives: list[SuppressionDirective] = [] + + try: + tokens = tokenize.generate_tokens(io.StringIO(source).readline) + for token in tokens: + if token.type != tokenize.COMMENT: + continue + match = _SUPPRESSION_DIRECTIVE_PATTERN.fullmatch(token.string) + if match is None: + continue + parsed_rules = _parse_rule_ids( + match.group("rules"), + supported_rules=supported_rules, + ) + if not parsed_rules: + continue + + line_no = token.start[0] + col_no = token.start[1] + line_text = lines[line_no - 1] if 0 < line_no <= len(lines) else "" + binding: DirectiveBindingKind = ( + "inline" if line_text[:col_no].strip() else "leading" + ) + directives.append( + SuppressionDirective( + line=line_no, + binding=binding, + rules=parsed_rules, + ) + ) + except tokenize.TokenError: + return () + + return tuple( + sorted( + directives, + key=lambda item: (item.line, item.binding, item.rules), + ) + ) + + +def _declaration_inline_lines(target: DeclarationTarget) -> tuple[int, ...]: + end_line = target.declaration_end_line or target.start_line + if end_line <= 0 or end_line == target.start_line: + return (target.start_line,) + return (target.start_line, end_line) + + +def _bound_inline_rules( + *, + target: DeclarationTarget, + inline_rules_by_line: Mapping[int, tuple[str, ...]], +) -> tuple[str, ...]: + 
rules: tuple[str, ...] = () + for line_no in _declaration_inline_lines(target): + rules = _merge_rules(rules, inline_rules_by_line.get(line_no, ())) + return rules + + +def bind_suppressions_to_declarations( + *, + directives: Sequence[SuppressionDirective], + declarations: Sequence[DeclarationTarget], +) -> tuple[SuppressionBinding, ...]: + leading_rules_by_line: dict[int, tuple[str, ...]] = {} + inline_rules_by_line: dict[int, tuple[str, ...]] = {} + + for directive in directives: + target_map = ( + inline_rules_by_line + if directive.binding == "inline" + else leading_rules_by_line + ) + existing = target_map.get(directive.line, ()) + target_map[directive.line] = _merge_rules(existing, directive.rules) + + bindings: list[SuppressionBinding] = [] + for target in declarations: + bound_rules = _merge_rules( + leading_rules_by_line.get(target.start_line - 1, ()), + _bound_inline_rules( + target=target, + inline_rules_by_line=inline_rules_by_line, + ), + ) + if not bound_rules: + continue + bindings.append( + SuppressionBinding( + filepath=target.filepath, + qualname=target.qualname, + start_line=target.start_line, + end_line=target.end_line, + kind=target.kind, + rules=bound_rules, + ) + ) + + return tuple( + sorted( + bindings, + key=lambda item: ( + item.filepath, + item.start_line, + item.end_line, + item.qualname, + item.kind, + item.rules, + ), + ) + ) + + +def suppression_target_key( + *, + filepath: str, + qualname: str, + start_line: int, + end_line: int, + kind: DeclarationKind, +) -> SuppressionTargetKey: + return (filepath, qualname, start_line, end_line, kind) + + +def build_suppression_index( + bindings: Sequence[SuppressionBinding], +) -> Mapping[SuppressionTargetKey, tuple[str, ...]]: + index: dict[SuppressionTargetKey, tuple[str, ...]] = {} + for binding in bindings: + key = suppression_target_key( + filepath=binding.filepath, + qualname=binding.qualname, + start_line=binding.start_line, + end_line=binding.end_line, + kind=binding.kind, + ) + 
existing = index.get(key, ()) + index[key] = _merge_rules(existing, binding.rules) + return index diff --git a/codeclone/templates.py b/codeclone/templates.py index 448e781..eed9082 100644 --- a/codeclone/templates.py +++ b/codeclone/templates.py @@ -1,11 +1,11 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Minimal HTML skeleton template for the report. -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. +CSS and JS are injected via ${css} and ${js} placeholders. +Body content is injected via ${body}. """ -# ruff: noqa: E501 from __future__ import annotations @@ -20,7 +20,7 @@ REPORT_TEMPLATE = Template( r""" - + @@ -28,3076 +28,11 @@ - - + - -
    -
    -
    -

    CodeClone Report

    - v${version} -
    -
    - - - - -
    -
    -
    - -
    - ${report_meta_html} - ${global_novelty_html} - - - ${func_section} - ${block_section} - ${segment_section} - ${empty_state_html} -
    - Generated by CodeClone v${version} - - search - / - - commands - - - help - - - theme - T -
    -
    - - -
    -
    - -
    -
    -
    - - -
    -
    -
    -

    Clone Group Metrics

    - -
    -
    - -
    -
    -
    - - -
    -
    -
    -

    Help & Support

    - -
    -
    -
    -
    Quick Shortcuts
    -
    -
    -
    Command Palette
    -
    -
    -
    -
    Search
    -
    /
    -
    -
    -
    Toggle Theme
    -
    T
    -
    -
    -
    Close Overlays
    -
    Esc
    -
    -
    -
    - -
    -
    -
    - - -
    - - +${body} + - -""" +""" ) diff --git a/codeclone/ui_messages.py b/codeclone/ui_messages.py index 00ea7d4..c95a9f3 100644 --- a/codeclone/ui_messages.py +++ b/codeclone/ui_messages.py @@ -1,3 +1,6 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import platform @@ -8,61 +11,160 @@ from . import __version__ from .contracts import ISSUES_URL +from .domain.quality import ( + HEALTH_GRADE_A, + HEALTH_GRADE_B, + HEALTH_GRADE_C, + HEALTH_GRADE_D, + HEALTH_GRADE_F, +) -BANNER_SUBTITLE = "[italic]Architectural duplication detector[/italic]" +BANNER_SUBTITLE = "Structural code analysis" MARKER_CONTRACT_ERROR = "[error]CONTRACT ERROR:[/error]" -MARKER_GATING_FAILURE = "[error]GATING FAILURE:[/error]" MARKER_INTERNAL_ERROR = "[error]INTERNAL ERROR:[/error]" REPORT_BLOCK_GROUP_DISPLAY_NAME_ASSERT_PATTERN = "Assert pattern block" -REPORT_BLOCK_GROUP_COMPARE_NOTE_N_WAY = ( - "N-way group: each block matches {peer_count} peers in this group." -) HELP_VERSION = "Print the CodeClone version and exit." -HELP_ROOT = "Project root directory to scan." -HELP_MIN_LOC = "Minimum Lines of Code (LOC) to consider." -HELP_MIN_STMT = "Minimum AST statements to consider." -HELP_PROCESSES = "Number of parallel worker processes." -HELP_CACHE_PATH = "Path to the cache file. Default: /.cache/codeclone/cache.json." -HELP_CACHE_DIR_LEGACY = "Legacy alias for --cache-path." -HELP_MAX_BASELINE_SIZE_MB = "Maximum baseline file size in MB." -HELP_MAX_CACHE_SIZE_MB = "Maximum cache file size in MB." -HELP_BASELINE = "Path to the baseline file (stored in repo)." -HELP_UPDATE_BASELINE = "Overwrite the baseline file with current results." -HELP_FAIL_ON_NEW = "Exit with error if NEW clones (not in baseline) are detected." +HELP_ROOT = "Project root directory to scan.\nDefaults to the current directory." +HELP_MIN_LOC = "Minimum Lines of Code (LOC) required for clone analysis.\nDefault: 10." 
+HELP_MIN_STMT = "Minimum AST statement count required for clone analysis.\nDefault: 6." +HELP_PROCESSES = "Number of parallel worker processes.\nDefault: 4." +HELP_CACHE_PATH = ( + "Path to the cache file.\n" + "If FILE is omitted, uses /.cache/codeclone/cache.json." +) +HELP_CACHE_DIR_LEGACY = ( + "Legacy alias for --cache-path.\nPrefer --cache-path in new configurations." +) +HELP_MAX_BASELINE_SIZE_MB = "Maximum allowed baseline size in MB.\nDefault: 5." +HELP_MAX_CACHE_SIZE_MB = "Maximum cache file size in MB.\nDefault: 50." +HELP_BASELINE = ( + "Path to the clone baseline.\n" + f"If FILE is omitted, uses {Path('codeclone.baseline.json')}." +) +HELP_UPDATE_BASELINE = ( + "Overwrite the clone baseline with current results.\nDisabled by default." +) +HELP_FAIL_ON_NEW = ( + "Exit with code 3 if NEW clone findings not present in the baseline\nare detected." +) HELP_FAIL_THRESHOLD = ( - "Exit with error if total clone groups (function + block) exceed this number." -) -HELP_CI = "CI preset: --fail-on-new --no-color --quiet." -HELP_HTML = "Generate an HTML report to FILE." -HELP_JSON = "Generate a JSON report to FILE." -HELP_TEXT = "Generate a text report to FILE." -HELP_NO_PROGRESS = "Disable the progress bar (recommended for CI logs)." -HELP_NO_COLOR = "Disable ANSI colors in output." -HELP_QUIET = "Minimize output (still shows warnings and errors)." -HELP_VERBOSE = "Print detailed hash identifiers for new clones." -HELP_DEBUG = "Print debug details (traceback and environment) on internal errors." - -SUMMARY_TITLE = "Analysis Summary" -CLI_LAYOUT_WIDTH = 40 + "Exit with code 3 if the total number of function + block clone groups\n" + "exceeds this value.\n" + "Disabled unless set." +) +HELP_FAIL_COMPLEXITY = ( + "Exit with code 3 if any function exceeds the cyclomatic complexity\n" + "threshold.\n" + "If enabled without a value, uses 20." 
+) +HELP_FAIL_COUPLING = ( + "Exit with code 3 if any class exceeds the coupling threshold.\n" + "If enabled without a value, uses 10." +) +HELP_FAIL_COHESION = ( + "Exit with code 3 if any class exceeds the cohesion threshold.\n" + "If enabled without a value, uses 4." +) +HELP_FAIL_CYCLES = "Exit with code 3 if circular module dependencies are detected." +HELP_FAIL_DEAD_CODE = "Exit with code 3 if high-confidence dead code is detected." +HELP_FAIL_HEALTH = ( + "Exit with code 3 if the overall health score falls below the threshold.\n" + "If enabled without a value, uses 60." +) +HELP_FAIL_ON_NEW_METRICS = ( + "Exit with code 3 if new metrics violations appear relative to the\n" + "metrics baseline." +) +HELP_CI = ( + "Enable CI preset.\n" + "Equivalent to: --fail-on-new --no-color --quiet.\n" + "When a trusted metrics baseline is available, CI mode also enables\n" + "metrics regression gating." +) +HELP_UPDATE_METRICS_BASELINE = ( + "Overwrite the metrics baseline with current metrics.\nDisabled by default." +) +HELP_METRICS_BASELINE = ( + "Path to the metrics baseline.\n" + f"If FILE is omitted, uses {Path('codeclone.baseline.json')}." +) +HELP_SKIP_METRICS = "Skip full metrics analysis and run in clone-only mode." +HELP_SKIP_DEAD_CODE = "Skip dead code detection." +HELP_SKIP_DEPENDENCIES = "Skip dependency graph analysis." +HELP_HTML = ( + "Generate an HTML report.\n" + "If FILE is omitted, writes to .cache/codeclone/report.html." +) +HELP_JSON = ( + "Generate the canonical JSON report.\n" + "If FILE is omitted, writes to .cache/codeclone/report.json." +) +HELP_MD = ( + "Generate a Markdown report.\n" + "If FILE is omitted, writes to .cache/codeclone/report.md." +) +HELP_SARIF = ( + "Generate a SARIF 2.1.0 report.\n" + "If FILE is omitted, writes to .cache/codeclone/report.sarif." +) +HELP_TEXT = ( + "Generate a plain-text report.\n" + "If FILE is omitted, writes to .cache/codeclone/report.txt." 
+) +HELP_OPEN_HTML_REPORT = ( + "Open the generated HTML report in the default browser.\nRequires --html." +) +HELP_TIMESTAMPED_REPORT_PATHS = ( + "Append a UTC timestamp to default report filenames.\n" + "Applies only to report flags passed without FILE." +) +HELP_NO_PROGRESS = "Disable progress output.\nRecommended for CI logs." +HELP_PROGRESS = "Force-enable progress output." +HELP_NO_COLOR = "Disable ANSI colors." +HELP_COLOR = "Force-enable ANSI colors." +HELP_QUIET = "Reduce output to warnings, errors, and essential summaries." +HELP_VERBOSE = "Include detailed identifiers for NEW clone findings." +HELP_DEBUG = ( + "Print debug details for internal errors, including traceback and\n" + "environment information." +) + +SUMMARY_TITLE = "Summary" +METRICS_TITLE = "Metrics" + +CLI_LAYOUT_MAX_WIDTH = 80 + SUMMARY_LABEL_FILES_FOUND = "Files found" -SUMMARY_LABEL_FILES_ANALYZED = "Files analyzed" -SUMMARY_LABEL_CACHE_HITS = "Cache hits" -SUMMARY_LABEL_FILES_SKIPPED = "Files skipped" -SUMMARY_LABEL_FUNCTION = "Function clone groups" -SUMMARY_LABEL_BLOCK = "Block clone groups" -SUMMARY_LABEL_SEGMENT = "Segment clone groups" -SUMMARY_LABEL_SUPPRESSED = "Suppressed segment groups" +SUMMARY_LABEL_FILES_ANALYZED = " analyzed" +SUMMARY_LABEL_CACHE_HITS = " from cache" +SUMMARY_LABEL_FILES_SKIPPED = " skipped" +SUMMARY_LABEL_LINES_ANALYZED = "Lines (this run)" +SUMMARY_LABEL_FUNCTIONS_ANALYZED = "Functions (this run)" +SUMMARY_LABEL_METHODS_ANALYZED = "Methods (this run)" +SUMMARY_LABEL_CLASSES_ANALYZED = "Classes (this run)" +SUMMARY_LABEL_FUNCTION = "Function clones" +SUMMARY_LABEL_BLOCK = "Block clones" +SUMMARY_LABEL_SEGMENT = "Segment clones" +SUMMARY_LABEL_SUPPRESSED = " suppressed" SUMMARY_LABEL_NEW_BASELINE = "New vs baseline" -SUMMARY_COMPACT_INPUT = ( - "Input: found={found} analyzed={analyzed} cache_hits={cache_hits} skipped={skipped}" + +SUMMARY_COMPACT = ( + "Summary found={found} analyzed={analyzed}" + " cached={cache_hits} skipped={skipped}" ) 
SUMMARY_COMPACT_CLONES = ( - "Clone groups: function={function} block={block} " - "segment={segment} suppressed={suppressed} new_vs_baseline={new}" + "Clones func={function} block={block} seg={segment}" + " suppressed={suppressed} new={new}" ) +SUMMARY_COMPACT_METRICS = ( + "Metrics cc={cc_avg}/{cc_max} cbo={cbo_avg}/{cbo_max}" + " lcom4={lcom_avg}/{lcom_max} cycles={cycles} dead_code={dead}" + " health={health}({grade})" +) + WARN_SUMMARY_ACCOUNTING_MISMATCH = ( "Summary accounting mismatch: " "files_found != files_analyzed + cache_hits + files_skipped" @@ -71,13 +173,8 @@ STATUS_DISCOVERING = "[bold green]Discovering Python files..." STATUS_GROUPING = "[bold green]Grouping clones..." -INFO_SCANNING_ROOT = "[info]Scanning root:[/info] {root}" INFO_PROCESSING_CHANGED = "[info]Processing {count} changed files...[/info]" -INFO_HTML_REPORT_SAVED = "[info]HTML report saved:[/info] [bold]{path}[/bold]" -INFO_JSON_REPORT_SAVED = "[info]JSON report saved:[/info] [bold]{path}[/bold]" -INFO_TEXT_REPORT_SAVED = "[info]Text report saved:[/info] [bold]{path}[/bold]" -WARN_SKIPPING_FILE = "[warning]Skipping file {path}: {error}[/warning]" WARN_WORKER_FAILED = "[warning]Worker failed: {error}[/warning]" WARN_BATCH_ITEM_FAILED = "[warning]Failed to process batch item: {error}[/warning]" WARN_PARALLEL_FALLBACK = ( @@ -86,6 +183,9 @@ ) WARN_FAILED_FILES_HEADER = "\n[warning]{count} files failed to process:[/warning]" WARN_CACHE_SAVE_FAILED = "[warning]Failed to save cache: {error}[/warning]" +WARN_HTML_REPORT_OPEN_FAILED = ( + "[warning]Failed to open HTML report in browser: {path} ({error}).[/warning]" +) ERR_INVALID_OUTPUT_EXT = ( "[error]Invalid {label} output extension: {path} " @@ -104,6 +204,13 @@ ERR_REPORT_WRITE_FAILED = ( "[error]Failed to write {label} report: {path} ({error}).[/error]" ) +ERR_OPEN_HTML_REPORT_REQUIRES_HTML = ( + "[error]--open-html-report requires --html.[/error]" +) +ERR_TIMESTAMPED_REPORT_PATHS_REQUIRES_REPORT = ( + "[error]--timestamped-report-paths 
requires at least one report output " + "flag.[/error]" +) ERR_UNREADABLE_SOURCE_IN_GATING = ( "One or more source files could not be read in CI/gating mode.\n" "Unreadable source files: {count}." @@ -147,8 +254,8 @@ FAIL_NEW_ACCEPT_COMMAND = " codeclone . --update-baseline" FAIL_NEW_DETAIL_FUNCTION = "Details (function clone hashes):" FAIL_NEW_DETAIL_BLOCK = "Details (block clone hashes):" +FAIL_METRICS_TITLE = "[error]FAILED: Metrics quality gate triggered.[/error]" -ERR_FAIL_THRESHOLD = "Total clones ({total}) exceed threshold ({threshold})." WARN_NEW_CLONES_WITHOUT_FAIL = ( "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n" "Run with --update-baseline to accept them as technical debt." @@ -161,7 +268,8 @@ def version_output(version: str) -> str: def banner_title(version: str) -> str: return ( - f"[bold white]CodeClone[/bold white] [dim]v{version}[/dim]\n{BANNER_SUBTITLE}" + f" [bold white]CodeClone[/bold white] [dim]v{version}[/dim]" + f" [dim]\u00b7[/dim] [dim]{BANNER_SUBTITLE}[/dim]" ) @@ -189,22 +297,18 @@ def fmt_report_write_failed(*, label: str, path: Path, error: object) -> str: return ERR_REPORT_WRITE_FAILED.format(label=label, path=path, error=error) -def fmt_unreadable_source_in_gating(*, count: int) -> str: - return ERR_UNREADABLE_SOURCE_IN_GATING.format(count=count) +def fmt_html_report_open_failed(*, path: Path, error: object) -> str: + return WARN_HTML_REPORT_OPEN_FAILED.format(path=path, error=error) -def fmt_scanning_root(root: Path) -> str: - return INFO_SCANNING_ROOT.format(root=root) +def fmt_unreadable_source_in_gating(*, count: int) -> str: + return ERR_UNREADABLE_SOURCE_IN_GATING.format(count=count) def fmt_processing_changed(count: int) -> str: return INFO_PROCESSING_CHANGED.format(count=count) -def fmt_skipping_file(path: str, error: object) -> str: - return WARN_SKIPPING_FILE.format(path=path, error=error) - - def fmt_worker_failed(error: object) -> str: return WARN_WORKER_FAILED.format(error=error) @@ -237,10 
+341,10 @@ def fmt_path(template: str, path: Path) -> str: return template.format(path=path) -def fmt_summary_compact_input( +def fmt_summary_compact( *, found: int, analyzed: int, cache_hits: int, skipped: int ) -> str: - return SUMMARY_COMPACT_INPUT.format( + return SUMMARY_COMPACT.format( found=found, analyzed=analyzed, cache_hits=cache_hits, skipped=skipped ) @@ -262,16 +366,162 @@ def fmt_summary_compact_clones( ) -def fmt_fail_threshold(*, total: int, threshold: int) -> str: - return ERR_FAIL_THRESHOLD.format(total=total, threshold=threshold) +def fmt_summary_compact_metrics( + *, + cc_avg: float, + cc_max: int, + cbo_avg: float, + cbo_max: int, + lcom_avg: float, + lcom_max: int, + cycles: int, + dead: int, + health: int, + grade: str, +) -> str: + return SUMMARY_COMPACT_METRICS.format( + cc_avg=f"{cc_avg:.1f}", + cc_max=cc_max, + cbo_avg=f"{cbo_avg:.1f}", + cbo_max=cbo_max, + lcom_avg=f"{lcom_avg:.1f}", + lcom_max=lcom_max, + cycles=cycles, + dead=dead, + health=health, + grade=grade, + ) -def fmt_contract_error(message: str) -> str: - return f"{MARKER_CONTRACT_ERROR}\n{message}" +_HEALTH_GRADE_STYLE: dict[str, str] = { + HEALTH_GRADE_A: "bold green", + HEALTH_GRADE_B: "green", + HEALTH_GRADE_C: "yellow", + HEALTH_GRADE_D: "bold red", + HEALTH_GRADE_F: "bold red", +} + +_L = 12 # label column width (after 2-space indent) + + +def _v(n: int, style: str = "") -> str: + """Format value: dim if zero, styled otherwise.""" + match (n == 0, bool(style)): + case (True, _): + return f"[dim]{n}[/dim]" + case (False, True): + return f"[{style}]{n}[/{style}]" + case _: + return str(n) + + +def _vn(n: int, style: str = "") -> str: + """Format value with comma separator: dim if zero, styled otherwise.""" + match (n == 0, bool(style)): + case (True, _): + return f"[dim]{n:,}[/dim]" + case (False, True): + return f"[{style}]{n:,}[/{style}]" + case _: + return f"{n:,}" + + +def fmt_summary_files(*, found: int, analyzed: int, cached: int, skipped: int) -> str: + parts = [ + 
f"{_v(found, 'bold')} found", + f"{_v(analyzed, 'bold cyan')} analyzed", + f"{_v(cached)} cached", + f"{_v(skipped)} skipped", + ] + val = " \u00b7 ".join(parts) + return f" {'Files':<{_L}}{val}" + + +def fmt_summary_parsed( + *, lines: int, functions: int, methods: int, classes: int +) -> str | None: + if lines == 0 and functions == 0 and methods == 0 and classes == 0: + return None + parts = [f"{_vn(lines, 'bold cyan')} lines"] + if functions: + parts.append(f"{_v(functions, 'bold cyan')} functions") + if methods: + parts.append(f"{_v(methods, 'bold cyan')} methods") + if classes: + parts.append(f"{_v(classes, 'bold cyan')} classes") + val = " \u00b7 ".join(parts) + return f" {'Parsed':<{_L}}{val}" + + +def fmt_summary_clones( + *, func: int, block: int, segment: int, suppressed: int, new: int +) -> str: + clone_parts = [ + f"{_v(func, 'bold yellow')} func", + f"{_v(block, 'bold yellow')} block", + ] + if segment: + clone_parts.append(f"{_v(segment, 'bold yellow')} seg") + main = " \u00b7 ".join(clone_parts) + quals = [ + f"{_v(suppressed, 'yellow')} suppressed", + f"{_v(new, 'bold red')} new", + ] + return f" {'Clones':<{_L}}{main} ({', '.join(quals)})" + + +def fmt_metrics_health(total: int, grade: str) -> str: + s = _HEALTH_GRADE_STYLE.get(grade, "bold") + return f" {'Health':<{_L}}[{s}]{total}/100 ({grade})[/{s}]" + +def fmt_metrics_cc(avg: float, max_val: int, high_risk: int) -> str: + hr = ( + f"[bold red]{high_risk} high-risk[/bold red]" + if high_risk + else "[dim]0 high-risk[/dim]" + ) + return f" {'CC':<{_L}}avg {avg:.1f} \u00b7 max {max_val} \u00b7 {hr}" + + +def fmt_metrics_coupling(avg: float, max_val: int) -> str: + return f" {'Coupling':<{_L}}avg {avg:.1f} \u00b7 max {max_val}" + + +def fmt_metrics_cohesion(avg: float, max_val: int) -> str: + return f" {'Cohesion':<{_L}}avg {avg:.1f} \u00b7 max {max_val}" -def fmt_gating_failure(message: str) -> str: - return f"{MARKER_GATING_FAILURE}\n{message}" + +def fmt_metrics_cycles(count: int) -> str: + 
match count: + case 0: + return f" {'Cycles':<{_L}}[green]\u2714 clean[/green]" + case _: + return f" {'Cycles':<{_L}}[bold red]{count} detected[/bold red]" + + +def fmt_metrics_dead_code(count: int, *, suppressed: int = 0) -> str: + suppressed_suffix = ( + f" [dim]({suppressed} suppressed)[/dim]" if suppressed > 0 else "" + ) + match count: + case 0: + return ( + f" {'Dead code':<{_L}}[green]\u2714 clean[/green]{suppressed_suffix}" + ) + case _: + return ( + f" {'Dead code':<{_L}}[bold red]{count} found[/bold red]" + f"{suppressed_suffix}" + ) + + +def fmt_pipeline_done(elapsed: float) -> str: + return f" [dim]Pipeline done in {elapsed:.2f}s[/dim]" + + +def fmt_contract_error(message: str) -> str: + return f"{MARKER_CONTRACT_ERROR}\n{message}" def fmt_internal_error( @@ -317,7 +567,3 @@ def fmt_internal_error( ] ) return "\n".join(lines) - - -def fmt_report_block_group_compare_note_n_way(*, peer_count: int) -> str: - return REPORT_BLOCK_GROUP_COMPARE_NOTE_N_WAY.format(peer_count=peer_count) diff --git a/docs/README.md b/docs/README.md index 5eb4a88..b46ffd7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,47 +1,82 @@ # CodeClone Docs -This directory has two documentation layers. +This site is built with MkDocs and published to +[orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/). -- [`docs/book/`](book/): **contract-first** documentation. This is the canonical source for **schemas**, **statuses**, * - *exit codes**, **trust model**, and **determinism guarantees**. Everything here is derived from code + locked tests. -- [`docs/architecture.md`](architecture.md), [`docs/cfg.md`](cfg.md): **deep-dive narrative** docs (architecture and CFG - semantics). These may include rationale and design intent, but must not contradict the contract book. +It has two documentation layers: + +- [Contracts Book](book/README.md): **contract-first** documentation. 
This is the canonical + source for **schemas**, **statuses**, **exit codes**, **trust model**, and + **determinism guarantees**. Everything here is derived from code + locked + tests. +- [Architecture Narrative](architecture.md), [CFG Semantics](cfg.md): + **deep-dive narrative** docs (architecture and CFG semantics). These may + include rationale and design intent, but must not contradict the contract + book. + +The published site also exposes a live sample report generated from the current +repository build: + +- [Examples / Sample Report](examples/report.md) ## Start Here -- Contracts and guarantees: [`docs/book/00-intro.md`](book/00-intro.md) -- Architecture map (components + ownership): [`docs/book/01-architecture-map.md`](book/01-architecture-map.md) -- Terminology: [`docs/book/02-terminology.md`](book/02-terminology.md) +- [Contracts and guarantees](book/00-intro.md) +- [Architecture map (components + ownership)](book/01-architecture-map.md) +- [Terminology](book/02-terminology.md) ## Core Contracts -- Exit codes and failure policy: [`docs/book/03-contracts-exit-codes.md`](book/03-contracts-exit-codes.md) -- Config and defaults: [`docs/book/04-config-and-defaults.md`](book/04-config-and-defaults.md) -- Core pipeline and invariants: [`docs/book/05-core-pipeline.md`](book/05-core-pipeline.md) -- Baseline contract (schema v1): [`docs/book/06-baseline.md`](book/06-baseline.md) -- Cache contract (schema v1.3): [`docs/book/07-cache.md`](book/07-cache.md) -- Report contract (schema v1.1): [`docs/book/08-report.md`](book/08-report.md) +- [Exit codes and failure policy](book/03-contracts-exit-codes.md) +- [Config and defaults](book/04-config-and-defaults.md) +- [Core pipeline and invariants](book/05-core-pipeline.md) +- [Baseline contract (schema v2.0)](book/06-baseline.md) +- [Cache contract (schema v2.2)](book/07-cache.md) +- [Report contract (schema v2.1)](book/08-report.md) ## Interfaces -- CLI behavior, modes, and UX: [`docs/book/09-cli.md`](book/09-cli.md) -- 
HTML report rendering contract: [`docs/book/10-html-render.md`](book/10-html-render.md) +- [CLI behavior, modes, and UX](book/09-cli.md) +- [HTML report rendering contract](book/10-html-render.md) ## System Properties -- Security model and threat boundaries: [`docs/book/11-security-model.md`](book/11-security-model.md) -- Determinism policy: [`docs/book/12-determinism.md`](book/12-determinism.md) -- Tests as specification: [`docs/book/13-testing-as-spec.md`](book/13-testing-as-spec.md) -- Compatibility and versioning rules: [ - `docs/book/14-compatibility-and-versioning.md`](book/14-compatibility-and-versioning.md) +- [Security model and threat boundaries](book/11-security-model.md) +- [Determinism policy](book/12-determinism.md) +- [Tests as specification](book/13-testing-as-spec.md) +- [Compatibility and versioning rules](book/14-compatibility-and-versioning.md) + +## Quality Contracts + +- [Metrics mode and quality gates](book/15-metrics-and-quality-gates.md) +- [Dead-code contract and test-boundary policy](book/16-dead-code-contract.md) +- [Suggestions and clone typing contract](book/17-suggestions-and-clone-typing.md) +- [Reproducible Docker benchmarking](book/18-benchmarking.md) +- [Inline suppressions contract](book/19-inline-suppressions.md) ## Deep Dives -- Architecture narrative: [`docs/architecture.md`](architecture.md) -- CFG design and semantics: [`docs/cfg.md`](cfg.md) +- [Architecture narrative](architecture.md) +- [CFG design and semantics](cfg.md) +- [SARIF integration for IDE/code-scanning use](sarif.md) +- [Docs publishing and Pages workflow](publishing.md) ## Reference Appendices -- Status enums and typed contracts: [`docs/book/appendix/a-status-enums.md`](book/appendix/a-status-enums.md) -- Schema layouts (baseline/cache/report): [`docs/book/appendix/b-schema-layouts.md`](book/appendix/b-schema-layouts.md) -- Error catalog (contract vs internal): [`docs/book/appendix/c-error-catalog.md`](book/appendix/c-error-catalog.md) +- [Status enums and 
typed contracts](book/appendix/a-status-enums.md) +- [Schema layouts (baseline/cache/report)](book/appendix/b-schema-layouts.md) +- [Error catalog (contract vs internal)](book/appendix/c-error-catalog.md) + +## Local Preview + +Build the docs site with MkDocs, then generate the sample report into the built +site: + +```bash +uv run --with mkdocs --with mkdocs-material mkdocs build --strict +uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live +``` + +GitHub Pages publishing is handled by +[`docs.yml`](https://github.com/orenlab/codeclone/blob/main/.github/workflows/docs.yml) +via a custom Actions workflow. diff --git a/docs/architecture.md b/docs/architecture.md index 87ff40b..1cce692 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -96,15 +96,17 @@ generators with strict hash confirmation. ## 7. Clone Detection -Two clone types are detected: +Clone groups are detected at three granularities: -### Function clones (Type-2) +### Function clone groups -- Entire function CFGs are identical. +- Grouped by `fingerprint|loc_bucket`. +- Report typing is deterministic (`Type-1`..`Type-4`) in report layer. -### Block clones (Type-3-lite) +### Block clone groups -- Repeated structural statement blocks inside larger functions. +- Repeated structural statement windows across functions. +- Report typing is `Type-4` with explainability facts from core. Noise filters applied: @@ -115,7 +117,7 @@ Noise filters applied: --- -### Segment clones (internal) +### Segment clones (internal/report-only) - Detected only **inside the same function**. - Used for internal copy‑paste discovery and report explainability. @@ -126,38 +128,40 @@ Noise filters applied: --- +### Structural findings (report-only) + +- `duplicated_branches`: repeated branch-body signatures. +- `clone_guard_exit_divergence`: guard/terminal divergence inside one function-clone cohort. +- `clone_cohort_drift`: drift from majority terminal/guard/try/side-effect profile. 
+ +These findings are rendered in reports only and do not change baseline diff or CI +gating decisions. + +--- + ## 8. Reporting -Detected clone groups can be: - -- printed as text, -- exported as JSON, -- rendered as an interactive HTML report. - -All report formats include provenance metadata: - -- `report_schema_version` -- `codeclone_version` -- `python_version` (runtime major.minor, human-readable) -- `python_tag` (runtime compatibility tag used by baseline/cache contracts) -- `baseline_path` -- `baseline_fingerprint_version` -- `baseline_schema_version` -- `baseline_python_tag` -- `baseline_generator_name` -- `baseline_generator_version` -- `baseline_payload_sha256` -- `baseline_payload_sha256_verified` -- `baseline_loaded` -- `baseline_status` - ( - `ok | missing | too_large | invalid_json | invalid_type | missing_fields | mismatch_schema_version | mismatch_fingerprint_version | mismatch_python_version | generator_mismatch | integrity_missing | integrity_failed`; - `mismatch_python_version` is the status name used for `python_tag` mismatch) -- `cache_path` -- `cache_schema_version` -- `cache_status` -- `cache_used` -- `files_skipped_source_io` +Detected findings can be rendered as: + +- interactive HTML (`--html`), +- canonical JSON (`--json`, schema `2.1`), +- deterministic text projection (`--text`), +- deterministic Markdown projection (`--md`), +- deterministic SARIF projection (`--sarif`). 
+ +Reporting uses a layered model: + +- canonical sections: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics` +- non-canonical view layer: `derived` +- integrity metadata: `integrity` (`canonicalization` + `digest`) + +Provenance is carried through `meta` and includes: + +- runtime/context (`codeclone_version`, `python_version`, `python_tag`, `analysis_mode`, `report_mode`) +- baseline status block (`meta.baseline.*`) +- cache status block (`meta.cache.*`) +- metrics-baseline status block (`meta.metrics_baseline.*`) +- generation timestamp (`meta.runtime.report_generated_at_utc`) Explainability contract (v1): @@ -172,7 +176,8 @@ Explainability contract (v1): Baseline comparison allows CI to fail **only on new clones**, enabling gradual architectural improvement. -Baseline files use a stable v1 contract. Compatibility is checked by +Baseline files use a stable v2 contract (schema `2.0`, with compatibility +support for major `1` legacy schema checks where applicable). Compatibility is checked by `schema_version`, `fingerprint_version`, `python_tag`, and `generator.name`, not package patch/minor version. Regeneration is typically required when `fingerprint_version` or `python_tag` changes. @@ -229,5 +234,5 @@ patch updates within the same interpreter tag. ## Summary -CodeClone is an **architectural duplication radar**, -not a static analyzer or linter. +CodeClone provides **structural code quality analysis** for Python — +clone detection, quality metrics, and baseline-aware CI governance. diff --git a/docs/assets/codeclone-wordmark.svg b/docs/assets/codeclone-wordmark.svg new file mode 100644 index 0000000..967edc7 --- /dev/null +++ b/docs/assets/codeclone-wordmark.svg @@ -0,0 +1,18 @@ + + + + + + + CodeClone + + diff --git a/docs/book/00-intro.md b/docs/book/00-intro.md index 637b2f2..c3e1cb7 100644 --- a/docs/book/00-intro.md +++ b/docs/book/00-intro.md @@ -1,59 +1,73 @@ # 00. 
Intro ## Purpose -This book is the executable contract for CodeClone behavior in v1.x. It describes only behavior that is present in code and/or locked by tests. + +This book is the executable contract for CodeClone behavior in v2.x. It +describes only behavior that is present in code and/or locked by tests. ## Public surface + - CLI entrypoint: `codeclone/cli.py:main` - Package version: `codeclone/__init__.py:__version__` - Global contract constants: `codeclone/contracts.py` ## Contracts -CodeClone provides these guarantees when inputs are identical (same repository content, same Python tag, same tool version, same baseline/cache/report schemas): + +CodeClone provides these guarantees when inputs are identical (same repository content, same Python tag, same tool +version, same baseline/cache/report schemas): + - Deterministic clone grouping and report serialization. - Explicit trust model for baseline/cache. - Stable exit-code categories for contract vs gating vs internal failures. Refs: -- `codeclone/_report_serialize.py:to_json_report` + +- `codeclone/report/json_contract.py:build_report_document` - `codeclone/baseline.py:Baseline.verify_compatibility` - `codeclone/cache.py:Cache.load` - `codeclone/contracts.py:ExitCode` ## Invariants (MUST) + - Contract errors and gating failures are separate categories. - Baseline trust is explicit (`baseline_loaded`, `baseline_status`). - Cache is optimization-only; invalid cache never becomes source of truth. 
Refs: + - `codeclone/cli.py:_main_impl` - `codeclone/baseline.py:BASELINE_UNTRUSTED_STATUSES` - `codeclone/cache.py:Cache._ignore_cache` ## Failure modes -| Condition | Behavior | -| --- | --- | -| Invalid/untrusted baseline in normal mode | Warning + compare against empty baseline | -| Invalid/untrusted baseline in CI/gating mode | Contract error (exit 2) | -| New clones in gating mode | Gating failure (exit 3) | -| Unexpected runtime exception | Internal error (exit 5) | + +| Condition | Behavior | +|----------------------------------------------|------------------------------------------| +| Invalid/untrusted baseline in normal mode | Warning + compare against empty baseline | +| Invalid/untrusted baseline in CI/gating mode | Contract error (exit 2) | +| New clones in gating mode | Gating failure (exit 3) | +| Unexpected runtime exception | Internal error (exit 5) | Refs: + - `codeclone/cli.py:_main_impl` - `codeclone/cli.py:main` ## Determinism / canonicalization + - Filesystem traversal is sorted before processing. - Group keys and serialized arrays are sorted in report JSON/TXT. - Baseline and cache payload hashing uses canonical JSON serialization. Refs: + - `codeclone/scanner.py:iter_py_files` -- `codeclone/_report_serialize.py:to_json_report` +- `codeclone/report/json_contract.py:build_report_document` - `codeclone/baseline.py:_compute_payload_sha256` - `codeclone/cache.py:_canonical_json` ## Locked by tests + - `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` - `tests/test_report.py::test_report_json_deterministic_group_order` - `tests/test_baseline.py::test_baseline_hash_canonical_determinism` @@ -61,6 +75,28 @@ Refs: - `tests/test_cli_unit.py::test_cli_help_text_consistency` ## Non-guarantees + - Cross-Python-tag clone IDs are not guaranteed identical. - UI wording and visual layout may evolve without schema bumps. - Performance characteristics are best-effort, not strict SLA. 
+ +## Recommended reading paths + +- CI contract path: + [03-contracts-exit-codes.md](03-contracts-exit-codes.md) -> + [06-baseline.md](06-baseline.md) -> + [07-cache.md](07-cache.md) -> + [08-report.md](08-report.md) -> + [09-cli.md](09-cli.md) +- Metrics governance path: + [04-config-and-defaults.md](04-config-and-defaults.md) -> + [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) -> + [16-dead-code-contract.md](16-dead-code-contract.md) -> + [19-inline-suppressions.md](19-inline-suppressions.md) -> + [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) +- Determinism and compatibility path: + [12-determinism.md](12-determinism.md) -> + [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) +- Benchmarking path: + [12-determinism.md](12-determinism.md) -> + [18-benchmarking.md](18-benchmarking.md) diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index ade5e32..a7c0114 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -2,68 +2,76 @@ ## Purpose -Document the current module boundaries and ownership in the codebase. +Document current module boundaries and ownership in CodeClone v2.x. ## Public surface Main ownership layers: -- Core detection pipeline: scanner → extractor → cfg/normalize → grouping. -- Contracts/IO: baseline, cache, CLI validation, exit semantics. -- Report model/serialization: JSON/TXT generation and explainability facts. +- Core detection pipeline: `scanner` -> `extractor` -> `cfg/normalize` -> `grouping`. +- Quality metrics pipeline: complexity/coupling/cohesion/dependencies/dead-code/health. +- Contracts and persistence: baseline, metrics baseline, cache, exit semantics. +- Report model and serialization: deterministic JSON/TXT + explainability facts. - Render layer: HTML rendering and template assets. 
## Data model -| Layer | Modules | Responsibility | -|------------------------|----------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------| -| Contracts | `codeclone/contracts.py`, `codeclone/errors.py` | Shared schema versions, URLs, exit-code enum, typed exceptions | -| Discovery + parsing | `codeclone/scanner.py`, `codeclone/extractor.py` | Enumerate files, parse AST, extract function/block/segment units | -| Structural analysis | `codeclone/cfg.py`, `codeclone/normalize.py`, `codeclone/blockhash.py`, `codeclone/fingerprint.py`, `codeclone/blocks.py` | CFG, normalization, statement hashes, block/segment windows | -| Grouping + report core | `codeclone/_report_grouping.py`, `codeclone/_report_blocks.py`, `codeclone/_report_segments.py`, `codeclone/_report_explain.py` | Build groups, merge windows, suppress segment noise, compute explainability facts | -| Report serialization | `codeclone/_report_serialize.py`, `codeclone/_cli_meta.py` | Canonical JSON/TXT schema + shared report metadata | -| Rendering | `codeclone/html_report.py`, `codeclone/_html_escape.py`, `codeclone/_html_snippets.py`, `codeclone/templates.py` | HTML-only view layer over report model | -| Runtime orchestration | `codeclone/cli.py`, `codeclone/_cli_args.py`, `codeclone/_cli_paths.py`, `codeclone/_cli_summary.py`, `codeclone/ui_messages.py` | CLI UX, status handling, outputs, error category markers | +| Layer | Modules | Responsibility | +|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------| +| Contracts | `codeclone/contracts.py`, `codeclone/errors.py` | Shared schema versions, URLs, 
exit-code enum, typed exceptions | +| Domain models | `codeclone/models.py`, `codeclone/domain/*.py` | Typed dataclasses/enums plus centralized finding/scope/severity taxonomies | +| Discovery + parsing | `codeclone/scanner.py`, `codeclone/extractor.py` | Enumerate files, parse AST, extract function/block/segment units | +| Structural analysis | `codeclone/cfg.py`, `codeclone/normalize.py`, `codeclone/blockhash.py`, `codeclone/fingerprint.py`, `codeclone/blocks.py` | CFG, normalization, statement hashes, block/segment windows | +| Grouping | `codeclone/grouping.py` | Build function/block/segment groups | +| Metrics | `codeclone/metrics/*` | Compute complexity/coupling/cohesion/dependency/dead-code/health signals | +| Report core | `codeclone/report/*`, `codeclone/_cli_meta.py` | Merge windows, explainability facts, deterministic JSON/TXT schema + shared metadata | +| Persistence | `codeclone/baseline.py`, `codeclone/metrics_baseline.py`, `codeclone/cache.py` | Baseline/cache trust/compat/integrity and atomic persistence | +| Runtime orchestration | `codeclone/pipeline.py`, `codeclone/cli.py`, `codeclone/_cli_args.py`, `codeclone/_cli_paths.py`, `codeclone/_cli_summary.py`, `codeclone/_cli_config.py`, `codeclone/ui_messages.py` | CLI UX, stage orchestration, status handling, outputs, error markers | +| Rendering | `codeclone/html_report.py`, `codeclone/_html_report/*`, `codeclone/_html_badges.py`, `codeclone/_html_js.py`, `codeclone/_html_escape.py`, `codeclone/_html_snippets.py`, `codeclone/templates.py` | HTML-only view layer over report data | Refs: -- `codeclone/report.py` +- `codeclone/pipeline.py` - `codeclone/cli.py:_main_impl` ## Contracts -- Core pipeline does not depend on HTML modules. -- HTML rendering receives already-computed report data/facts. -- Baseline and cache contracts are validated before being trusted. +- Core analysis modules do not depend on render/UI modules. 
+- HTML renderer receives already-computed report data/facts and does not + recompute detection semantics. +- Baseline, metrics baseline, and cache are validated before being trusted. Refs: -- `codeclone/report.py` +- `codeclone/report/json_contract.py:build_report_document` - `codeclone/html_report.py:build_html_report` - `codeclone/baseline.py:Baseline.load` +- `codeclone/metrics_baseline.py:MetricsBaseline.load` - `codeclone/cache.py:Cache.load` ## Invariants (MUST) - Report serialization is deterministic and schema-versioned. -- UI is render-only and must not recompute detection semantics. -- Status enums are domain-owned in baseline/cache modules. +- UI is render-only and must not change gating semantics. +- Status enums remain domain-owned in baseline/metrics-baseline/cache modules. Refs: -- `codeclone/_report_serialize.py:to_json_report` -- `codeclone/_report_explain.py:build_block_group_facts` +- `codeclone/report/json_contract.py:build_report_document` +- `codeclone/report/explain.py:build_block_group_facts` - `codeclone/baseline.py:BaselineStatus` +- `codeclone/metrics_baseline.py:MetricsBaselineStatus` - `codeclone/cache.py:CacheStatus` ## Failure modes -| Condition | Layer | -|----------------------------------------|---------------------------------------------------| -| Invalid CLI args / invalid output path | Runtime orchestration (`_cli_args`, `_cli_paths`) | -| Baseline schema/integrity mismatch | Baseline contract layer | -| Cache corruption/version mismatch | Cache contract layer (fail-open) | -| HTML snippet read failure | Render layer fallback snippet | +| Condition | Layer | +|--------------------------------------------|---------------------------------------------------| +| Invalid CLI args / invalid output path | Runtime orchestration (`_cli_args`, `_cli_paths`) | +| Baseline schema/integrity mismatch | Baseline contract layer | +| Metrics baseline schema/integrity mismatch | Metrics baseline contract layer | +| Cache corruption/version 
mismatch | Cache contract layer (fail-open) | +| HTML snippet read failure | Render layer fallback snippet | ## Determinism / canonicalization @@ -73,16 +81,32 @@ Refs: Refs: - `codeclone/scanner.py:iter_py_files` -- `codeclone/_report_serialize.py:GROUP_ITEM_LAYOUT` +- `codeclone/report/json_contract.py:build_report_document` ## Locked by tests -- `tests/test_report.py::test_report_json_compact_v11_contract` +- `tests/test_report.py::test_report_json_compact_v21_contract` - `tests/test_html_report.py::test_html_report_uses_core_block_group_facts` - `tests/test_cache.py::test_cache_v13_uses_relpaths_when_root_set` - `tests/test_cli_unit.py::test_argument_parser_contract_error_marker_for_invalid_args` +- `tests/test_architecture.py::test_architecture_layer_violations` ## Non-guarantees -- Internal module split may change in v1.x if public contracts are preserved. -- Import tree acyclicity is a policy goal, not currently enforced by tooling. +- Internal module split may evolve in v2.x if public contracts are preserved. +- Import tree acyclicity is policy and test-enforced where explicitly asserted. 
+ +## Chapter map + +| Topic | Primary chapters | +|---------------------------------------|------------------------------------------------------------------------------------------------------------------| +| CLI behavior and failure routing | [03-contracts-exit-codes.md](03-contracts-exit-codes.md), [09-cli.md](09-cli.md) | +| Config precedence and defaults | [04-config-and-defaults.md](04-config-and-defaults.md) | +| Core processing pipeline | [05-core-pipeline.md](05-core-pipeline.md) | +| Clone baseline trust/compat/integrity | [06-baseline.md](06-baseline.md) | +| Cache trust and fail-open behavior | [07-cache.md](07-cache.md) | +| Report schema and provenance | [08-report.md](08-report.md), [10-html-render.md](10-html-render.md) | +| Metrics gates and metrics baseline | [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) | +| Dead-code liveness policy | [16-dead-code-contract.md](16-dead-code-contract.md) | +| Suggestions and clone typing | [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) | +| Determinism and versioning policy | [12-determinism.md](12-determinism.md), [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) | diff --git a/docs/book/02-terminology.md b/docs/book/02-terminology.md index 284a1e3..73a29e1 100644 --- a/docs/book/02-terminology.md +++ b/docs/book/02-terminology.md @@ -1,75 +1,95 @@ # 02. Terminology ## Purpose + Define terms exactly as used by code and tests. ## Public surface + - Baseline identifiers and statuses: `codeclone/baseline.py` - Cache statuses and compact layout: `codeclone/cache.py` -- Report schema and group layouts: `codeclone/_report_serialize.py` +- Report schema and group layouts: `codeclone/report/json_contract.py` ## Data model + - **fingerprint**: function-level CFG fingerprint (`sha1`) + LOC bucket key. - **block_hash**: ordered sequence of normalized statement hashes in a fixed window. - **segment_hash**: hash of ordered segment window. 
- **segment_sig**: hash of sorted segment window (candidate grouping signature). +- **stable structure facts**: per-function deterministic structure profile fields + (`entry_guard_*`, `terminal_kind`, `try_finally_profile`, + `side_effect_order_profile`) reused by report families. +- **cohort structural findings**: report-only structural families derived from + existing function-clone groups (`clone_guard_exit_divergence`, + `clone_cohort_drift`). - **python_tag**: runtime compatibility tag like `cp313`. - **schema_version**: - - baseline schema (`meta.schema_version`) for baseline compatibility. - - cache schema (`v`) for cache compatibility. - - report schema (`meta.report_schema_version`) for report format compatibility. + - baseline schema (`meta.schema_version`) for baseline compatibility. + - cache schema (`v`) for cache compatibility. + - report schema (`report_schema_version`) for report format compatibility. - **payload_sha256**: canonical baseline semantic hash. - **trusted baseline**: baseline loaded + status `ok`. Refs: -- `codeclone/_report_grouping.py:build_groups` + +- `codeclone/grouping.py:build_groups` - `codeclone/blocks.py:extract_blocks` - `codeclone/blocks.py:extract_segments` - `codeclone/baseline.py:current_python_tag` - `codeclone/baseline.py:Baseline.verify_compatibility` ## Contracts + - New/known classification is key-based, not item-heuristic-based. - Baseline trust is status-driven. - Cache trust is status-driven and independent from baseline trust. Refs: -- `codeclone/_report_serialize.py:_split_for` + +- `codeclone/report/json_contract.py:build_report_document` - `codeclone/cli.py:_main_impl` ## Invariants (MUST) + - Function group key format: `fingerprint|loc_bucket`. - Block group key format: `block_hash`. - Segment group key format: `segment_hash|qualname` (internal/report-only grouping path). 
Refs: -- `codeclone/_report_grouping.py:build_groups` -- `codeclone/_report_grouping.py:build_block_groups` -- `codeclone/_report_grouping.py:build_segment_groups` + +- `codeclone/grouping.py:build_groups` +- `codeclone/grouping.py:build_block_groups` +- `codeclone/grouping.py:build_segment_groups` ## Failure modes -| Condition | Result | -| --- | --- | -| Baseline generator name != `codeclone` | `generator_mismatch` | -| Baseline python tag mismatch | `mismatch_python_version` | -| Cache signature mismatch | `integrity_failed` cache status | + +| Condition | Result | +|----------------------------------------|---------------------------------| +| Baseline generator name != `codeclone` | `generator_mismatch` | +| Baseline python tag mismatch | `mismatch_python_version` | +| Cache signature mismatch | `integrity_failed` cache status | Refs: + - `codeclone/baseline.py:Baseline.verify_compatibility` -- `codeclone/cache.py:Cache._parse_cache_document` +- `codeclone/cache.py:Cache.load` ## Determinism / canonicalization + - Baseline clone ID lists must be sorted and unique. - Cache compact arrays are sorted by deterministic tuple keys before write. Refs: + - `codeclone/baseline.py:_require_sorted_unique_ids` - `codeclone/cache.py:_encode_wire_file_entry` ## Locked by tests + - `tests/test_baseline.py::test_baseline_id_lists_must_be_sorted_and_unique` - `tests/test_report.py::test_report_json_group_order_is_lexicographic` - `tests/test_cache.py::test_cache_version_mismatch_warns` ## Non-guarantees + - Exact wording of status descriptions in UI is not a schema contract. diff --git a/docs/book/03-contracts-exit-codes.md b/docs/book/03-contracts-exit-codes.md index 46a47f6..89a1747 100644 --- a/docs/book/03-contracts-exit-codes.md +++ b/docs/book/03-contracts-exit-codes.md @@ -1,63 +1,77 @@ # 03. Contracts: Exit Codes ## Purpose + Define stable process exit semantics and category boundaries. 
## Public surface + - Exit enum: `codeclone/contracts.py:ExitCode` - CLI categorization and exits: `codeclone/cli.py:_main_impl`, `codeclone/cli.py:main` - Error markers: `codeclone/ui_messages.py` ## Data model -| Exit code | Category | Meaning | -| --- | --- | --- | -| 0 | success | Run completed without gating failures | -| 2 | contract error | Input/contract violation (baseline trust, output path/ext, unreadable source in gating) | -| 3 | gating failure | Analysis succeeded but policy failed (`--fail-on-new`, `--fail-threshold`) | -| 5 | internal error | Unexpected exception escaped `_main_impl` | + +| Exit code | Category | Meaning | +|-----------|----------------|------------------------------------------------------------------------------------------------------------------------| +| 0 | success | Run completed without gating failures | +| 2 | contract error | Input/contract violation (baseline trust, output path/ext, invalid CLI flag combinations, unreadable source in gating) | +| 3 | gating failure | Analysis succeeded but policy failed (`--fail-on-new`, `--fail-threshold`, metrics gates) | +| 5 | internal error | Unexpected exception escaped `_main_impl` | Refs: + - `codeclone/contracts.py:ExitCode` - `codeclone/_cli_args.py:_ArgumentParser.error` ## Contracts + - Contract errors must use `CONTRACT ERROR:` marker. - Gating failures must use `GATING FAILURE:` marker. - Internal errors are formatted by `fmt_internal_error`; traceback hidden unless debug enabled. Refs: + - `codeclone/ui_messages.py:fmt_contract_error` - `codeclone/ui_messages.py:fmt_gating_failure` - `codeclone/ui_messages.py:fmt_internal_error` ## Invariants (MUST) + - `SystemExit` from contract/gating paths must pass through `main()` unchanged. - Only non-`SystemExit` exceptions in `main()` become exit 5. - In gating mode, unreadable source files force exit 2 even if clone gating would also fail. 
Refs: + - `codeclone/cli.py:main` - `codeclone/cli.py:_main_impl` ## Failure modes -| Condition | Marker | Exit | -| --- | --- | --- | -| Invalid output extension | CONTRACT ERROR | 2 | -| Untrusted baseline in CI/gating | CONTRACT ERROR | 2 | -| Unreadable source in CI/gating | CONTRACT ERROR | 2 | -| New clones with `--fail-on-new` | GATING FAILURE | 3 | -| Threshold exceeded | GATING FAILURE | 3 | -| Unexpected exception in main pipeline | INTERNAL ERROR | 5 | + +| Condition | Marker | Exit | +|----------------------------------------------|----------------|------| +| Invalid output extension | CONTRACT ERROR | 2 | +| `--open-html-report` without `--html` | CONTRACT ERROR | 2 | +| `--timestamped-report-paths` without reports | CONTRACT ERROR | 2 | +| Untrusted baseline in CI/gating | CONTRACT ERROR | 2 | +| Unreadable source in CI/gating | CONTRACT ERROR | 2 | +| New clones with `--fail-on-new` | GATING FAILURE | 3 | +| Threshold exceeded | GATING FAILURE | 3 | +| Unexpected exception in main pipeline | INTERNAL ERROR | 5 | ## Determinism / canonicalization + - Help epilog strings are generated from static constants. - Error category markers are static constants. Refs: + - `codeclone/contracts.py:cli_help_epilog` - `codeclone/ui_messages.py:MARKER_CONTRACT_ERROR` ## Locked by tests + - `tests/test_cli_unit.py::test_cli_help_text_consistency` - `tests/test_cli_unit.py::test_cli_internal_error_marker` - `tests/test_cli_unit.py::test_cli_internal_error_debug_flag_includes_traceback` @@ -65,4 +79,10 @@ Refs: - `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` ## Non-guarantees + - Exact message body text may evolve; category marker and exit code are contract. 
+ +## See also + +- [09-cli.md](09-cli.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md index 6e18e03..eb7d8f5 100644 --- a/docs/book/04-config-and-defaults.md +++ b/docs/book/04-config-and-defaults.md @@ -7,36 +7,82 @@ Describe effective runtime configuration and defaults that affect behavior. ## Public surface - CLI parser and defaults: `codeclone/_cli_args.py:build_parser` -- Effective cache default path logic: `codeclone/cli.py:_main_impl` +- Pyproject config loader: `codeclone/_cli_config.py` +- Effective cache default path logic: `codeclone/cli.py:_resolve_cache_path` +- Metrics-mode selection logic: `codeclone/cli.py:_configure_metrics_mode` - Debug mode sources: `codeclone/cli.py:_is_debug_enabled` ## Data model Configuration sources, in precedence order: -1. CLI flags (`argparse`) -2. Environment (`CODECLONE_DEBUG=1` for debug diagnostics) +1. CLI flags (`argparse`, explicit options only) +2. `pyproject.toml` section `[tool.codeclone]` 3. Code defaults in parser and runtime +`CODECLONE_DEBUG=1` affects debug diagnostics only and is not part of analysis +or gating configuration precedence. 
+ Key defaults: - `root="."` -- `--min-loc=15` +- `--min-loc=10` - `--min-stmt=6` - `--processes=4` - `--baseline=codeclone.baseline.json` - `--max-baseline-size-mb=5` - `--max-cache-size-mb=50` - default cache path (when no cache flag is given): `<root>/.cache/codeclone/cache.json` +- `--metrics-baseline=codeclone.baseline.json` (same default path as `--baseline`) +- bare reporting flags use default report paths: + - `--html` -> `<root>/.cache/codeclone/report.html` + - `--json` -> `<root>/.cache/codeclone/report.json` + - `--md` -> `<root>/.cache/codeclone/report.md` + - `--sarif` -> `<root>/.cache/codeclone/report.sarif` + - `--text` -> `<root>/.cache/codeclone/report.txt` + +Fragment-level admission thresholds (pyproject.toml only, advanced tuning): + +- `block_min_loc=20` — minimum function LOC for block-level sliding window +- `block_min_stmt=8` — minimum function statements for block-level sliding window +- `segment_min_loc=20` — minimum function LOC for segment-level sliding window +- `segment_min_stmt=10` — minimum function statements for segment-level sliding window + +Example project-level config: + +```toml +[tool.codeclone] +min_loc = 10 +min_stmt = 6 +baseline = "codeclone.baseline.json" +skip_metrics = true +quiet = true +``` + +CLI always has precedence when an option is explicitly provided, including boolean +overrides via `--foo/--no-foo` (e.g. `--no-skip-metrics`). + +Path values loaded from `pyproject.toml` are normalized relative to resolved +scan root when provided as relative paths. + +Metrics baseline path selection contract: + +- If `--metrics-baseline` is explicitly set, that path is used. +- If `metrics_baseline` in `pyproject.toml` differs from parser default, that + configured path is used even without explicit CLI flag. +- Otherwise, metrics baseline defaults to the clone baseline path. 
Refs: - `codeclone/_cli_args.py:build_parser` - `codeclone/cli.py:_main_impl` +- `codeclone/cli.py:_configure_metrics_mode` ## Contracts - `--ci` is a preset: enables `fail_on_new`, `no_color`, `quiet`. +- In CI mode, if trusted metrics baseline is loaded, runtime also enables + `fail_on_new_metrics`. - `--quiet` implies `--no-progress`. - Negative size limits are contract errors. @@ -46,15 +92,22 @@ Refs: ## Invariants (MUST) -- Detection thresholds (`min-loc`, `min-stmt`) affect extraction. -- Detection thresholds (`min-loc`, `min-stmt`) are part of cache compatibility (`payload.ap`). -- Reporting flags (`--html/--json/--text`) affect output only. +- Detection thresholds (`min-loc`, `min-stmt`) affect function-level extraction. +- Fragment thresholds (`block_min_loc/stmt`, `segment_min_loc/stmt`) affect block/segment extraction. +- All six thresholds are part of cache compatibility (`payload.ap`). +- Reporting flags (`--html/--json/--md/--sarif/--text`) affect output only. +- Reporting flags accept optional path values; passing bare flag writes to + deterministic default path under `.cache/codeclone/`. - `--cache-path` overrides project-local cache default; legacy alias `--cache-dir` maps to same destination. +- Metrics baseline update/gating flags require metrics mode; incompatible + combinations with `--skip-metrics` are contract errors. +- Unknown keys or invalid value types in `[tool.codeclone]` are contract errors (exit 2). Refs: -- `codeclone/extractor.py:extract_units_from_source` +- `codeclone/extractor.py:extract_units_and_stats_from_source` - `codeclone/_cli_args.py:build_parser` +- `codeclone/cli.py:_configure_metrics_mode` ## Failure modes @@ -89,3 +142,8 @@ Refs: ## Non-guarantees - CLI help section ordering is stable today but not versioned independently from the CLI contract. 
+ +## See also + +- [09-cli.md](09-cli.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) diff --git a/docs/book/05-core-pipeline.md b/docs/book/05-core-pipeline.md index 521247c..81ed0ee 100644 --- a/docs/book/05-core-pipeline.md +++ b/docs/book/05-core-pipeline.md @@ -1,77 +1,112 @@ # 05. Core Pipeline ## Purpose + Describe the detection pipeline from file discovery to grouped clones. ## Public surface + Pipeline entrypoints: -- Discovery: `codeclone/scanner.py:iter_py_files` -- Per-file processing: `codeclone/cli.py:process_file` -- Extraction: `codeclone/extractor.py:extract_units_from_source` -- Grouping: `codeclone/_report_grouping.py` + +- Discovery stage: `codeclone/pipeline.py:discover` +- Per-file processing: `codeclone/pipeline.py:process_file` +- Extraction: `codeclone/extractor.py:extract_units_and_stats_from_source` +- Grouping: `codeclone/grouping.py` ## Data model + Stages: + 1. Discover Python files (`iter_py_files`, sorted traversal) 2. Load from cache if `stat` signature matches 3. Process changed files: - - read source - - AST parse with limits - - extract units/blocks/segments + - read source + - AST parse with limits + - extract units/blocks/segments 4. Build groups: - - function groups by `fingerprint|loc_bucket` - - block groups by `block_hash` - - segment groups by `segment_sig` then `segment_hash|qualname` + - function groups by `fingerprint|loc_bucket` + - block groups by `block_hash` + - segment groups by `segment_sig` then `segment_hash|qualname` 5. Report-layer post-processing: - - merge block windows to maximal regions - - merge/suppress segment report groups + - merge block windows to maximal regions + - merge/suppress segment report groups +6. 
Structural report findings: + - duplicated branch families from per-function AST structure facts + - clone cohort drift families built from existing function groups (no rescan) Refs: -- `codeclone/cli.py:_main_impl` -- `codeclone/extractor.py:extract_units_from_source` -- `codeclone/_report_blocks.py:prepare_block_report_groups` -- `codeclone/_report_segments.py:prepare_segment_report_groups` + +- `codeclone/pipeline.py` +- `codeclone/extractor.py:extract_units_and_stats_from_source` +- `codeclone/report/blocks.py:prepare_block_report_groups` +- `codeclone/report/segments.py:prepare_segment_report_groups` ## Contracts + - Detection core (`extractor`, `normalize`, `cfg`, `blocks`) computes clone candidates. - Report-layer transformations do not change function/block grouping keys used for baseline diff. - Segment groups are report-only and do not participate in baseline diff/gating. +- Structural findings are report-only and do not participate in baseline diff/gating. +- Dead-code liveness references from test paths are excluded at extraction/cache-load boundaries for both + local-name references and canonical qualname references. Refs: + - `codeclone/cli.py:_main_impl` (diff uses only function/block groups) - `codeclone/baseline.py:Baseline.diff` +- `codeclone/extractor.py:extract_units_and_stats_from_source` +- `codeclone/pipeline.py:_load_cached_metrics` ## Invariants (MUST) + - `Files found = Files analyzed + Cache hits + Files skipped` warning if broken. - In gating mode, unreadable source IO (`source_read_error`) is a contract failure. - Parser time/resource protections are applied in POSIX mode via `_parse_limits`. 
Refs: + - `codeclone/_cli_summary.py:_print_summary` - `codeclone/cli.py:_main_impl` - `codeclone/extractor.py:_parse_limits` ## Failure modes -| Condition | Behavior | -| --- | --- | -| File stat/read/encoding error | File skipped; tracked as failed file; source-read subset tracked separately | -| Source read error in gating mode | Contract error exit 2 | -| Parser timeout | `ParseError` returned through processing failure path | -| Unexpected per-file exception | Captured as `ProcessingResult(error_kind="unexpected_error")` | + +| Condition | Behavior | +|----------------------------------|-----------------------------------------------------------------------------| +| File stat/read/encoding error | File skipped; tracked as failed file; source-read subset tracked separately | +| Source read error in gating mode | Contract error exit 2 | +| Parser timeout | `ParseError` returned through processing failure path | +| Unexpected per-file exception | Captured as `ProcessingResult(error_kind="unexpected_error")` | ## Determinism / canonicalization + - File list is sorted. - Group sorting in reports is deterministic by key and stable item sort. 
Refs: + - `codeclone/scanner.py:iter_py_files` -- `codeclone/_report_serialize.py:_item_sort_key` +- `codeclone/report/json_contract.py:_build_clone_groups` +- `codeclone/report/json_contract.py:_build_structural_groups` +- `codeclone/report/json_contract.py:_build_integrity_payload` ## Locked by tests + - `tests/test_scanner_extra.py::test_iter_py_files_deterministic_sorted_order` - `tests/test_cli_inprocess.py::test_cli_summary_cache_miss_metrics` - `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` - `tests/test_extractor.py::test_parse_limits_triggers_timeout` +- `tests/test_extractor.py::test_dead_code_marks_symbol_dead_when_referenced_only_by_tests` +- `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` +- `tests/test_pipeline_metrics.py::test_load_cached_metrics_ignores_referenced_names_from_test_files` ## Non-guarantees + - Parallel scheduling order is not guaranteed; only final grouped output determinism is guaranteed. + +## See also + +- [08-report.md](08-report.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- [16-dead-code-contract.md](16-dead-code-contract.md) +- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) diff --git a/docs/book/06-baseline.md b/docs/book/06-baseline.md index a52e6a0..4547257 100644 --- a/docs/book/06-baseline.md +++ b/docs/book/06-baseline.md @@ -1,99 +1,133 @@ # 06. Baseline ## Purpose -Specify baseline schema v1, trust/compatibility checks, integrity hashing, and runtime behavior. + +Specify baseline schema v2, trust/compatibility checks, integrity hashing, and +runtime behavior. 
## Public surface + - Baseline object lifecycle: `codeclone/baseline.py:Baseline` - Baseline statuses: `codeclone/baseline.py:BaselineStatus` - Baseline status coercion: `codeclone/baseline.py:coerce_baseline_status` - CLI integration: `codeclone/cli.py:_main_impl` ## Data model + Canonical baseline shape: -- Top-level keys: `meta`, `clones` -- `meta` required keys: `generator`, `schema_version`, `fingerprint_version`, `python_tag`, `created_at`, `payload_sha256` + +- Required top-level keys: `meta`, `clones` +- Optional top-level key: `metrics` (unified baseline flow) +- `meta` required keys: + `generator`, `schema_version`, `fingerprint_version`, `python_tag`, + `created_at`, `payload_sha256` - `clones` required keys: `functions`, `blocks` - `functions` and `blocks` are sorted/unique `list[str]` Refs: -- `codeclone/baseline.py:_TOP_LEVEL_KEYS` + +- `codeclone/baseline.py:_TOP_LEVEL_REQUIRED_KEYS` +- `codeclone/baseline.py:_TOP_LEVEL_OPTIONAL_KEYS` - `codeclone/baseline.py:_META_REQUIRED_KEYS` - `codeclone/baseline.py:_CLONES_REQUIRED_KEYS` - `codeclone/baseline.py:_require_sorted_unique_ids` ## Contracts + Compatibility gates (`verify_compatibility`): + - `generator == "codeclone"` -- `schema_version` major/minor compatible with supported schema +- `schema_version` major/minor must be supported by runtime - `fingerprint_version == BASELINE_FINGERPRINT_VERSION` - `python_tag == current_python_tag()` - integrity verified via `payload_sha256` +Embedded metrics contract: + +- Top-level `metrics` is allowed only for baseline schema `>= 2.0`. +- Clone baseline save preserves existing embedded `metrics` payload and + `meta.metrics_payload_sha256`. 
+ Integrity payload includes only: + - `clones.functions` - `clones.blocks` - `meta.fingerprint_version` - `meta.python_tag` Integrity payload excludes: + - `meta.schema_version` - `meta.generator.*` - `meta.created_at` Refs: + - `codeclone/baseline.py:Baseline.verify_compatibility` - `codeclone/baseline.py:_compute_payload_sha256` +- `codeclone/baseline.py:_preserve_embedded_metrics` ## Invariants (MUST) -- Legacy top-level baselines (`functions`/`blocks` at root) are untrusted and require regeneration. + +- Legacy top-level baselines (`functions`/`blocks` at root) are untrusted and + require regeneration. - Baseline writes are atomic (`*.tmp` + `os.replace`, same filesystem). -- Baseline diff is set-based and ignores deleted baseline keys. +- Baseline diff is set-based and deterministic. Refs: + - `codeclone/baseline.py:_is_legacy_baseline_payload` - `codeclone/baseline.py:_atomic_write_json` - `codeclone/baseline.py:Baseline.diff` ## Failure modes -| Condition | Status | -| --- | --- | -| File missing | `missing` | -| Too large | `too_large` | -| JSON decode failure | `invalid_json` | + +| Condition | Status | +|-------------------------------|-----------------------------------| +| File missing | `missing` | +| Too large | `too_large` | +| JSON decode failure | `invalid_json` | | Top-level shape/type mismatch | `invalid_type` / `missing_fields` | -| Schema mismatch | `mismatch_schema_version` | -| Fingerprint mismatch | `mismatch_fingerprint_version` | -| Python tag mismatch | `mismatch_python_version` | -| Generator mismatch | `generator_mismatch` | -| Hash missing/invalid | `integrity_missing` | -| Hash mismatch | `integrity_failed` | +| Schema mismatch | `mismatch_schema_version` | +| Fingerprint mismatch | `mismatch_fingerprint_version` | +| Python tag mismatch | `mismatch_python_version` | +| Generator mismatch | `generator_mismatch` | +| Hash missing/invalid | `integrity_missing` | +| Hash mismatch | `integrity_failed` | CLI behavior: + - Normal mode: 
untrusted baseline is ignored, diff runs against empty baseline. -- Gating mode (`--ci` / `--fail-on-new`): untrusted baseline is contract error (exit 2). +- Gating mode (`--ci` / `--fail-on-new`): untrusted baseline is contract error + (exit 2). Refs: + - `codeclone/baseline.py:BaselineStatus` - `codeclone/cli.py:_main_impl` ## Determinism / canonicalization + - Clone IDs are serialized sorted. - Hash serialization uses canonical JSON (`sort_keys=True`, compact separators). - `payload_sha256` uses `hmac.compare_digest` during verification. Refs: + - `codeclone/baseline.py:_baseline_payload` - `codeclone/baseline.py:_compute_payload_sha256` - `codeclone/baseline.py:Baseline.verify_integrity` ## Locked by tests + - `tests/test_baseline.py::test_baseline_roundtrip_v1` - `tests/test_baseline.py::test_baseline_payload_fields_contract_invariant` - `tests/test_baseline.py::test_baseline_payload_sha256_independent_of_schema_version` - `tests/test_baseline.py::test_baseline_verify_python_tag_mismatch` -- `tests/test_cli_inprocess.py::test_cli_untrusted_baseline_fails_in_ci` +- `tests/test_cli_inprocess.py::test_cli_reports_include_audit_metadata_schema_mismatch` ## Non-guarantees -- Baseline generator version (`meta.generator.version`) is informational and not a compatibility gate. -- Baseline file ordering/indentation style is not part of compatibility contract. + +- Baseline generator version (`meta.generator.version`) is informational and not + a compatibility gate. +- Baseline file indentation/style is not part of compatibility contract. diff --git a/docs/book/07-cache.md b/docs/book/07-cache.md index ee92209..3690c7c 100644 --- a/docs/book/07-cache.md +++ b/docs/book/07-cache.md @@ -2,7 +2,7 @@ ## Purpose -Define cache schema v1.3, integrity verification, and fail-open behavior. +Define cache schema v2.2, integrity verification, and fail-open behavior. ## Public surface @@ -13,19 +13,34 @@ Define cache schema v1.3, integrity verification, and fail-open behavior. 
## Data model -On-disk schema (`v == "1.3"`): +On-disk schema (`v == "2.2"`): - Top-level: `v`, `payload`, `sig` -- `payload` keys: `py`, `fp`, `ap`, `files` -- `ap` (`analysis_profile`) keys: `min_loc`, `min_stmt` +- `payload` keys: `py`, `fp`, `ap`, `files`, optional `sr` +- `ap` (`analysis_profile`) keys: + - `min_loc`, `min_stmt` + - `block_min_loc`, `block_min_stmt` + - `segment_min_loc`, `segment_min_stmt` - `files` map stores compact per-file entries: - `st`: `[mtime_ns, size]` - - optional `u` (units), `b` (blocks), `s` (segments) + - `ss`: `[lines, functions, methods, classes]` (source stats snapshot) + - `u` (function units): compact row layout with structural facts: + `[qualname,start,end,loc,stmt_count,fingerprint,loc_bucket,cc,nesting,risk,raw_hash,entry_guard_count,entry_guard_terminal_profile,entry_guard_has_side_effect_before,terminal_kind,try_finally_profile,side_effect_order_profile]` + - optional analysis sections (`b`/`s` and metrics-related sections) + - `rn`: referenced local names (non-test files only) + - `rq`: referenced canonical qualnames (non-test files only) - file keys are wire relpaths when `root` is configured +- optional `sr` (`segment report projection`) stores precomputed segment-report + merge/suppression output: + - `d`: digest of raw segment groups + - `s`: suppressed segment groups count + - `g`: grouped merged segment items (wire rows) +- per-file `dc` (`dead_candidates`) rows do not repeat filepath; path is implied by + the containing file entry Refs: -- `codeclone/cache.py:Cache._parse_cache_document` +- `codeclone/cache.py:Cache.load` - `codeclone/cache.py:_encode_wire_file_entry` - `codeclone/cache.py:_decode_wire_file_entry` @@ -33,11 +48,15 @@ Refs: - Cache is optimization-only; invalid cache never blocks analysis. - Any cache trust failure triggers warning + empty cache fallback. +- Cached file entry without valid `ss` (`source_stats`) is treated as cache-miss for + processing counters and reprocessed. 
- Cache compatibility gates: - version `v == CACHE_VERSION` - `payload.py == current_python_tag()` - `payload.fp == BASELINE_FINGERPRINT_VERSION` - - `payload.ap == {"min_loc": , "min_stmt": }` + - `payload.ap` matches the current six-threshold analysis profile + (`min_loc`, `min_stmt`, `block_min_loc`, `block_min_stmt`, + `segment_min_loc`, `segment_min_stmt`) - `sig` equals deterministic hash of canonical payload Refs: @@ -50,12 +69,16 @@ Refs: - Cache save writes canonical JSON and atomically replaces target file. - Empty sections (`u`, `b`, `s`) are omitted from written wire entries. +- `rn`/`rq` are serialized as sorted unique arrays and omitted when empty. +- `ss` is written when source stats are available and is required for full cache-hit + accounting in discovery stage. - Legacy secret file `.cache_secret` is never used for trust; warning only. Refs: - `codeclone/cache.py:Cache.save` - `codeclone/cache.py:_encode_wire_file_entry` +- `codeclone/pipeline.py:discover` - `codeclone/cache.py:LEGACY_CACHE_SECRET_FILENAME` ## Failure modes @@ -84,6 +107,12 @@ Refs: - Cache signatures are computed over canonical JSON payload. - Wire file paths and row arrays are sorted before write. +- `rn`/`rq` are deterministically normalized to sorted unique arrays. +- Current schema decodes only the canonical row shapes that current runtime writes; + for `u` rows, decoder accepts legacy 11-column layout and canonical 17-column + layout (missing structural columns default to neutral values). +- `sr` is additive and optional; invalid/missing projection never invalidates the + cache and simply falls back to runtime recomputation. 
Refs: @@ -100,6 +129,7 @@ Refs: - `tests/test_cache.py::test_cache_too_large_warns` - `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` - `tests/test_cli_inprocess.py::test_cli_cache_analysis_profile_compatibility` +- `tests/test_pipeline_metrics.py::test_load_cached_metrics_ignores_referenced_names_from_test_files` ## Non-guarantees diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 3625b44..411267e 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -1,92 +1,153 @@ # 08. Report ## Purpose -Define report schema v1.1 and shared metadata contract across JSON/TXT/HTML. + +Define report contracts in `2.0.0b1`: canonical JSON (`report_schema_version=2.1`) +plus deterministic TXT/Markdown/SARIF projections. ## Public surface -- JSON serializer: `codeclone/_report_serialize.py:to_json_report` -- TXT serializer: `codeclone/_report_serialize.py:to_text_report` -- Shared metadata builder: `codeclone/_cli_meta.py:_build_report_meta` + +- Canonical report builder: `codeclone/report/json_contract.py:build_report_document` +- JSON/TXT renderers: `codeclone/report/serialize.py` +- Markdown renderer: `codeclone/report/markdown.py` +- SARIF renderer: `codeclone/report/sarif.py` - HTML renderer: `codeclone/html_report.py:build_html_report` +- Shared metadata source: `codeclone/_cli_meta.py:_build_report_meta` ## Data model -JSON v1.1 top-level fields: -- `meta` -- `files` -- `groups` -- `groups_split` -- `group_item_layout` -- optional `facts` -`group_item_layout` is explicit positional schema for compact arrays. 
+JSON report top-level (v2.1): -Refs: -- `codeclone/_report_serialize.py:GROUP_ITEM_LAYOUT` -- `codeclone/contracts.py:REPORT_SCHEMA_VERSION` +- `report_schema_version` +- `meta` +- `inventory` +- `findings` +- `metrics` +- `derived` +- `integrity` + +Canonical vs non-canonical split: + +- Canonical: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics` +- Non-canonical projection layer: `derived` +- Integrity metadata: `integrity` (`canonicalization` + `digest`) + +Derived projection layer: + +- `derived.suggestions[*]` — actionable projection cards keyed back to canonical + findings via `finding_id` +- `derived.overview` — summary-only overview facts: + - `families` + - `top_risks` + - `source_scope_breakdown` + - `health_snapshot` +- `derived.hotlists` — deterministic lists of canonical finding IDs: + - `most_actionable_ids` + - `highest_spread_ids` + - `production_hotspot_ids` + - `test_fixture_hotspot_ids` + +Finding families: + +- `findings.groups.clones.{functions,blocks,segments}` +- `findings.groups.structural.groups` +- `findings.groups.dead_code.groups` +- `findings.groups.design.groups` +- `findings.summary.suppressed.dead_code` (suppressed counter, non-active findings) + +Structural finding kinds currently emitted by core/report pipeline: + +- `duplicated_branches` +- `clone_guard_exit_divergence` +- `clone_cohort_drift` + +Per-group common axes (family-specific fields may extend): + +- identity: `id`, `family`, `category`, `kind` +- assessment: `severity`, `confidence`, `priority` +- scope: `source_scope` (`dominant_kind`, `breakdown`, `impact_scope`) +- spread: `spread.files`, `spread.functions` +- evidence: `items`, `facts` (+ optional `display_facts`) ## Contracts -Shared `meta` contract is produced once in CLI and consumed by all formats. 
-Key fields include: -- runtime: `codeclone_version`, `python_version`, `python_tag`, `report_schema_version` -- deterministic aggregates: `groups_counts.{functions|blocks|segments}.{total,new,known}` -- baseline provenance: `baseline_*`, including `baseline_payload_sha256` and verification flag -- cache provenance: `cache_path`, `cache_used`, `cache_status`, `cache_schema_version` -- IO transparency: `files_skipped_source_io` -Refs: -- `codeclone/_cli_meta.py:ReportMeta` -- `codeclone/_cli_meta.py:_build_report_meta` - -NEW/KNOWN split contract: -- Trusted baseline (`baseline_loaded=true` and `baseline_status=ok`): - - `new` from `new_*_group_keys` - - `known` is remaining keys -- Untrusted baseline: all groups are NEW, KNOWN is empty - -Refs: -- `codeclone/_report_serialize.py:_baseline_is_trusted` -- `codeclone/_report_serialize.py:to_json_report` -- `codeclone/_report_serialize.py:to_text_report` +- JSON is source of truth for report semantics. +- Markdown and SARIF are deterministic projections from the same report document. +- SARIF is an IDE/code-scanning-oriented projection: + - repo-relative result paths are anchored via `%SRCROOT%` + - referenced files are listed under `run.artifacts` + - clone results carry `baselineState` when clone novelty is known +- Derived layer (`suggestions`, `overview`, `hotlists`) does not replace canonical + findings/metrics. +- HTML overview cards are materialized from canonical findings plus + `derived.overview` + `derived.hotlists`; pre-expanded overview card payloads are + not part of the report contract. +- Overview hotspot/source-breakdown sections must resolve from canonical report + data or deterministic derived IDs; HTML must not silently substitute stale + placeholders such as `n/a` or empty-state cards when canonical data exists. +- `report_generated_at_utc` is carried in `meta.runtime` and reused by UI/renderers. 
+- Canonical `meta.scan_root` is normalized to `"."`; absolute runtime paths are + exposed under `meta.runtime.*_absolute`. +- `clone_type` and `novelty` are group-level properties inside clone groups. +- Cohort-drift structural families are report-only and must not affect baseline diff + or CI gating decisions. +- Dead-code suppressed candidates are carried only under metrics + (`metrics.families.dead_code.suppressed_items`) and never promoted to + active `findings.groups.dead_code`. ## Invariants (MUST) -- `groups_split` is key-index only; clone payload stays in `groups`. -- `groups_split[new] ∩ groups_split[known] = ∅` per section. -- `groups_split[new] ∪ groups_split[known] = groups.keys()` per section. -- Facts are core-owned (`build_block_group_facts`) and renderers only display them. -Refs: -- `codeclone/_report_serialize.py:to_json_report` -- `codeclone/_report_explain.py:build_block_group_facts` +- Stable ordering for groups/items/suggestions/hotlists. +- Stable ordering for SARIF rules, artifacts, and results. +- `derived.suggestions[*].finding_id` references existing canonical finding IDs. +- `derived.hotlists.*_ids` reference existing canonical finding IDs. +- SARIF `artifacts[*]` and `locations[*].artifactLocation.index` stay aligned. +- `integrity.digest` is computed from canonical sections only (derived excluded). +- `source_scope.impact_scope` is explicit and deterministic (`runtime`, + `non_runtime`, `mixed`). 
## Failure modes -| Condition | Behavior | -| --- | --- | -| Missing meta fields at render time | TXT/HTML render placeholders `(none)` or empty values | -| Untrusted baseline | JSON/TXT classify all groups as NEW; HTML shows untrusted note | -| Missing source snippets | HTML shows safe fallback snippet | -Refs: -- `codeclone/_report_serialize.py:_format_meta_text_value` -- `codeclone/html_report.py:build_html_report` -- `codeclone/_html_snippets.py:_render_code_block` +| Condition | Behavior | +|---------------------------------|------------------------------------------------| +| Missing optional UI/meta fields | Renderer falls back to empty/`(none)` display | +| Untrusted baseline | Clone novelty resolves to `new` for all groups | +| Missing snippet source in HTML | Safe fallback snippet block | ## Determinism / canonicalization -- `files` list is sorted and unique by collection strategy. -- Group keys are serialized in sorted order. -- Items are encoded and sorted by deterministic tuple keys. + +- Canonical payload is serialized with sorted keys for digest computation. +- Inventory file registry is normalized to relative paths. +- Structural findings are normalized, deduplicated, and sorted before serialization. 
Refs: -- `codeclone/_report_serialize.py:_collect_files` -- `codeclone/_report_serialize.py:_function_record_sort_key` -- `codeclone/_report_serialize.py:_block_record_sort_key` + +- `codeclone/report/json_contract.py:_build_integrity_payload` +- `codeclone/report/json_contract.py:_build_inventory_payload` +- `codeclone/structural_findings.py:normalize_structural_findings` ## Locked by tests -- `tests/test_report.py::test_report_json_compact_v11_contract` -- `tests/test_report.py::test_report_json_groups_split_trusted_baseline` -- `tests/test_report.py::test_report_json_groups_split_untrusted_baseline` -- `tests/test_report.py::test_to_text_report_trusted_baseline_split_sections` -- `tests/test_report.py::test_to_text_report_untrusted_baseline_known_sections_empty` + +- `tests/test_report.py::test_report_json_compact_v21_contract` +- `tests/test_report.py::test_report_json_integrity_matches_canonical_sections` +- `tests/test_report.py::test_report_json_integrity_ignores_derived_changes` +- `tests/test_report_contract_coverage.py::test_report_document_rich_invariants_and_renderers` +- `tests/test_report_contract_coverage.py::test_markdown_and_sarif_reuse_prebuilt_report_document` +- `tests/test_report_branch_invariants.py::test_overview_and_sarif_branch_invariants` +- `tests/test_report.py::test_json_includes_clone_guard_exit_divergence_structural_group` +- `tests/test_report.py::test_json_includes_clone_cohort_drift_structural_group` +- `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` ## Non-guarantees -- Optional `facts` payload may expand in v1.x without changing clone group semantics. -- HTML visual grouping controls are not part of JSON schema contract. + +- Human-readable wording in `derived` or HTML may evolve without schema bump. +- CSS/layout changes are not part of JSON contract. 
+ +## See also + +- [07-cache.md](07-cache.md) +- [09-cli.md](09-cli.md) +- [10-html-render.md](10-html-render.md) +- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) +- [../sarif.md](../sarif.md) +- [../examples/report.md](../examples/report.md) diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index fc6d136..f5b15c1 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -1,9 +1,11 @@ # 09. CLI ## Purpose + Define observable CLI behavior: argument handling, summaries, error UI, and output writing. ## Public surface + - CLI runner: `codeclone/cli.py:main`, `codeclone/cli.py:_main_impl` - Parser: `codeclone/_cli_args.py:build_parser` - Summary renderer: `codeclone/_cli_summary.py:_print_summary` @@ -11,61 +13,94 @@ Define observable CLI behavior: argument handling, summaries, error UI, and outp - Message catalog: `codeclone/ui_messages.py` ## Data model + CLI modes: + - Normal mode - Gating mode (`--ci`, `--fail-on-new`, `--fail-threshold>=0`) - Update mode (`--update-baseline`) Summary metrics: + - files found/analyzed/cache hits/skipped +- structural counters: analyzed lines/functions/methods/classes - function/block/segment groups - suppressed segment groups +- dead-code active/suppressed status in metrics line - new vs baseline Refs: -- `codeclone/_cli_summary.py:_build_summary_rows` + +- `codeclone/_cli_summary.py:_print_summary` +- `codeclone/ui_messages.py:fmt_summary_files` ## Contracts + - Help output includes canonical exit-code section and project links. +- Reporting flag UX uses explicit pairs (`--no-progress`/`--progress`, + `--no-color`/`--color`) and avoids generated double-negation aliases. +- `--open-html-report` is a local UX action layered on top of `--html`; it does not implicitly enable HTML output. +- `--timestamped-report-paths` only rewrites default report paths requested via bare report flags; explicit FILE values + stay unchanged. - Contract errors are prefixed by `CONTRACT ERROR:`. 
- Gating failures are prefixed by `GATING FAILURE:`. - Internal errors use `fmt_internal_error` with optional debug details. +- Runtime footer uses explicit wording: `Pipeline done in <seconds>s`. + This metric is CLI pipeline time and does not include external launcher/startup overhead (for example `uv run`). +- Dead-code metric line is stateful and deterministic: + - `N found (M suppressed)` when active dead-code items exist + - `✔ clean` when both active and suppressed are zero + - `✔ clean (M suppressed)` when active is zero but suppressed > 0 Refs: + - `codeclone/contracts.py:cli_help_epilog` - `codeclone/ui_messages.py:fmt_contract_error` - `codeclone/ui_messages.py:fmt_internal_error` ## Invariants (MUST) -- Report writes (`--html/--json/--text`) are path-validated and write failures are contract errors. + +- Report writes (`--html/--json/--md/--sarif/--text`) are path-validated and write failures are contract errors. +- Bare reporting flags write to default deterministic paths under + `.cache/codeclone/`. +- `--open-html-report` requires `--html`; invalid combination is a contract error. +- `--timestamped-report-paths` requires at least one requested report output; invalid combination is a contract error. +- Browser-open failure after a successful HTML write is warning-only and does not change the process exit code. - Baseline update write failure is contract error. - In gating mode, unreadable source files are contract errors with higher priority than clone gating failure. 
Refs: + - `codeclone/cli.py:_write_report_output` - `codeclone/cli.py:_main_impl` ## Failure modes -| Condition | User-facing category | Exit | -| --- | --- | --- | -| Invalid CLI flag | contract | 2 | -| Invalid output extension/path | contract | 2 | -| Baseline untrusted in CI/gating | contract | 2 | -| Unreadable source in CI/gating | contract | 2 | -| New clones with `--fail-on-new` | gating | 3 | -| Threshold exceeded | gating | 3 | -| Unexpected exception | internal | 5 | + +| Condition | User-facing category | Exit | +|----------------------------------------------|----------------------|------| +| Invalid CLI flag | contract | 2 | +| Invalid output extension/path | contract | 2 | +| `--open-html-report` without `--html` | contract | 2 | +| `--timestamped-report-paths` without reports | contract | 2 | +| Baseline untrusted in CI/gating | contract | 2 | +| Unreadable source in CI/gating | contract | 2 | +| New clones with `--fail-on-new` | gating | 3 | +| Threshold exceeded | gating | 3 | +| Unexpected exception | internal | 5 | ## Determinism / canonicalization + - Summary metric ordering is fixed. - Compact summary mode (`--quiet`) is fixed-format text. - Help epilog is generated from static constants. Refs: -- `codeclone/_cli_summary.py:_build_summary_rows` + +- `codeclone/_cli_summary.py:_print_summary` - `codeclone/contracts.py:EXIT_CODE_DESCRIPTIONS` ## Locked by tests + - `tests/test_cli_unit.py::test_cli_help_text_consistency` - `tests/test_cli_unit.py::test_argument_parser_contract_error_marker_for_invalid_args` - `tests/test_cli_inprocess.py::test_cli_summary_format_stable` @@ -73,5 +108,12 @@ Refs: - `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` ## Non-guarantees + - Rich styling details are not part of machine-facing CLI contract. - Warning phrasing may evolve if category markers and exit semantics stay stable. 
+ +## See also + +- [04-config-and-defaults.md](04-config-and-defaults.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- [16-dead-code-contract.md](16-dead-code-contract.md) diff --git a/docs/book/10-html-render.md b/docs/book/10-html-render.md index 8c58386..e93161f 100644 --- a/docs/book/10-html-render.md +++ b/docs/book/10-html-render.md @@ -1,67 +1,100 @@ # 10. HTML Render ## Purpose + Document HTML rendering as a pure view layer over report data/facts. ## Public surface + - Main renderer: `codeclone/html_report.py:build_html_report` +- HTML assembly package: `codeclone/_html_report/*` +- Overview materialization bridge: `codeclone/report/overview.py:materialize_report_overview` - Escaping helpers: `codeclone/_html_escape.py` - Snippet/highlight helpers: `codeclone/_html_snippets.py` - Static template: `codeclone/templates.py:REPORT_TEMPLATE` ## Data model + Inputs to renderer: -- grouped clone data (`func_groups`, `block_groups`, `segment_groups`) -- block explainability facts (`block_group_facts`) -- novelty key sets (`new_function_group_keys`, `new_block_group_keys`) -- shared report metadata (`report_meta`) + +- canonical report document (`report_document`) when available (preferred path) +- compatibility inputs for direct rendering path: + - grouped clone data (`func_groups`, `block_groups`, `segment_groups`) + - block explainability facts (`block_group_facts`) + - novelty key sets (`new_function_group_keys`, `new_block_group_keys`) + - shared report metadata (`report_meta`) Output: + - single self-contained HTML string Refs: + - `codeclone/html_report.py:build_html_report` ## Contracts + - HTML must not recompute detection semantics; it renders facts from core/report layers. - Explainability hints shown in UI are sourced from `build_block_group_facts` data. - Provenance panel mirrors report metadata contract. 
+- HTML may expose local UX affordances such as the health-grade badge dialog + or provenance modal, but those actions are projections over already computed + report/meta facts. +- Overview UI is a report projection: + - KPI cards with baseline-aware tone (`✓ baselined` / `+N` regression) + - Health gauge with baseline delta arc (improvement/degradation) + - Executive Summary: issue breakdown (sorted bars) + source breakdown + - Health Profile: full-width radar chart of dimension scores + - Get Badge modal: grade-only / score+grade variants with shields.io embed +- Dead-code UI is a single top-level `Dead Code` tab with deterministic split + sub-tabs: `Active` and `Suppressed`. Refs: -- `codeclone/_report_explain.py:build_block_group_facts` -- `codeclone/html_report.py:_render_group_explanation` -- `codeclone/html_report.py:report_meta_html` + +- `codeclone/report/explain.py:build_block_group_facts` +- `codeclone/report/overview.py:materialize_report_overview` +- `codeclone/_html_report/_sections/_clones.py:_render_group_explanation` +- `codeclone/_html_report/_sections/_meta.py:render_meta_panel` ## Invariants (MUST) + - All user/content fields are escaped for text/attributes before insertion. - Missing file snippets render explicit fallback blocks. - Novelty controls reflect baseline trust split note and per-group novelty flags. +- Suppressed dead-code rows are rendered only from report dead-code suppression + payloads and do not become active dead-code findings in UI tables. 
Refs: + - `codeclone/_html_escape.py:_escape_attr` - `codeclone/_html_snippets.py:_render_code_block` -- `codeclone/html_report.py:global_novelty_html` +- `codeclone/_html_report/_sections/_clones.py:render_clones_panel` ## Failure modes -| Condition | Behavior | -| --- | --- | -| Source file unreadable for snippet | Render fallback snippet with message | + +| Condition | Behavior | +|-------------------------------------|---------------------------------------------| +| Source file unreadable for snippet | Render fallback snippet with message | | Missing/invalid optional meta field | Render empty or `(none)`-equivalent display | -| Pygments unavailable | Escape-only fallback code rendering | +| Pygments unavailable | Escape-only fallback code rendering | Refs: + - `codeclone/_html_snippets.py:_FileCache.get_lines_range` - `codeclone/_html_snippets.py:_try_pygments` ## Determinism / canonicalization + - Section/group ordering follows sorted report inputs. - Metadata rows are built in fixed order. Refs: -- `codeclone/html_report.py:build_html_report` -- `codeclone/html_report.py:meta_rows` + +- `codeclone/_html_report/_assemble.py:build_html_report` +- `codeclone/_html_report/_sections/_meta.py:render_meta_panel` ## Locked by tests + - `tests/test_html_report.py::test_html_report_uses_core_block_group_facts` - `tests/test_html_report.py::test_html_report_escapes_meta_and_title` - `tests/test_html_report.py::test_html_report_escapes_script_breakout_payload` @@ -69,5 +102,9 @@ Refs: - `tests/test_html_report.py::test_html_and_json_group_order_consistent` ## Non-guarantees + - CSS/visual system and interaction details may evolve without schema bump. -- HTML command palette action set is not a baseline/cache/report contract. +- HTML-only interaction affordances (theme toggle, provenance modal, badge + modal, radar chart) are not baseline/cache/report contracts. 
+- Overview layout (KPI grid, executive summary, analytics) is a pure view + concern; only the underlying data identity and ordering are contract-sensitive. diff --git a/docs/book/11-security-model.md b/docs/book/11-security-model.md index 56aadcc..d6a271a 100644 --- a/docs/book/11-security-model.md +++ b/docs/book/11-security-model.md @@ -1,62 +1,74 @@ # 11. Security Model ## Purpose + Describe implemented protections and explicit security boundaries. ## Public surface + - Scanner path validation: `codeclone/scanner.py:iter_py_files` - File read limits and parser limits: `codeclone/cli.py:process_file`, `codeclone/extractor.py:_parse_limits` - Baseline/cache validation: `codeclone/baseline.py`, `codeclone/cache.py` - HTML escaping: `codeclone/_html_escape.py`, `codeclone/html_report.py` ## Data model + Security-relevant input classes: + - filesystem paths (root/source/baseline/cache/report) - untrusted JSON files (baseline/cache) - untrusted source snippets and metadata rendered into HTML ## Contracts + - CodeClone parses source text; it does not execute repository Python code. - Sensitive root directories are blocked by scanner policy. - Symlink traversal outside root is skipped. - HTML report escapes text and attribute contexts before embedding. Refs: + - `codeclone/extractor.py:_parse_with_limits` - `codeclone/scanner.py:SENSITIVE_DIRS` - `codeclone/scanner.py:iter_py_files` - `codeclone/_html_escape.py:_escape_html` ## Invariants (MUST) + - Baseline and cache integrity checks use constant-time comparison. - Size guards are enforced before parsing baseline/cache JSON. - Cache failures degrade safely (warning + ignore), baseline trust failures follow trust model. 
Refs: + - `codeclone/baseline.py:Baseline.verify_integrity` - `codeclone/cache.py:Cache.load` - `codeclone/cli.py:_main_impl` ## Failure modes -| Condition | Security behavior | -| --- | --- | -| Symlink points outside root | File skipped | -| Root under sensitive dirs | Validation error | -| Oversized baseline | Baseline rejected | -| Oversized cache | Cache ignored | -| HTML-injected payload in metadata/source | Escaped output | + +| Condition | Security behavior | +|------------------------------------------|-------------------| +| Symlink points outside root | File skipped | +| Root under sensitive dirs | Validation error | +| Oversized baseline | Baseline rejected | +| Oversized cache | Cache ignored | +| HTML-injected payload in metadata/source | Escaped output | ## Determinism / canonicalization + - Canonical JSON hashing for baseline/cache prevents formatting-only drift. - Security failures map to explicit statuses (baseline/cache enums). Refs: + - `codeclone/baseline.py:_compute_payload_sha256` - `codeclone/cache.py:_canonical_json` - `codeclone/baseline.py:BaselineStatus` - `codeclone/cache.py:CacheStatus` ## Locked by tests + - `tests/test_security.py::test_scanner_path_traversal` - `tests/test_scanner_extra.py::test_iter_py_files_symlink_loop_does_not_traverse` - `tests/test_security.py::test_html_report_escapes_user_content` @@ -64,4 +76,6 @@ Refs: - `tests/test_cache.py::test_cache_too_large_warns` ## Non-guarantees -- Baseline/cache integrity is tamper-evident at file-content level; it is not cryptographic attestation against a privileged attacker. + +- Baseline/cache integrity is tamper-evident at file-content level; it is not cryptographic attestation against a + privileged attacker. diff --git a/docs/book/12-determinism.md b/docs/book/12-determinism.md index 9a43aa8..bf74e02 100644 --- a/docs/book/12-determinism.md +++ b/docs/book/12-determinism.md @@ -1,15 +1,19 @@ # 12. 
Determinism ## Purpose + Document deterministic behavior and canonicalization controls. ## Public surface -- Sorting and traversal: `codeclone/scanner.py`, `codeclone/_report_serialize.py`, `codeclone/cache.py` + +- Sorting and traversal: `codeclone/scanner.py`, `codeclone/report/serialize.py`, `codeclone/cache.py` - Canonical hashing: `codeclone/baseline.py`, `codeclone/cache.py` - Golden detector snapshot policy: `tests/test_detector_golden.py` ## Data model + Deterministic outputs depend on: + - fixed Python tag - fixed baseline/cache/report schemas - sorted file traversal @@ -17,46 +21,57 @@ Deterministic outputs depend on: - canonical JSON serialization for hashes ## Contracts + - JSON report uses deterministic ordering for files/groups/items. - TXT report uses deterministic metadata key order and group/item ordering. - Baseline hash is canonical and independent from non-payload metadata fields. - Cache signature is canonical and independent from JSON whitespace. Refs: -- `codeclone/_report_serialize.py:to_json_report` -- `codeclone/_report_serialize.py:to_text_report` + +- `codeclone/report/json_contract.py:build_report_document` +- `codeclone/report/serialize.py:render_text_report_document` - `codeclone/baseline.py:_compute_payload_sha256` - `codeclone/cache.py:_sign_data` ## Invariants (MUST) -- `files` list is lexicographically sorted. -- `groups_split` key lists are lexicographically sorted. + +- `inventory.file_registry.items` is lexicographically sorted. +- finding groups/items and derived hotlists are deterministically ordered. - Baseline clone lists are sorted and unique. - Golden detector test runs only on canonical Python tag from fixture metadata. 
Refs: -- `codeclone/_report_serialize.py:_collect_files` + +- `codeclone/report/json_contract.py:_build_inventory_payload` - `codeclone/baseline.py:_require_sorted_unique_ids` - `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` ## Failure modes -| Condition | Determinism impact | -| --- | --- | -| Different Python tag | Clone IDs may differ; baseline considered incompatible | -| Unsorted/non-canonical baseline IDs | Baseline rejected as invalid | -| Cache signature mismatch | Cache ignored and recomputed | + +| Condition | Determinism impact | +|-------------------------------------|--------------------------------------------------------| +| Different Python tag | Clone IDs may differ; baseline considered incompatible | +| Unsorted/non-canonical baseline IDs | Baseline rejected as invalid | +| Cache signature mismatch | Cache ignored and recomputed | +| Different cache provenance state | `meta.cache_*` differs by design | ## Determinism / canonicalization + Primary canonicalization points: -- `json.dumps(..., sort_keys=True, separators=(",", ":"), ensure_ascii=False)` for baseline/cache payload hash/signature. + +- `json.dumps(..., sort_keys=True, separators=(",", ":"), ensure_ascii=False)` for baseline/cache payload + hash/signature. - tuple-based sort keys for report record arrays. Refs: + - `codeclone/baseline.py:_compute_payload_sha256` - `codeclone/cache.py:_canonical_json` -- `codeclone/_report_serialize.py:_function_record_sort_key` +- `codeclone/report/json_contract.py:_build_integrity_payload` ## Locked by tests + - `tests/test_report.py::test_report_json_deterministic_group_order` - `tests/test_report.py::test_report_json_deterministic_with_shuffled_units` - `tests/test_report.py::test_text_report_deterministic_group_order` @@ -64,4 +79,7 @@ Refs: - `tests/test_cache.py::test_cache_signature_validation_ignores_json_whitespace` ## Non-guarantees + - Determinism is not guaranteed across different `python_tag` values. 
+- Byte-identical reports are not guaranteed across different cache provenance + states (`cache_status`, `cache_used`, `cache_schema_version`). diff --git a/docs/book/13-testing-as-spec.md b/docs/book/13-testing-as-spec.md index d6833ce..ac46762 100644 --- a/docs/book/13-testing-as-spec.md +++ b/docs/book/13-testing-as-spec.md @@ -14,6 +14,8 @@ Contract tests are concentrated in: - `tests/test_cli_inprocess.py` - `tests/test_cli_unit.py` - `tests/test_html_report.py` +- `tests/test_detector_golden.py` +- `tests/test_golden_v2.py` ## Data model @@ -30,9 +32,9 @@ The following matrix is treated as executable contract: | Contract | Tests | |--------------------------------------------|---------------------------------------------------------------------------------------------------------------| | Baseline schema/integrity/compat gates | `tests/test_baseline.py` | -| Cache fail-open + status mapping | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` | +| Cache v2.2 fail-open + status mapping | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` | | Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | -| Report schema v1.1 JSON/TXT split + layout | `tests/test_report.py` | +| Report schema v2.1 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | | HTML render-only explainability + escaping | `tests/test_html_report.py` | | Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | @@ -41,10 +43,15 @@ The following matrix is treated as executable contract: - Every schema/status contract change requires tests and docs update. - Golden detector fixture is canonicalized to one Python tag. - Untrusted baseline behavior must be tested for both normal and gating modes. 
+- V2 golden fixtures lock dead-code/test-path semantics, metrics/dependency aggregates, + stable per-function structural fact surfaces (`stable_structure` / + `cohort_structural_findings`), and CLI+`pyproject.toml` contract behavior. Refs: - `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` +- `tests/test_golden_v2.py::test_golden_v2_analysis_contracts` +- `tests/test_golden_v2.py::test_golden_v2_cli_pyproject_contract` - `tests/test_cli_inprocess.py::test_cli_legacy_baseline_normal_mode_ignored_and_exit_zero` - `tests/test_cli_inprocess.py::test_cli_legacy_baseline_fail_on_new_fails_fast_exit_2` @@ -54,7 +61,7 @@ Refs: |---------------------------------|-----------------------------------------| | Baseline payload contract drift | baseline integrity/canonical tests fail | | Cache schema drift | cache version/parse tests fail | -| Report schema drift | compact v1.1 layout tests fail | +| Report schema drift | compact layout tests fail | | Exit priority drift | CI inprocess tests fail | ## Determinism / canonicalization @@ -65,9 +72,15 @@ Refs: - `tests/test_baseline.py::test_baseline_payload_fields_contract_invariant` - `tests/test_cache.py::test_cache_v13_missing_optional_sections_default_empty` -- `tests/test_report.py::test_report_json_compact_v11_contract` +- `tests/test_report.py::test_report_json_compact_v21_contract` - `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` - `tests/test_html_report.py::test_html_and_json_group_order_consistent` +- `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` +- `tests/test_golden_v2.py::test_golden_v2_analysis_contracts` +- `tests/test_golden_v2.py::test_golden_v2_cli_pyproject_contract` +- `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` +- `tests/test_extractor.py::test_collect_dead_candidates_skips_protocol_and_stub_like_symbols` +- 
`tests/test_metrics_modules.py::test_find_unused_respects_referenced_qualnames` ## Non-guarantees diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md index 9fd25d4..563ff7d 100644 --- a/docs/book/14-compatibility-and-versioning.md +++ b/docs/book/14-compatibility-and-versioning.md @@ -2,23 +2,26 @@ ## Purpose -Define when to bump baseline/cache/report/fingerprint versions and what breaks. +Define when to bump baseline/cache/report/fingerprint versions and how runtime +compatibility is enforced. ## Public surface - Version constants: `codeclone/contracts.py` - Baseline compatibility checks: `codeclone/baseline.py:Baseline.verify_compatibility` -- Cache compatibility checks: `codeclone/cache.py:Cache._parse_cache_document` -- Report schema assignment: `codeclone/_report_serialize.py:to_json_report` +- Metrics baseline compatibility checks: `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility` +- Cache compatibility checks: `codeclone/cache.py:Cache.load` +- Report schema assignment: `codeclone/report/json_contract.py:build_report_document` ## Data model Current contract versions: -- `BASELINE_SCHEMA_VERSION = "1.0"` +- `BASELINE_SCHEMA_VERSION = "2.0"` - `BASELINE_FINGERPRINT_VERSION = "1"` -- `CACHE_VERSION = "1.3"` -- `REPORT_SCHEMA_VERSION = "1.1"` +- `CACHE_VERSION = "2.2"` +- `REPORT_SCHEMA_VERSION = "2.1"` +- `METRICS_BASELINE_SCHEMA_VERSION = "1.0"` (standalone metrics-baseline file) Refs: @@ -29,27 +32,29 @@ Refs: Version bump rules: - Bump **baseline schema** only for baseline JSON layout/type changes. -- Bump **fingerprint version** when detection semantics affecting function/block keys change. -- Bump **cache schema** for cache wire format changes. - - Example: adding `payload.ap` (`min_loc`, `min_stmt`) to cache compatibility. -- Bump **report schema** for JSON/TXT/HTML data contract changes. +- Bump **fingerprint version** when clone key semantics change. 
+- Bump **cache schema** for cache wire-format/validation changes. +- Bump **report schema** for canonical report document contract changes + (`report_schema_version`, consumed by JSON/TXT/Markdown/SARIF and HTML provenance/view). +- Bump **metrics-baseline schema** only for standalone metrics-baseline payload changes. + +Baseline compatibility rules: + +- Runtime accepts baseline schema majors `1` and `2` with supported minors. +- Runtime writes current schema (`2.0`) on new/updated baseline saves. +- Embedded top-level `metrics` is valid only for baseline schema `>= 2.0`. Baseline regeneration rules: - Required when `fingerprint_version` changes. - Required when `python_tag` changes. -- Not required for package patch/minor changes alone if compatibility gates still pass. - -Refs: - -- `codeclone/baseline.py:Baseline.from_groups` -- `codeclone/cli.py:_main_impl` +- Not required for package patch/minor updates if compatibility gates still pass. ## Invariants (MUST) -- Contract changes must include tests and changelog/docs updates. -- Schema mismatch must map to explicit statuses (not generic fallback). -- Legacy baseline layout is untrusted and requires explicit regeneration. +- Contract changes must include code updates and changelog/docs updates. +- Schema mismatches must map to explicit statuses. +- Legacy baseline payloads (<=1.3 layout) remain untrusted and require regeneration. 
Refs: @@ -58,36 +63,34 @@ Refs: ## Failure modes -| Change type | User impact | -|----------------------|--------------------------------------------------| -| Baseline schema bump | old baselines become untrusted until regenerated | -| Fingerprint bump | baseline diff keys change; regeneration required | -| Cache schema bump | old caches ignored and regenerated automatically | -| Report schema bump | downstream JSON/TXT consumers must update | +| Change type | User impact | +|------------------------------|-----------------------------------------------------------------------| +| Baseline schema bump | older unsupported baseline schemas become untrusted until regenerated | +| Fingerprint bump | clone IDs change; baseline regeneration required | +| Cache schema bump | old caches are ignored and rebuilt automatically | +| Report schema bump | downstream report consumers must update | +| Metrics-baseline schema bump | standalone metrics baseline must be regenerated | ## Determinism / canonicalization -- Version constants are explicit and imported where enforced. -- Compatibility is code-driven, not documentation-driven. +- Version constants are explicit and enforced in code. +- Compatibility decisions are runtime checks, not doc-only expectations. Refs: - `codeclone/contracts.py` - `codeclone/baseline.py:Baseline.verify_compatibility` +- `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility` ## Locked by tests - `tests/test_baseline.py::test_baseline_verify_schema_too_new` +- `tests/test_baseline.py::test_baseline_verify_schema_major_mismatch` - `tests/test_baseline.py::test_baseline_verify_fingerprint_mismatch` - `tests/test_cache.py::test_cache_v_field_version_mismatch_warns` -- `tests/test_report.py::test_report_json_compact_v11_contract` +- `tests/test_report.py::test_report_json_compact_v21_contract` ## Non-guarantees -- Backward compatibility is not promised across incompatible schema/fingerprint bumps. 
- -## 1.5 architecture note - -Planned for v1.5: architecture-layer review and module organization cleanup. -No planned change to clone-detection semantics or determinism contracts unless accompanied by explicit -fingerprint/schema version bumps and tests. +- Backward compatibility is not guaranteed across incompatible schema/fingerprint + bumps. diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/15-metrics-and-quality-gates.md new file mode 100644 index 0000000..ed9d483 --- /dev/null +++ b/docs/book/15-metrics-and-quality-gates.md @@ -0,0 +1,129 @@ +# 15. Metrics and Quality Gates + +## Purpose + +Define metrics mode selection, metrics-baseline behavior, and gating semantics. + +## Public surface + +- Metrics mode wiring: `codeclone/cli.py:_configure_metrics_mode` +- Main orchestration and exit routing: `codeclone/cli.py:_main_impl` +- Gate evaluation: `codeclone/pipeline.py:metric_gate_reasons`, + `codeclone/pipeline.py:gate` +- Metrics baseline persistence/diff: `codeclone/metrics_baseline.py:MetricsBaseline` + +## Data model + +Metrics gate inputs: + +- threshold gates: + `--fail-complexity`, `--fail-coupling`, `--fail-cohesion`, `--fail-health` +- boolean structural gates: + `--fail-cycles`, `--fail-dead-code` +- delta gate: + `--fail-on-new-metrics` +- baseline update: + `--update-metrics-baseline` + +Modes: + +- `analysis_mode=full`: metrics computed and suggestions enabled +- `analysis_mode=clones_only`: metrics skipped +- Health score is a weighted blend: clones 25%, complexity 20%, cohesion 15%, + coupling 10%, dead code 10%, dependencies 10%, coverage 10%. +- Clone dimension uses a piecewise density curve with breakpoints at 0.05 + (score 90), 0.20 (score 50), 0.50 (score 0). Below 5% density the penalty + is mild; 5–20% is steep; above 20% is aggressive. +- Grade bands: A ≥90, B ≥75, C ≥60, D ≥40, F <40. 
+ +Refs: + +- `codeclone/cli.py:_metrics_flags_requested` +- `codeclone/cli.py:_metrics_computed` +- `codeclone/_cli_meta.py:_build_report_meta` +- `codeclone/metrics/health.py:compute_health` +- `codeclone/contracts.py:HEALTH_WEIGHTS` + +## Contracts + +- `--skip-metrics` is incompatible with metrics gating/update flags and is a + contract error. +- If metrics are not explicitly requested and no metrics baseline exists, + runtime auto-enables clone-only mode (`skip_metrics=true`). +- In clone-only mode: + `skip_dead_code=true`, `skip_dependencies=true`. +- `--fail-dead-code` forces dead-code analysis on. +- `--fail-cycles` forces dependency analysis on. +- `--update-baseline` in full mode implies metrics-baseline update in the same + run. +- If metrics baseline path equals clone baseline path and clone baseline file is + missing, `--update-metrics-baseline` escalates to `--update-baseline` so + embedded metrics can be written safely. +- `--fail-on-new-metrics` requires trusted metrics baseline unless baseline is + being updated in the same run. +- In CI mode, if metrics baseline was loaded and trusted, runtime enables + `fail_on_new_metrics=true`. + +Refs: + +- `codeclone/cli.py:_configure_metrics_mode` +- `codeclone/cli.py:_main_impl` +- `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility` + +## Invariants (MUST) + +- Metrics diff is computed only when: + metrics were computed and metrics baseline is trusted. +- Metric gate reasons are emitted in deterministic order: + threshold checks -> cycles/dead/health -> NEW-vs-baseline diffs. +- Metric gate reasons are namespaced as `metric:*` in gate output. 
+ +Refs: + +- `codeclone/pipeline.py:metric_gate_reasons` +- `codeclone/pipeline.py:gate` + +## Failure modes + +| Condition | Behavior | +|------------------------------------------------------------|--------------------------| +| `--skip-metrics` with metrics flags | Contract error, exit `2` | +| `--fail-on-new-metrics` without trusted baseline | Contract error, exit `2` | +| `--update-metrics-baseline` when metrics were not computed | Contract error, exit `2` | +| Threshold breach or NEW-vs-baseline metric regressions | Gating failure, exit `3` | + +## Determinism / canonicalization + +- Metrics baseline snapshot fields are canonicalized and sorted where set-like. +- Metrics payload hash uses canonical JSON and constant-time comparison. +- Gate reason generation order is fixed by code path order. + +Refs: + +- `codeclone/metrics_baseline.py:snapshot_from_project_metrics` +- `codeclone/metrics_baseline.py:_compute_payload_sha256` +- `codeclone/metrics_baseline.py:MetricsBaseline.verify_integrity` + +## Locked by tests + +- `tests/test_cli_unit.py::test_configure_metrics_mode_rejects_skip_metrics_with_metrics_flags` +- `tests/test_cli_unit.py::test_main_impl_rejects_update_metrics_baseline_when_metrics_skipped` +- `tests/test_cli_unit.py::test_main_impl_fail_on_new_metrics_requires_existing_baseline` +- `tests/test_cli_unit.py::test_main_impl_ci_enables_fail_on_new_metrics_when_metrics_baseline_loaded` +- `tests/test_pipeline_metrics.py::test_metric_gate_reasons_collects_all_enabled_reasons` +- `tests/test_pipeline_metrics.py::test_metric_gate_reasons_partial_new_metrics_paths` +- `tests/test_metrics_baseline.py::test_metrics_baseline_embedded_clone_payload_and_schema_resolution` + +## Non-guarantees + +- Absolute threshold defaults are not frozen by this chapter. +- Metrics scoring internals, per-dimension weighting, and the exact clone + density curve may evolve if exit semantics and contract statuses stay stable. 
+ +## See also + +- [04-config-and-defaults.md](04-config-and-defaults.md) +- [05-core-pipeline.md](05-core-pipeline.md) +- [09-cli.md](09-cli.md) +- [16-dead-code-contract.md](16-dead-code-contract.md) +- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) diff --git a/docs/book/16-dead-code-contract.md b/docs/book/16-dead-code-contract.md new file mode 100644 index 0000000..118180b --- /dev/null +++ b/docs/book/16-dead-code-contract.md @@ -0,0 +1,133 @@ +# 16. Dead Code Contract + +## Purpose + +Define dead-code liveness rules, canonical symbol-usage boundaries, and gating semantics. + +## Public surface + +- Dead-code detection core: `codeclone/metrics/dead_code.py:find_unused` +- Test-path classifier: `codeclone/paths.py:is_test_filepath` +- Inline suppression parser/binder: `codeclone/suppressions.py` +- Extraction of referenced names/candidates: + `codeclone/extractor.py:extract_units_and_stats_from_source` +- Cache load boundary for referenced names: + `codeclone/pipeline.py:_load_cached_metrics` + +## Data model + +- Candidate model: `DeadCandidate` +- Output model: `DeadItem` (`confidence=high|medium`) +- Global liveness input: + - `referenced_names: frozenset[str]` + - `referenced_qualnames: frozenset[str]` + +Refs: + +- `codeclone/models.py:DeadCandidate` +- `codeclone/models.py:DeadItem` + +## Contracts + +- References from test files are excluded from liveness accounting. +- Symbols declared in test files are non-actionable and filtered. +- Symbols with names matching test entrypoint conventions are filtered: + `test_*`, `pytest_*`. +- Methods are filtered as non-actionable when dynamic/runtime dispatch is + expected: + dunder methods, `visit_*`, setup/teardown hooks. +- Module-level PEP 562 hooks are filtered as non-actionable: + `__getattr__`, `__dir__`. +- Declaration-level inline suppression is supported with: + `# codeclone: ignore[dead-code]` (leading or inline comment form). 
+- For multiline declaration headers, inline suppression may appear either on the + first declaration line or on the closing header line containing `:`. +- Suppression is declaration-scoped (`def`, `async def`, `class`) and does not + implicitly propagate to unrelated declaration targets. +- Candidate extraction excludes non-runtime declaration surfaces: + methods on `Protocol` classes, and callables decorated with + `@overload` / `@abstractmethod`. +- A symbol referenced by exact canonical qualname is not dead. +- A symbol referenced by local name is not dead. +- A symbol referenced only by qualified-name suffix (without canonical module + match) downgrades confidence to `medium`. +- `--fail-dead-code` gate counts only high-confidence dead-code items. +- Suppressed dead-code candidates are excluded from active dead-code findings + and from health-score dead-code penalties. +- Suppressed dead-code candidates are surfaced separately in report metrics + (`dead_code.summary.suppressed`, `dead_code.suppressed_items`) and in the + HTML dead-code split view (`Active` / `Suppressed`). + +Refs: + +- `codeclone/metrics/dead_code.py:_is_non_actionable_candidate` +- `codeclone/metrics/dead_code.py:find_unused` +- `codeclone/pipeline.py:metric_gate_reasons` + +## Invariants (MUST) + +- Output dead-code items are deterministically sorted by + `(filepath, start_line, end_line, qualname, kind)`. +- Test-path suppression is applied both on fresh extraction and cached-metrics + load for both `referenced_names` and `referenced_qualnames`. 
+ +Refs: + +- `codeclone/metrics/dead_code.py:find_unused` +- `codeclone/extractor.py:extract_units_and_stats_from_source` +- `codeclone/pipeline.py:_load_cached_metrics` + +## Failure modes + +| Condition | Behavior | +|----------------------------------------------------|----------------------------------------| +| Dynamic method pattern (dunder/visitor/setup hook) | Candidate skipped as non-actionable | +| Module PEP 562 hook (`__getattr__`/`__dir__`) | Candidate skipped as non-actionable | +| Malformed/unknown `# codeclone: ignore[...]` rule | Ignored safely | +| Protocol or stub-like declaration surface | Candidate skipped as non-actionable | +| Definition appears only in tests | Candidate skipped | +| Symbol used only from tests | Remains actionable dead-code candidate | +| Symbol used through import alias / module alias | Matched via canonical qualname usage | +| `--fail-dead-code` with high-confidence dead items | Gating failure, exit `3` | + +## Determinism / canonicalization + +- Filtering rules are deterministic string/path predicates. +- Candidate and result ordering is deterministic. 
+ +Refs: + +- `codeclone/paths.py:is_test_filepath` +- `codeclone/metrics/dead_code.py:_is_dunder` +- `codeclone/metrics/dead_code.py:find_unused` + +## Locked by tests + +- `tests/test_extractor.py::test_dead_code_marks_symbol_dead_when_referenced_only_by_tests` +- `tests/test_extractor.py::test_dead_code_skips_module_pep562_hooks` +- `tests/test_extractor.py::test_dead_code_applies_inline_suppression_per_declaration` +- `tests/test_extractor.py::test_dead_code_suppression_binding_is_scoped_to_target_symbol` +- `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` +- `tests/test_extractor.py::test_collect_dead_candidates_skips_protocol_and_stub_like_symbols` +- `tests/test_pipeline_metrics.py::test_load_cached_metrics_ignores_referenced_names_from_test_files` +- `tests/test_metrics_modules.py::test_find_unused_filters_non_actionable_and_preserves_ordering` +- `tests/test_metrics_modules.py::test_find_unused_respects_referenced_qualnames` +- `tests/test_metrics_modules.py::test_find_unused_keeps_non_pep562_module_dunders_actionable` +- `tests/test_metrics_modules.py::test_find_unused_applies_inline_dead_code_suppression` +- `tests/test_metrics_modules.py::test_find_suppressed_unused_returns_actionable_suppressed_candidates` +- `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` +- `tests/test_html_report.py::test_html_report_renders_dead_code_split_with_suppressed_layer` +- `tests/test_suppressions.py::test_extract_suppression_directives_supports_inline_and_leading_forms` +- `tests/test_suppressions.py::test_bind_suppressions_applies_only_to_adjacent_declaration_line` + +## Non-guarantees + +- No full runtime call-graph resolution is performed. +- Medium-confidence dead items are informational and not used by + `--fail-dead-code` gating. 
+ +## See also + +- [05-core-pipeline.md](05-core-pipeline.md) +- [09-cli.md](09-cli.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) diff --git a/docs/book/17-suggestions-and-clone-typing.md b/docs/book/17-suggestions-and-clone-typing.md new file mode 100644 index 0000000..5befb4f --- /dev/null +++ b/docs/book/17-suggestions-and-clone-typing.md @@ -0,0 +1,109 @@ +# 17. Suggestions and Clone Typing + +## Purpose + +Define deterministic clone-type classification and suggestion generation +contracts used by canonical report projections (`JSON` / `TXT` / `Markdown` / +`HTML`). + +## Public surface + +- Clone-type classifier: `codeclone/report/suggestions.py:classify_clone_type` +- Suggestion engine: `codeclone/report/suggestions.py:generate_suggestions` +- Pipeline integration: `codeclone/pipeline.py:compute_suggestions` +- Report serialization: `codeclone/report/json_contract.py:build_report_document` +- HTML render integration: `codeclone/html_report.py:build_html_report` + +## Data model + +Suggestion shape: + +- `severity`: `critical|warning|info` +- `category`: + `clone|structural|complexity|coupling|cohesion|dead_code|dependency` +- `title`, `location`, `steps`, `effort`, `priority` + +Clone typing: + +- function groups: + - Type-1: identical `raw_hash` + - Type-2: identical normalized `fingerprint` + - Type-3: mixed fingerprints (same group semantics) + - Type-4: fallback +- block/segment groups: Type-4 + +Refs: + +- `codeclone/models.py:Suggestion` +- `codeclone/report/suggestions.py:classify_clone_type` + +## Contracts + +- Suggestions are generated only in full metrics mode + (`skip_metrics=false`). +- Suggestions are advisory only and never directly control exit code. +- SARIF projection is finding-driven and does not consume suggestion cards. +- JSON report stores clone typing at group level: + - `findings.groups.clones.[*].clone_type` +- Suggestion location is deterministic: first item by stable path/line sort. 
+ +Refs: + +- `codeclone/pipeline.py:analyze` +- `codeclone/pipeline.py:gate` +- `codeclone/report/json_contract.py:build_report_document` +- `codeclone/report/suggestions.py:generate_suggestions` + +## Invariants (MUST) + +- Suggestion priority formula is stable: + `severity_weight / effort_weight`. +- Suggestion output is sorted by: + `(-priority, severity, category, source_kind, location, title, subject_key)`. +- Derived suggestion serialization in report JSON applies deterministic ordering by + `(-priority, severity_rank, title, finding_id)`. +- Clone type output for a given group is deterministic for identical inputs. + +Refs: + +- `codeclone/report/suggestions.py:_priority` +- `codeclone/report/suggestions.py:generate_suggestions` + +## Failure modes + +| Condition | Behavior | +|----------------------------------------|---------------------------------------| +| Metrics mode skipped | Suggestions list is empty | +| No eligible findings | Suggestions list is empty | +| Missing optional fields in group items | Classifier/renderer use safe defaults | + +## Determinism / canonicalization + +- Classifier uses deterministic set normalization + sorted collections. +- Serializer emits suggestions in generator-provided deterministic order. + +Refs: + +- `codeclone/report/suggestions.py:classify_clone_type` +- `codeclone/report/suggestions.py:generate_suggestions` +- `codeclone/report/json_contract.py:build_report_document` + +## Locked by tests + +- `tests/test_report_suggestions.py::test_classify_clone_type_all_modes` +- `tests/test_report_suggestions.py::test_generate_suggestions_covers_clone_metrics_and_dependency_categories` +- `tests/test_report_suggestions.py::test_generate_suggestions_covers_skip_branches_for_optional_rules` +- `tests/test_html_report.py::test_html_report_suggestions_headers_include_help_tips` + +## Non-guarantees + +- Suggestion wording can evolve without schema bump. 
+- Suggestion heuristics may be refined if deterministic ordering and + non-gating behavior remain unchanged. + +## See also + +- [05-core-pipeline.md](05-core-pipeline.md) +- [08-report.md](08-report.md) +- [10-html-render.md](10-html-render.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) diff --git a/docs/book/18-benchmarking.md b/docs/book/18-benchmarking.md new file mode 100644 index 0000000..8e86b9b --- /dev/null +++ b/docs/book/18-benchmarking.md @@ -0,0 +1,109 @@ +# 18. Benchmarking (Docker) + +## Purpose + +Define a reproducible, deterministic benchmark workflow for CodeClone in Docker. + +## Public surface + +- Benchmark image: `benchmarks/Dockerfile` +- Benchmark runner (inside container): `benchmarks/run_benchmark.py` +- Host wrapper script: `benchmarks/run_docker_benchmark.sh` + +## Data model + +Benchmark output (`benchmark_schema_version=1.0`) contains: + +- tool metadata (`name`, `version`, `python_tag`) +- benchmark config (`target`, `runs`, `warmups`) +- execution environment (platform, cpu limits/affinity, cgroup limits) +- scenario results: + - `cold_full` (cold cache each run) + - `warm_full` (shared warm cache) + - `warm_clones_only` (shared warm cache with `--skip-metrics`) +- latency stats per scenario (`min`, `max`, `mean`, `median`, `p95`, `stdev`) +- deterministic digest check (`integrity.digest.value` must be stable within scenario) +- cross-scenario comparisons (speedup ratios) + +## Contracts + +- Benchmark must run in containerized, isolated environment. +- CPU/memory limits are pinned at container run time (`--cpuset-cpus`, `--cpus`, + `--memory`). +- Runtime environment is normalized: + `PYTHONHASHSEED=0`, `TZ=UTC`, `LC_ALL/LANG=C.UTF-8`. +- Each measured run must exit successfully (`exit=0`); any failure aborts the benchmark. +- Determinism guard: if scenario digest diverges across measured runs, benchmark fails. 
+ +## Invariants (MUST) + +- Cold scenario uses a fixed cache path and removes cache file before each run + (cold cache with stable canonical metadata path). +- Warm scenarios seed one shared cache file before warmups/measured runs. +- Benchmark JSON write is atomic (`.tmp` + replace). +- Benchmark scenario ordering is stable and fixed. + +## Failure modes + +| Condition | Behavior | +|-----------------------------------------|-----------------------------------------------| +| Docker unavailable | Host wrapper fails fast | +| Non-zero CLI exit in any run | Runner aborts with command stdout/stderr tail | +| Missing/invalid report integrity digest | Runner aborts as invalid benchmark sample | +| Digest mismatch in one scenario | Runner aborts as non-deterministic | + +## Determinism / canonicalization + +- Per-run determinism uses canonical report digest: + `report.integrity.digest.value`. +- Digest intentionally ignores runtime timestamp (`meta.runtime`) in canonical payload, + so deterministic check remains valid. +- Output JSON is serialized with stable formatting (`indent=2`) and written atomically. + +Refs: + +- `codeclone/report/json_contract.py:_build_integrity_payload` +- `benchmarks/run_benchmark.py` + +## Recommended run profile + +```bash +./benchmarks/run_docker_benchmark.sh +``` + +Useful overrides: + +```bash +CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ + ./benchmarks/run_docker_benchmark.sh +``` + +Permissions note: + +- The host wrapper runs the container as host `uid:gid` by default + (`--user "$(id -u):$(id -g)"`) so benchmark artifact writes to bind-mounted + output paths are stable in CI. +- Override explicitly if needed: `CONTAINER_USER=10001:10001`. 
+ +## GitHub Actions + +- Workflow: `.github/workflows/benchmark.yml` +- Triggers: + - manual (`workflow_dispatch`) + - pushes to `feat/2.0.0` + - pull requests targeting `feat/2.0.0` +- Job behavior: + - runs Docker benchmark with pinned runner limits + - uploads `.cache/benchmarks/codeclone-benchmark.json` as artifact + - emits scenario table and ratio table into `GITHUB_STEP_SUMMARY` + - prints ratios in job logs (important for quick trend checks) + +## Non-guarantees + +- Cross-host absolute timings are not comparable by contract. +- Throughput numbers can vary with host kernel, thermal state, and background load. + +## See also + +- [12-determinism.md](12-determinism.md) +- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) 
+ +## Public surface + +- Suppression directive parser and binder: `codeclone/suppressions.py` +- Dead-code final filter: `codeclone/metrics/dead_code.py:find_unused` +- Suppressed dead-code projection helper: + `codeclone/metrics/dead_code.py:find_suppressed_unused` +- Dead-code candidate extraction: `codeclone/extractor.py:_collect_dead_candidates` + +## Data model + +- Directive model: `SuppressionDirective` (`line`, `binding`, `rules`) +- Declaration target model: `DeclarationTarget` +- Bound suppression model: `SuppressionBinding` +- Candidate storage: `DeadCandidate.suppressed_rules` + +Refs: + +- `codeclone/suppressions.py:SuppressionDirective` +- `codeclone/suppressions.py:DeclarationTarget` +- `codeclone/suppressions.py:SuppressionBinding` +- `codeclone/models.py:DeadCandidate` + +## Contracts + +- Canonical syntax: `# codeclone: ignore[<rule-id>, ...]` +- Supported placements: + - previous line before declaration (`leading`) + - end-of-line comment on declaration header (`inline`) + - same-line single-line declaration + - first line of a multiline declaration header + - closing header line containing `:` +- Current supported dead-code rule id: `dead-code`. +- Rule list supports comma-separated values and deduplicates deterministically. +- Suppression applies only to declaration targets (`def`, `async def`, `class`). +- Suppression is target-scoped: + class-level suppression does not implicitly suppress unrelated methods. +- Dead-code suppression is applied in final liveness filtering by rule id. +- Suppressed dead-code candidates are reported separately (not as active + findings) with deterministic suppression metadata in report metrics. + +## Invariants (MUST) + +- If no `# codeclone: ignore[...]` exists, behavior remains unchanged. +- Suppression matching never jumps across non-adjacent lines. +- Unknown/malformed suppressions never fail analysis. +- Suppression handling remains deterministic under identical inputs. 
+ +## Failure modes + +| Condition | Behavior | +|---------------------------------------------------|-------------------------------------| +| malformed `# codeclone: ignore[...]` payload | ignored silently | +| unknown `# codeclone: ignore[...]` rule id | ignored silently | +| suppression on non-declaration line | ignored silently | +| duplicate rule ids in one directive | deduplicated deterministically | +| suppression rule mismatch (`dead-code` vs others) | does not suppress dead-code finding | + +## Determinism / canonicalization + +- Directives are parsed from lexical comment tokens, not heuristic substring + scans. +- Binding is deterministic by declaration line and target identity. +- Inline binding for multiline declarations is deterministic across the + declaration header span only; it does not search arbitrary body lines. +- Candidate-level `suppressed_rules` are canonicalized and sorted in cache + payloads. +- Report-level suppressed dead-code payloads are deterministically sorted and + do not alter active finding IDs/order. 
+ +Refs: + +- `codeclone/suppressions.py:extract_suppression_directives` +- `codeclone/suppressions.py:bind_suppressions_to_declarations` +- `codeclone/cache.py:_canonicalize_cache_entry` + +## Locked by tests + +- `tests/test_suppressions.py::test_extract_suppression_directives_supports_inline_and_leading_forms` +- `tests/test_suppressions.py::test_extract_suppression_directives_ignores_unknown_and_malformed_safely` +- `tests/test_suppressions.py::test_bind_suppressions_applies_only_to_adjacent_declaration_line` +- `tests/test_suppressions.py::test_bind_suppressions_does_not_propagate_class_inline_to_method` +- `tests/test_suppressions.py::test_bind_suppressions_applies_to_method_target` +- `tests/test_suppressions.py::test_build_suppression_index_deduplicates_rules_stably` +- `tests/test_extractor.py::test_dead_code_applies_inline_suppression_per_declaration` +- `tests/test_extractor.py::test_dead_code_suppression_binding_is_scoped_to_target_symbol` +- `tests/test_metrics_modules.py::test_find_unused_applies_inline_dead_code_suppression` +- `tests/test_metrics_modules.py::test_find_suppressed_unused_returns_actionable_suppressed_candidates` +- `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` +- `tests/test_html_report.py::test_html_report_renders_dead_code_split_with_suppressed_layer` + +## Non-guarantees + +- No file-level/project-level suppressions are provided. +- No generic suppression UI over all finding families is guaranteed in this + chapter. + +## See also + +- [16-dead-code-contract.md](16-dead-code-contract.md) +- [08-report.md](08-report.md) diff --git a/docs/book/README.md b/docs/book/README.md index 6b06475..d2024cd 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -1,6 +1,6 @@ # CodeClone Contracts Book -This book is the contract-level documentation for CodeClone v1.x. +This book is the contract-level documentation for CodeClone v2.x. All guarantees here are derived from code and locked tests. 
If a statement is not enforced by code/tests, it is explicitly marked as non-contractual. @@ -38,6 +38,14 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con - [13-testing-as-spec.md](13-testing-as-spec.md) - [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) +### Quality and recommendations + +- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- [16-dead-code-contract.md](16-dead-code-contract.md) +- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) +- [18-benchmarking.md](18-benchmarking.md) +- [19-inline-suppressions.md](19-inline-suppressions.md) + ### Appendix - [appendix/a-status-enums.md](appendix/a-status-enums.md) diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index d225883..9f99429 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -2,86 +2,64 @@ ## Purpose -Provide concise structural layouts for baseline/cache/report contracts. +Compact structural layouts for baseline/cache/report contracts in `2.0.0b1`. -## Baseline schema (v1.0) +## Baseline schema (`2.0`) ```json { "meta": { - "generator": { - "name": "codeclone", - "version": "1.4.0" - }, - "schema_version": "1.0", + "generator": { "name": "codeclone", "version": "2.0.0b1" }, + "schema_version": "2.0", "fingerprint_version": "1", "python_tag": "cp313", - "created_at": "2026-02-11T12:00:00Z", - "payload_sha256": "..." + "created_at": "2026-03-11T00:00:00Z", + "payload_sha256": "...", + "metrics_payload_sha256": "..." }, "clones": { - "functions": [ - "..." - ], - "blocks": [ - "..." 
- ] - } + "functions": ["|"], + "blocks": ["|||"] + }, + "metrics": { "...": "optional embedded metrics snapshot" } } ``` -Refs: - -- `codeclone/baseline.py:_baseline_payload` - -## Cache schema (v1.3) +## Cache schema (`2.2`) ```json { - "v": "1.3", + "v": "2.2", "payload": { "py": "cp313", "fp": "1", "ap": { - "min_loc": 15, - "min_stmt": 6 + "min_loc": 10, + "min_stmt": 6, + "block_min_loc": 20, + "block_min_stmt": 8, + "segment_min_loc": 20, + "segment_min_stmt": 10 }, "files": { - "rel/path.py": { - "st": [ - 1730000000000000000, - 2048 - ], - "u": [ - [ - "mod:f", - 1, - 10, - 10, - 3, - "fp", - "0-19" - ] - ], - "b": [ - [ - "mod:f", - 3, - 6, - 4, - "h1|h2|h3|h4" - ] - ], - "s": [ - [ - "mod:f", - 3, - 8, - 6, - "segment_hash", - "segment_sig" - ] - ] + "codeclone/cache.py": { + "st": [1730000000000000000, 2048], + "ss": [450, 12, 3, 1], + "u": [[ + "qualname", 1, 2, 2, 1, "fp", "0-19", 1, 0, "low", "raw_hash", + 0, "none", 0, "fallthrough", "none", "none" + ]], + "b": [["qualname", 10, 14, 5, "block_hash"]], + "s": [["qualname", 10, 14, 5, "segment_hash", "segment_sig"]], + "cm": [["qualname", 1, 30, 3, 2, 4, 2, "low", "low"]], + "cc": [["qualname", ["pkg.a", "pkg.b"]]], + "md": [["pkg.a", "pkg.b", "import", 10]], + "dc": [["pkg.a:unused_fn", "unused_fn", 20, 24, "function"]], + "rn": ["used_name"], + "rq": ["pkg.dep:used_name"], + "in": ["pkg.dep"], + "cn": ["ClassName"], + "sf": [["duplicated_branches", "key", [["stmt_seq", "Expr,Return"]], [["pkg.a:f", 10, 12]]]] } } }, @@ -89,156 +67,325 @@ Refs: } ``` -Refs: +Notes: -- `codeclone/cache.py:Cache.save` -- `codeclone/cache.py:_encode_wire_file_entry` +- File keys are wire paths (repo-relative when root is configured). +- Optional sections are omitted when empty. +- `ss` stores per-file source stats and is required for full cache-hit accounting + in discovery. +- `rn`/`rq` are optional and decode to empty arrays when absent. 
+- `u` row decoder accepts both legacy 11-column rows and canonical 17-column rows + (legacy rows map new structural fields to neutral defaults). -## Report schema (v1.1) +## Report schema (`2.1`) ```json { + "report_schema_version": "2.1", "meta": { - "report_schema_version": "1.1", - "codeclone_version": "1.4.0", - "python_version": "3.13", - "python_tag": "cp313", - "baseline_status": "ok", - "cache_status": "ok", - "groups_counts": { - "functions": { - "total": 1, - "new": 0, - "known": 1 + "codeclone_version": "2.0.0b1", + "project_name": "codeclone", + "scan_root": ".", + "analysis_mode": "full", + "report_mode": "full", + "baseline": { + "...": "..." + }, + "cache": { + "...": "..." + }, + "metrics_baseline": { + "...": "..." + }, + "runtime": { + "report_generated_at_utc": "2026-03-11T08:36:32Z" + } + }, + "inventory": { + "files": { + "...": "..." + }, + "code": { + "...": "..." + }, + "file_registry": { + "encoding": "relative_path", + "items": [] + } + }, + "findings": { + "summary": { + "...": "...", + "suppressed": { + "dead_code": 0 + } + }, + "groups": { + "clones": { + "functions": [], + "blocks": [], + "segments": [] }, - "blocks": { - "total": 7, - "new": 0, - "known": 7 + "structural": { + "groups": [ + { + "kind": "duplicated_branches", + "...": "..." + }, + { + "kind": "clone_guard_exit_divergence", + "...": "..." + }, + { + "kind": "clone_cohort_drift", + "...": "..." 
+ } + ] }, - "segments": { - "total": 0, - "new": 0, - "known": 0 + "dead_code": { + "groups": [] + }, + "design": { + "groups": [] } } }, - "files": [ - "/abs/path.py" - ], - "groups": { - "functions": { - "group_key": [ - [ - 0, - "mod:f", - 1, - 20, - 20, - 6, - "fp", - "20-49" - ] - ] - }, - "blocks": { - "group_key": [ - [ - 0, - "mod:f", - 5, - 8, - 4 - ] - ] + "metrics": { + "summary": { + "...": "...", + "dead_code": { + "total": 0, + "high_confidence": 0, + "suppressed": 1 + } }, - "segments": { - "group_key": [ - [ - 0, - "mod:f", - 5, - 10, - 6, - "h", - "s" + "families": { + "complexity": {}, + "coupling": {}, + "cohesion": {}, + "dependencies": {}, + "dead_code": { + "summary": { + "total": 0, + "high_confidence": 0, + "suppressed": 1 + }, + "items": [], + "suppressed_items": [ + { + "...": "..." + } ] - ] + }, + "health": {} } }, - "groups_split": { - "functions": { - "new": [ - "..." - ], - "known": [ - "..." - ] + "derived": { + "suggestions": [], + "overview": { + "families": { + "clones": 0, + "structural": 0, + "dead_code": 0, + "design": 0 + }, + "top_risks": [], + "source_scope_breakdown": { + "production": 0, + "tests": 0, + "fixtures": 0 + }, + "health_snapshot": { + "score": 100, + "grade": "A" + } }, - "blocks": { - "new": [ - "..." - ], - "known": [ - "..." + "hotlists": { + "most_actionable_ids": [], + "highest_spread_ids": [], + "production_hotspot_ids": [], + "test_fixture_hotspot_ids": [] + } + }, + "integrity": { + "canonicalization": { + "version": "1", + "scope": "canonical_only", + "sections": [ + "report_schema_version", + "meta", + "inventory", + "findings", + "metrics" ] }, - "segments": { - "new": [ - "..." - ], - "known": [] + "digest": { + "verified": true, + "algorithm": "sha256", + "value": "..." 
} - }, - "group_item_layout": { - "functions": [ - "file_i", - "qualname", - "start", - "end", - "loc", - "stmt_count", - "fingerprint", - "loc_bucket" - ], - "blocks": [ - "file_i", - "qualname", - "start", - "end", - "size" - ], - "segments": [ - "file_i", - "qualname", - "start", - "end", - "size", - "segment_hash", - "segment_sig" - ] } } ``` -Refs: +## Markdown projection (`1.0`) -- `codeclone/_report_serialize.py:to_json_report` -- `codeclone/_report_serialize.py:GROUP_ITEM_LAYOUT` +```text +# CodeClone Report +- Markdown schema: 1.0 +- Source report schema: 2.1 +... +## Overview +## Inventory +## Findings Summary +## Top Risks +## Suggestions +## Findings +## Metrics +## Integrity +``` + +## SARIF projection (`2.1.0`, profile `1.0`) + +```json +{ + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [ + { + "originalUriBaseIds": { + "%SRCROOT%": { + "uri": "file:///repo/project/", + "description": { + "text": "The root of the scanned source tree." + } + } + }, + "tool": { + "driver": { + "name": "codeclone", + "version": "2.0.0b1", + "rules": [ + { + "id": "CCLONE001", + "name": "codeclone.function-clone-group", + "shortDescription": { + "text": "Function clone group" + }, + "fullDescription": { + "text": "Multiple functions share the same normalized function body." + }, + "help": { + "text": "...", + "markdown": "..." 
+ }, + "defaultConfiguration": { + "level": "warning" + }, + "helpUri": "https://orenlab.github.io/codeclone/", + "properties": { + "category": "clone", + "kind": "clone_group", + "precision": "high", + "tags": [ + "clone", + "clone_group", + "high" + ] + } + } + ] + } + }, + "artifacts": [ + { + "location": { + "uri": "codeclone/report/sarif.py", + "uriBaseId": "%SRCROOT%" + } + } + ], + "invocations": [ + { + "executionSuccessful": true, + "workingDirectory": { + "uri": "file:///repo/project/" + } + } + ], + "columnKind": "utf16CodeUnits", + "properties": { + "profileVersion": "1.0", + "reportSchemaVersion": "2.1" + }, + "results": [ + { + "ruleId": "CCLONE001", + "ruleIndex": 0, + "baselineState": "new", + "message": { + "text": "Function clone group (Type-2), 2 occurrences across 2 files." + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "codeclone/report/sarif.py", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 1, + "endLine": 10 + } + }, + "logicalLocations": [ + { + "fullyQualifiedName": "codeclone.report.sarif:render_sarif_report_document" + } + ], + "message": { + "text": "Representative occurrence" + } + } + ], + "relatedLocations": [], + "partialFingerprints": { + "primaryLocationLineHash": "0123456789abcdef:1" + } + } + ] + } + ] +} +``` ## TXT report sections ```text REPORT METADATA -... 
+INVENTORY +FINDINGS SUMMARY +METRICS SUMMARY +DERIVED OVERVIEW +SUGGESTIONS FUNCTION CLONES (NEW) FUNCTION CLONES (KNOWN) BLOCK CLONES (NEW) BLOCK CLONES (KNOWN) SEGMENT CLONES (NEW) SEGMENT CLONES (KNOWN) +STRUCTURAL FINDINGS +DEAD CODE FINDINGS +DESIGN FINDINGS +INTEGRITY ``` -Refs: +## Refs -- `codeclone/_report_serialize.py:to_text_report` +- `codeclone/baseline.py` +- `codeclone/cache.py` +- `codeclone/report/json_contract.py` +- `codeclone/report/serialize.py` +- `codeclone/report/markdown.py` +- `codeclone/report/sarif.py` diff --git a/docs/book/appendix/c-error-catalog.md b/docs/book/appendix/c-error-catalog.md index f03d2f3..24115c7 100644 --- a/docs/book/appendix/c-error-catalog.md +++ b/docs/book/appendix/c-error-catalog.md @@ -62,10 +62,10 @@ Refs: ## Report write errors -| Condition | Behavior | -|-----------------------------|-----------------------| -| Baseline write OSError | contract error exit 2 | -| HTML/JSON/TXT write OSError | contract error exit 2 | +| Condition | Behavior | +|--------------------------------------------|-----------------------| +| Baseline write OSError | contract error exit 2 | +| HTML/JSON/Markdown/SARIF/TXT write OSError | contract error exit 2 | Refs: diff --git a/docs/cfg.md b/docs/cfg.md index 4fb9500..4cb572a 100644 --- a/docs/cfg.md +++ b/docs/cfg.md @@ -126,8 +126,8 @@ In CFG v1: - `break` and `continue` are explicit terminating statements, - each maps to a deterministic jump target through loop context: - - `break` -> loop after-block, - - `continue` -> loop condition/iteration block, + - `break` -> loop after-block, + - `continue` -> loop condition/iteration block, - `for/while ... else` remains reachable only on normal loop completion (not through `break` paths). 
diff --git a/docs/examples/report.md b/docs/examples/report.md new file mode 100644 index 0000000..8e48661 --- /dev/null +++ b/docs/examples/report.md @@ -0,0 +1,47 @@ +# Sample Report + +This page links to a live example report generated from the current `codeclone` +repository at docs build time. + +The example is rebuilt from the same tree that produces the published +documentation, so the HTML, canonical JSON, and SARIF artifacts stay aligned. + +

+- [Open interactive HTML report](live/index.html) +- [Open canonical JSON](live/report.json) +- [Open SARIF](live/report.sarif) +- [View generation manifest](live/manifest.json) 

    + +## What this contains + +- Full HTML report generated by `codeclone` against the current repository. +- Canonical JSON report rendered from the same analysis run. +- SARIF projection from the same canonical report. + +## Why this lives here + +- It gives readers a realistic, current example of the report surfaces. +- It keeps the sample aligned with the shipped report contract instead of + freezing a stale artifact in git. +- It makes the docs site useful as both reference and product demo. + +## Local preview + +Build the docs site, then generate the example report into the built site: + +```bash +uv run --with mkdocs --with mkdocs-material mkdocs build --strict +uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live +``` + +The generated assets are not committed to the repository; they are produced +locally for preview and automatically during the GitHub Pages publish workflow. diff --git a/docs/publishing.md b/docs/publishing.md new file mode 100644 index 0000000..b890b16 --- /dev/null +++ b/docs/publishing.md @@ -0,0 +1,94 @@ +# Publishing and Docs Site + +## Purpose + +Document how the documentation site is built, validated, and published. + +This page is operational, not contractual. The source of truth for behavior +remains the current repository code and CI workflow. + +## Current stack + +- Site generator: `MkDocs` +- Theme: `Material for MkDocs` +- Docs root: `docs/` +- Site config: `mkdocs.yml` +- Publish workflow: `.github/workflows/docs.yml` + +## What gets published + +The published site contains: + +- the documentation tree under `docs/` +- the contract book under `docs/book/` +- deep-dive pages such as architecture and CFG notes +- a live sample report for the current repository build under + `Examples / Sample Report` + +## Build flow + +The docs workflow follows this order: + +1. install project dependencies +2. build the MkDocs site with `mkdocs build --strict` +3. 
generate a live sample report into `site/examples/report/live` +4. upload the built site as a GitHub Pages artifact +5. deploy on pushes to `main` + +Relevant files: + +- `mkdocs.yml` +- `.github/workflows/docs.yml` +- `scripts/build_docs_example_report.py` + +## Sample report generation + +The sample report is generated from the current `codeclone` repository tree. + +Generated artifacts: + +- `site/examples/report/live/index.html` +- `site/examples/report/live/report.json` +- `site/examples/report/live/report.sarif` +- `site/examples/report/live/manifest.json` + +The sample report is generated during docs publishing and is not committed to +git. `site/` remains ignored. + +## Local preview + +Build the site: + +```bash +uv run --with mkdocs --with mkdocs-material mkdocs build --strict +``` + +Generate the sample report into the built site: + +```bash +uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live +``` + +Then open: + +- `site/index.html` +- `site/examples/report/live/index.html` + +## Maintenance rules + +- Keep `docs/` as the single source tree for site content. +- Do not commit generated `site/` artifacts. +- Keep docs publishing deterministic: no timestamps in published docs paths. +- Keep the sample report generated from the same commit as the site itself. +- Prefer documenting docs-site mechanics here or in adjacent deep-dive pages, + not inside contract chapters unless a public contract is affected. 
+ +## When to update this page + +Update this page when you change: + +- `mkdocs.yml` +- `.github/workflows/docs.yml` +- `scripts/build_docs_example_report.py` +- the site navigation model +- the sample report publishing path/layout diff --git a/docs/sarif.md b/docs/sarif.md new file mode 100644 index 0000000..e62d4b8 --- /dev/null +++ b/docs/sarif.md @@ -0,0 +1,121 @@ +# SARIF for IDEs and Code Scanning + +## Purpose + +Explain how CodeClone projects canonical findings into SARIF and what IDEs or +code-scanning tools can rely on. + +SARIF is a machine-readable projection layer. The canonical source of report +truth remains the JSON report document. + +## Source files + +- `codeclone/report/sarif.py` +- `codeclone/report/json_contract.py` +- `codeclone/report/findings.py` + +## Design model + +CodeClone builds SARIF from the already materialized canonical report document. +It does not recompute analysis in the SARIF layer. + +That means: + +- finding identities come from canonical finding IDs +- severity/confidence/category data comes from canonical report payloads +- SARIF ordering remains deterministic + +## Path model + +To improve IDE and code-scanning integration, SARIF uses repo-relative paths +anchored through `%SRCROOT%`. + +Current behavior: + +- `run.originalUriBaseIds["%SRCROOT%"]` points at the scan root when an + absolute scan root is known +- `run.artifacts[*]` enumerates referenced files +- `artifactLocation.uri` uses repository-relative paths +- `artifactLocation.index` aligns locations with artifacts for stable linking +- `run.invocations[*].workingDirectory` mirrors the scan root URI when available +- `run.columnKind` is fixed to `utf16CodeUnits` + +This helps consumers resolve results back to workspace files consistently. 
+ +## Result model + +Current SARIF output includes: + +- `tool.driver.rules[*]` with stable rule IDs and help links +- `results[*]` for clone groups, dead code, design findings, and structural findings +- `locations[*]` with primary file/line mapping +- `locations[*].message` and `relatedLocations[*].message` with + human-readable role labels such as `Representative occurrence` +- `relatedLocations[*]` when the result has multiple relevant locations +- `partialFingerprints.primaryLocationLineHash` for stable per-location identity + +For clone results, CodeClone also carries novelty-aware metadata when known: + +- `baselineState` + +This improves usefulness in IDE/code-scanning flows that distinguish new vs +known findings. + +## Rule metadata + +Rule records are intentionally richer than a minimal SARIF export. + +They include: + +- stable rule IDs +- display name +- help text / markdown +- tags +- docs-facing help URI + +The goal is not only schema compliance, but a better consumer experience in IDEs +and code-scanning platforms. + +## What SARIF is good for here + +SARIF is useful as: + +- an IDE-facing findings stream +- a code-scanning upload format +- another deterministic machine-readable projection over canonical report data + +It is not the source of truth for: + +- report integrity digest +- gating semantics +- baseline compatibility + +Those remain owned by the canonical report and baseline contracts. + +## Limitations + +- Consumer UX depends on the IDE/platform; not every SARIF field is shown by + every tool. +- HTML-only presentation details are not carried into SARIF. +- SARIF wording may evolve as long as IDs, semantics, and deterministic + structure remain stable. 
+ +## Validation and tests + +Relevant tests: + +- `tests/test_report.py` +- `tests/test_report_contract_coverage.py` +- `tests/test_report_branch_invariants.py` + +Contract-adjacent coverage includes: + +- reuse of canonical report document +- stable SARIF branch invariants +- deterministic artifacts/rules/results ordering + +## See also + +- [08. Report](book/08-report.md) +- [10. HTML Render](book/10-html-render.md) +- [Examples / Sample Report](examples/report.md) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..fae6e1d --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,95 @@ +site_name: CodeClone +site_description: Structural code quality analysis for Python +site_url: https://orenlab.github.io/codeclone/ +repo_url: https://github.com/orenlab/codeclone +repo_name: orenlab/codeclone +docs_dir: docs +edit_uri: blob/main/docs/ +strict: true + +theme: + name: material + icon: + repo: fontawesome/brands/github + features: + - navigation.tabs + - navigation.sections + - navigation.top + - search.suggest + - search.highlight + - content.code.copy + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + primary: white + accent: indigo + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/weather-sunny + name: Switch to light mode + +plugins: + - search + +markdown_extensions: + - admonition + - attr_list + - def_list + - footnotes + - tables + - toc: + permalink: true + - pymdownx.details + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + +nav: + - Home: README.md + - Contracts Book: + - Overview: book/README.md + - Foundations: + - Intro: book/00-intro.md + - Architecture Map: book/01-architecture-map.md + - Terminology: book/02-terminology.md + - Contract Spine: + - Exit Codes: book/03-contracts-exit-codes.md 
+ - Config and Defaults: book/04-config-and-defaults.md + - Core Pipeline: book/05-core-pipeline.md + - Baseline: book/06-baseline.md + - Cache: book/07-cache.md + - Report: book/08-report.md + - Interfaces: + - CLI: book/09-cli.md + - HTML Render: book/10-html-render.md + - System Properties: + - Security Model: book/11-security-model.md + - Determinism: book/12-determinism.md + - Testing as Spec: book/13-testing-as-spec.md + - Compatibility and Versioning: book/14-compatibility-and-versioning.md + - Quality: + - Metrics and Gates: book/15-metrics-and-quality-gates.md + - Dead Code: book/16-dead-code-contract.md + - Suggestions and Clone Typing: book/17-suggestions-and-clone-typing.md + - Benchmarking: book/18-benchmarking.md + - Inline Suppressions: book/19-inline-suppressions.md + - Appendix: + - Status Enums: book/appendix/a-status-enums.md + - Schema Layouts: book/appendix/b-schema-layouts.md + - Error Catalog: book/appendix/c-error-catalog.md + - Deep Dives: + - Architecture Narrative: architecture.md + - CFG Semantics: cfg.md + - SARIF for IDEs: sarif.md + - Publishing and Docs Site: publishing.md + - Examples: + - Sample Report: examples/report.md diff --git a/pyproject.toml b/pyproject.toml index f893463..52744d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,14 @@ [build-system] -requires = ["setuptools>=61.0", "wheel"] +requires = ["setuptools>=77.0.0", "wheel"] build-backend = "setuptools.build_meta" [project] name = "codeclone" -version = "1.4.4" -description = "AST and CFG-based code clone detector for Python focused on architectural duplication" +version = "2.0.0b1" +description = "Structural code quality analysis for Python" readme = { file = "README.md", content-type = "text/markdown" } -license = { text = "MIT" } +license = "MIT" +license-files = ["LICENSE"] authors = [ { name = "Den Rozhnovskiy", email = "pytelemonbot@mail.ru" } @@ -21,6 +22,7 @@ requires-python = ">=3.10" dependencies = [ "pygments>=2.19.2", "rich>=14.3.2", + 
"tomli>=2.0.1; python_version < '3.11'", ] keywords = [ @@ -36,12 +38,11 @@ keywords = [ ] classifiers = [ - "Development Status :: 5 - Production/Stable", + "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Topic :: Software Development :: Quality Assurance", "Topic :: Software Development :: Testing", "Typing :: Typed", - "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -49,6 +50,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules", ] [project.urls] @@ -56,16 +58,16 @@ Homepage = "https://github.com/orenlab/codeclone" Repository = "https://github.com/orenlab/codeclone" Issues = "https://github.com/orenlab/codeclone/issues" Changelog = "https://github.com/orenlab/codeclone/releases" -Documentation = "https://github.com/orenlab/codeclone/tree/main/docs" +Documentation = "https://orenlab.github.io/codeclone/" [project.optional-dependencies] dev = [ "pytest>=9.0.0", - "pytest-cov>=6.1.0", - "build>=1.2.0", + "pytest-cov>=7.1.0", + "build>=1.4.1", "twine>=5.0.0", "mypy>=1.19.1", - "ruff>=0.15.0", + "ruff>=0.15.7", "pre-commit>=4.5.1", ] @@ -73,8 +75,14 @@ dev = [ codeclone = "codeclone.cli:main" [tool.setuptools] -packages = ["codeclone"] -license-files = ["LICENSE"] +packages = [ + "codeclone", + "codeclone._html_report", + "codeclone._html_report._sections", + "codeclone.domain", + "codeclone.metrics", + "codeclone.report", +] [tool.setuptools.package-data] codeclone = ["py.typed"] @@ -95,6 +103,10 @@ fail_under = 99 python_version = "3.10" strict = true warn_unused_configs = true +warn_return_any = true +disallow_any_generics = true +disallow_untyped_defs = true +no_implicit_optional = true files = ["codeclone", "tests"] [tool.ruff] @@ -104,6 +116,11 @@ target-version = 
"py310" [tool.ruff.lint] select = ["E", "F", "W", "I", "B", "UP", "SIM", "C4", "PIE", "PERF", "RUF"] +[tool.ruff.lint.per-file-ignores] +"codeclone/_html_css.py" = ["E501"] +"codeclone/_html_js.py" = ["E501"] +"codeclone/_html_report/_sections/*.py" = ["E501"] + [tool.ruff.format] quote-style = "double" indent-style = "space" diff --git a/scripts/build_docs_example_report.py b/scripts/build_docs_example_report.py new file mode 100644 index 0000000..5254c59 --- /dev/null +++ b/scripts/build_docs_example_report.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import argparse +import json +import os +import shutil +import subprocess +import sys +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from tempfile import TemporaryDirectory + +from codeclone import __version__ + +DEFAULT_OUTPUT_DIR = Path("site/examples/report/live") + + +@dataclass(frozen=True) +class ReportArtifacts: + html: Path + json: Path + sarif: Path + manifest: Path + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[1] + + +def _parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Build a live CodeClone sample report for the docs site." 
+ ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT_DIR, + help="Directory that should receive index.html/report.json/report.sarif.", + ) + return parser + + +def _artifacts_for_dir(output_dir: Path) -> ReportArtifacts: + return ReportArtifacts( + html=output_dir / "index.html", + json=output_dir / "report.json", + sarif=output_dir / "report.sarif", + manifest=output_dir / "manifest.json", + ) + + +def _run_codeclone(scan_root: Path, artifacts: ReportArtifacts) -> None: + cmd = [ + sys.executable, + "-m", + "codeclone.cli", + str(scan_root), + "--html", + str(artifacts.html), + "--json", + str(artifacts.json), + "--sarif", + str(artifacts.sarif), + "--no-progress", + "--quiet", + ] + subprocess.run(cmd, cwd=scan_root, check=True) + + +def _manifest_payload(scan_root: Path) -> dict[str, object]: + return { + "project": scan_root.name, + "codeclone_version": __version__, + "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"), + "git_sha": os.environ.get("GITHUB_SHA", "").strip(), + "scan_root": str(scan_root), + "artifacts": { + "html": "index.html", + "json": "report.json", + "sarif": "report.sarif", + }, + } + + +def _write_manifest(scan_root: Path, artifacts: ReportArtifacts) -> None: + artifacts.manifest.write_text( + json.dumps(_manifest_payload(scan_root), indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + + +def _copy_artifacts(source: ReportArtifacts, destination: ReportArtifacts) -> None: + destination.html.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source.html, destination.html) + shutil.copy2(source.json, destination.json) + shutil.copy2(source.sarif, destination.sarif) + shutil.copy2(source.manifest, destination.manifest) + + +def build_docs_example_report(output_dir: Path) -> None: + scan_root = _repo_root() + destination = _artifacts_for_dir(output_dir) + with TemporaryDirectory(prefix="codeclone-docs-report-") as tmp_dir_name: + tmp_dir = Path(tmp_dir_name) + working = 
_artifacts_for_dir(tmp_dir) + _run_codeclone(scan_root, working) + _write_manifest(scan_root, working) + _copy_artifacts(working, destination) + + +def main(argv: list[str] | None = None) -> int: + args = _parser().parse_args(argv) + build_docs_example_report(args.output_dir.resolve()) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/_assertions.py b/tests/_assertions.py new file mode 100644 index 0000000..619e882 --- /dev/null +++ b/tests/_assertions.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from collections.abc import Mapping + + +def assert_contains_all(text: str, *needles: str) -> None: + for needle in needles: + assert needle in text + + +def assert_mapping_entries( + mapping: Mapping[str, object], + /, + **expected: object, +) -> None: + for key, value in expected.items(): + assert mapping[key] == value + + +def snapshot_python_tag(snapshot: Mapping[str, object]) -> str: + meta = snapshot.get("meta", {}) + assert isinstance(meta, dict) + python_tag = meta.get("python_tag") + assert isinstance(python_tag, str) + return python_tag diff --git a/tests/_ast_helpers.py b/tests/_ast_helpers.py new file mode 100644 index 0000000..ce123be --- /dev/null +++ b/tests/_ast_helpers.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +import ast +from typing import TypeVar + +_FunctionDefT = TypeVar("_FunctionDefT", ast.FunctionDef, ast.AsyncFunctionDef) + + +def fix_missing_single_function(function_node: _FunctionDefT) -> _FunctionDefT: + module = ast.Module(body=[function_node], type_ignores=[]) + module = ast.fix_missing_locations(module) + node = module.body[0] + assert isinstance(node, type(function_node)) + return node diff --git a/tests/_report_access.py b/tests/_report_access.py new file mode 100644 index 0000000..9eeb760 --- /dev/null +++ b/tests/_report_access.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from collections.abc import Mapping + + +def _dict_at(payload: Mapping[str, object], 
*path: str) -> dict[str, object]: + current: object = payload + for key in path: + assert isinstance(current, Mapping) + current = current[key] + assert isinstance(current, dict) + return current + + +def _list_at(payload: Mapping[str, object], *path: str) -> list[dict[str, object]]: + current: object = payload + for key in path: + assert isinstance(current, Mapping) + current = current[key] + assert isinstance(current, list) + rows = current + assert all(isinstance(item, dict) for item in rows) + return rows + + +def report_meta_baseline(payload: dict[str, object]) -> dict[str, object]: + return _dict_at(payload, "meta", "baseline") + + +def report_meta_cache(payload: dict[str, object]) -> dict[str, object]: + return _dict_at(payload, "meta", "cache") + + +def report_inventory_files(payload: dict[str, object]) -> dict[str, object]: + return _dict_at(payload, "inventory", "files") + + +def report_clone_groups( + payload: dict[str, object], kind: str +) -> list[dict[str, object]]: + return _list_at(payload, "findings", "groups", "clones", kind) + + +def report_structural_groups(payload: dict[str, object]) -> list[dict[str, object]]: + return _list_at(payload, "findings", "groups", "structural", "groups") diff --git a/tests/conftest.py b/tests/conftest.py index 3c61731..7647800 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,6 +32,7 @@ def _make(**overrides: object) -> dict[str, object]: "cache_status": "ok", "cache_used": True, "files_skipped_source_io": 0, + "report_generated_at_utc": "2026-03-10T12:00:00Z", } meta.update(overrides) return meta diff --git a/tests/fixtures/golden_v2/clone_metrics_cycle/golden_expected_snapshot.json b/tests/fixtures/golden_v2/clone_metrics_cycle/golden_expected_snapshot.json new file mode 100644 index 0000000..40ac43e --- /dev/null +++ b/tests/fixtures/golden_v2/clone_metrics_cycle/golden_expected_snapshot.json @@ -0,0 +1,73 @@ +{ + "cohort_structural_findings": { + "count": 0, + "keys": [], + "kinds": [] + }, + 
"files": { + "classes": 2, + "count": 3, + "functions": 2, + "lines": 121, + "methods": 4 + }, + "groups": { + "block_keys": [ + "3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5", + "3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|cb4fcbc1b2a65ec1346898fc0d660335e25d7cbc", + "8579659a9e8c9755a6d2f0b1d82dda8866fd243b|1912d2ee3c541cbf9e51f485348586afe1a00755|ee69aff0b7ea38927e5082ceef14115c805f6734|ee69aff0b7ea38927e5082ceef14115c805f6734", + "b4b5893be87edf98955f047cbf25ca755dc753b4|8579659a9e8c9755a6d2f0b1d82dda8866fd243b|1912d2ee3c541cbf9e51f485348586afe1a00755|ee69aff0b7ea38927e5082ceef14115c805f6734", + "b6ee70d0bd6ff4b593f127a137aed9ab41179145|cacc33d58f323481f65fed57873d1c840531859e|d60c0005a4c850c140378d1c82b81dde93a7ccab|d60c0005a4c850c140378d1c82b81dde93a7ccab", + "cacc33d58f323481f65fed57873d1c840531859e|d60c0005a4c850c140378d1c82b81dde93a7ccab|d60c0005a4c850c140378d1c82b81dde93a7ccab|b4b5893be87edf98955f047cbf25ca755dc753b4", + "ee69aff0b7ea38927e5082ceef14115c805f6734|fcd36b4275c94f1955fb55e1c1ca3c04c7c0bb26|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5|3c1b5cf24b4dfcd8e5736b735bfd3850940100d5" + ], + "function_keys": [ + "efc8465229b381a3a50502d59d9539c0be3efe86|20-49" + ], + "segment_keys": [ + "1909dc28788ce0d16181afc930bf4ec8cea1a144|pkg.a:transform_shared_a", + "1909dc28788ce0d16181afc930bf4ec8cea1a144|pkg.b:transform_shared_b" + ] + }, + "meta": { + "python_tag": "cp313" + }, + "metrics": { + "cohesion_max": 2, + "complexity_max": 4, + "coupling_max": 0, + "dead_items": [], + "dependency_cycles": [ + [ + "pkg.a", + "pkg.b" + ] + ], + "dependency_max_depth": 3, + "health": { + "grade": "C", + "total": 68 + }, + "high_risk_classes": [], + "high_risk_functions": [], + "low_cohesion_classes": [] + }, + "stable_structure": { + "guard_terminal_profiles": [ + "none", + 
"return_expr" + ], + "guarded_functions": 2, + "side_effect_order_profiles": [ + "effect_only", + "guard_then_effect" + ], + "terminal_kinds": [ + "assign", + "return_expr" + ], + "try_finally_profiles": [ + "none" + ] + } +} diff --git a/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/a.py b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/a.py new file mode 100644 index 0000000..88652fb --- /dev/null +++ b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/a.py @@ -0,0 +1,56 @@ +# mypy: ignore-errors + +from pkg.b import ServiceB + + +class ServiceA: + def __init__(self) -> None: + self.worker = ServiceB() + + def compute(self, values: list[int]) -> tuple[list[int], int]: + return transform_shared_a(values) + + +def transform_shared_a(items: list[int]) -> tuple[list[int], int]: + if not items: + return [], 0 + + prepared: list[int] = [] + total = 0 + count = 0 + + for item in items: + doubled = item * 2 + if doubled % 3 == 0: + prepared.append(doubled) + else: + prepared.append(doubled + 1) + total += prepared[-1] + count += 1 + + average = total // count if count else 0 + marker = len(prepared) + audit = marker + average + checksum = audit + total + signature = checksum - marker + offset = signature + 1 + window = offset + 2 + anchor = window + 3 + tail = anchor + 4 + extra = tail + 5 + checksum2 = extra + 6 + checksum3 = checksum2 + 7 + checksum4 = checksum3 + 8 + checksum5 = checksum4 + 9 + checksum6 = checksum5 + 10 + checksum7 = checksum6 + 11 + checksum8 = checksum7 + 12 + checksum9 = checksum8 + 13 + checksum10 = checksum9 + 14 + checksum11 = checksum10 + 15 + checksum12 = checksum11 + 16 + checksum13 = checksum12 + 17 + checksum14 = checksum13 + 18 + checksum15 = checksum14 + 19 + + return prepared, checksum15 diff --git a/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/app.py b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/app.py new file mode 100644 index 0000000..d7e90ab --- /dev/null +++ b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/app.py @@ 
-0,0 +1,9 @@ +# mypy: ignore-errors + +from pkg.a import ServiceA, transform_shared_a +from pkg.b import transform_shared_b + +SERVICE = ServiceA() +RESULT_A = transform_shared_a([1, 2, 3]) +RESULT_B = transform_shared_b([1, 2, 3]) +RESULT_C = SERVICE.compute([1, 2, 3]) diff --git a/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/b.py b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/b.py new file mode 100644 index 0000000..8ba3bd9 --- /dev/null +++ b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/b.py @@ -0,0 +1,56 @@ +# mypy: ignore-errors + +from pkg.a import ServiceA + + +class ServiceB: + def __init__(self) -> None: + self.peer: ServiceA | None = None + + def compute(self, values: list[int]) -> tuple[list[int], int]: + return transform_shared_b(values) + + +def transform_shared_b(items: list[int]) -> tuple[list[int], int]: + if not items: + return [], 0 + + prepared: list[int] = [] + total = 0 + count = 0 + + for item in items: + doubled = item * 2 + if doubled % 3 == 0: + prepared.append(doubled) + else: + prepared.append(doubled + 1) + total += prepared[-1] + count += 1 + + average = total // count if count else 0 + marker = len(prepared) + audit = marker + average + checksum = audit + total + signature = checksum - marker + offset = signature + 1 + window = offset + 2 + anchor = window + 3 + tail = anchor + 4 + extra = tail + 5 + checksum2 = extra + 6 + checksum3 = checksum2 + 7 + checksum4 = checksum3 + 8 + checksum5 = checksum4 + 9 + checksum6 = checksum5 + 10 + checksum7 = checksum6 + 11 + checksum8 = checksum7 + 12 + checksum9 = checksum8 + 13 + checksum10 = checksum9 + 14 + checksum11 = checksum10 + 15 + checksum12 = checksum11 + 16 + checksum13 = checksum12 + 17 + checksum14 = checksum13 + 18 + checksum15 = checksum14 + 19 + + return prepared, checksum15 diff --git a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json new file mode 100644 
index 0000000..f202dbe --- /dev/null +++ b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json @@ -0,0 +1,47 @@ +{ + "meta": { + "python_tag": "cp313" + }, + "report_schema_version": "2.1", + "project_name": "pyproject_defaults", + "scan_root": ".", + "baseline_status": "missing", + "baseline_loaded": false, + "cache_used": false, + "findings_summary": { + "total": 1, + "families": { + "clones": 1, + "structural": 0, + "dead_code": 0, + "design": 0 + }, + "severity": { + "critical": 0, + "warning": 1, + "info": 0 + }, + "impact_scope": { + "runtime": 1, + "non_runtime": 0, + "mixed": 0 + }, + "clones": { + "functions": 1, + "blocks": 0, + "segments": 0, + "new": 1, + "known": 0 + }, + "suppressed": { + "dead_code": 0 + } + }, + "function_group_ids": [ + "clone:function:c35ab49bab0141cbc3b2745742d0ff6e186ae15f|0-19" + ], + "block_group_ids": [], + "segment_group_ids": [], + "structural_group_ids": [], + "structural_group_kinds": [] +} diff --git a/tests/fixtures/golden_v2/pyproject_defaults/pkg/one.py b/tests/fixtures/golden_v2/pyproject_defaults/pkg/one.py new file mode 100644 index 0000000..b70aeba --- /dev/null +++ b/tests/fixtures/golden_v2/pyproject_defaults/pkg/one.py @@ -0,0 +1,3 @@ +def tiny(value: int) -> int: + data = value + 1 + return data * 2 diff --git a/tests/fixtures/golden_v2/pyproject_defaults/pkg/two.py b/tests/fixtures/golden_v2/pyproject_defaults/pkg/two.py new file mode 100644 index 0000000..b70aeba --- /dev/null +++ b/tests/fixtures/golden_v2/pyproject_defaults/pkg/two.py @@ -0,0 +1,3 @@ +def tiny(value: int) -> int: + data = value + 1 + return data * 2 diff --git a/tests/fixtures/golden_v2/pyproject_defaults/pyproject.toml b/tests/fixtures/golden_v2/pyproject_defaults/pyproject.toml new file mode 100644 index 0000000..3d4b934 --- /dev/null +++ b/tests/fixtures/golden_v2/pyproject_defaults/pyproject.toml @@ -0,0 +1,4 @@ +[tool.codeclone] +min_loc = 1 +min_stmt = 1 +skip_metrics = true diff --git 
a/tests/fixtures/golden_v2/test_only_usage/golden_expected_snapshot.json b/tests/fixtures/golden_v2/test_only_usage/golden_expected_snapshot.json new file mode 100644 index 0000000..6357123 --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/golden_expected_snapshot.json @@ -0,0 +1,62 @@ +{ + "cohort_structural_findings": { + "count": 0, + "keys": [], + "kinds": [] + }, + "files": { + "classes": 0, + "count": 4, + "functions": 5, + "lines": 33, + "methods": 0 + }, + "groups": { + "block_keys": [], + "function_keys": [], + "segment_keys": [] + }, + "meta": { + "python_tag": "cp313" + }, + "metrics": { + "cohesion_max": 0, + "complexity_max": 1, + "coupling_max": 0, + "dead_items": [ + { + "confidence": "high", + "filepath": "pkg/core.py", + "kind": "function", + "qualname": "pkg.core:orphan" + } + ], + "dependency_cycles": [], + "dependency_max_depth": 3, + "health": { + "grade": "A", + "total": 98 + }, + "high_risk_classes": [], + "high_risk_functions": [], + "low_cohesion_classes": [] + }, + "stable_structure": { + "guard_terminal_profiles": [ + "none" + ], + "guarded_functions": 0, + "side_effect_order_profiles": [ + "effect_only", + "none" + ], + "terminal_kinds": [ + "fallthrough", + "return_expr", + "return_name" + ], + "try_finally_profiles": [ + "none" + ] + } +} diff --git a/tests/fixtures/golden_v2/test_only_usage/pkg/consumer.py b/tests/fixtures/golden_v2/test_only_usage/pkg/consumer.py new file mode 100644 index 0000000..5973a01 --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/pkg/consumer.py @@ -0,0 +1,7 @@ +# mypy: ignore-errors + +from pkg.core import live + + +def run(value: int) -> int: + return live(value) diff --git a/tests/fixtures/golden_v2/test_only_usage/pkg/core.py b/tests/fixtures/golden_v2/test_only_usage/pkg/core.py new file mode 100644 index 0000000..9cc2537 --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/pkg/core.py @@ -0,0 +1,14 @@ +# mypy: ignore-errors + + +def helper(value: int) -> int: + return 
value + 1 + + +def live(value: int) -> int: + result = helper(value) + return result + + +def orphan(value: int) -> int: + return value - 1 diff --git a/tests/fixtures/golden_v2/test_only_usage/pkg/main.py b/tests/fixtures/golden_v2/test_only_usage/pkg/main.py new file mode 100644 index 0000000..c1475ce --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/pkg/main.py @@ -0,0 +1,5 @@ +# mypy: ignore-errors + +from pkg.consumer import run + +RESULT = run(3) diff --git a/tests/fixtures/golden_v2/test_only_usage/pkg/tests/fixture_core.py b/tests/fixtures/golden_v2/test_only_usage/pkg/tests/fixture_core.py new file mode 100644 index 0000000..65bc363 --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/pkg/tests/fixture_core.py @@ -0,0 +1,7 @@ +# mypy: ignore-errors + +from pkg.core import orphan + + +def test_orphan() -> None: + assert orphan(4) == 3 diff --git a/tests/test_architecture.py b/tests/test_architecture.py new file mode 100644 index 0000000..34101e9 --- /dev/null +++ b/tests/test_architecture.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import ast +from pathlib import Path + + +def _module_name_from_path(path: Path) -> str: + parts = list(path.with_suffix("").parts) + return ".".join(parts) + + +def _resolve_import(module_name: str, node: ast.ImportFrom) -> str: + if node.level == 0: + return node.module or "" + + parts = module_name.split(".") + prefix_parts = parts[: -node.level] + if node.module: + return ".".join([*prefix_parts, node.module]) + return ".".join(prefix_parts) + + +def _iter_codeclone_modules(root: Path) -> list[tuple[str, Path]]: + return [ + (_module_name_from_path(path.relative_to(root)), path) + for path in sorted((root / "codeclone").rglob("*.py")) + ] + + +def _iter_local_imports(module_name: str, source: str) -> list[str]: + tree = ast.parse(source) + imports: list[str] = [] + for node in ast.walk(tree): + if isinstance(node, ast.Import): + imports.extend(alias.name for alias in node.names) + elif 
isinstance(node, ast.ImportFrom): + imports.append(_resolve_import(module_name, node)) + return [ + import_name for import_name in imports if import_name.startswith("codeclone") + ] + + +def _violates(import_name: str, forbidden_prefixes: tuple[str, ...]) -> bool: + return any( + import_name == prefix or import_name.startswith(prefix + ".") + for prefix in forbidden_prefixes + ) + + +def test_architecture_layer_violations() -> None: + root = Path(__file__).resolve().parents[1] + violations: list[str] = [] + + forbidden_by_module_prefix: tuple[tuple[str, tuple[str, ...]], ...] = ( + ( + "codeclone.report.", + ( + "codeclone.ui_messages", + "codeclone.html_report", + "codeclone.cli", + ), + ), + ( + "codeclone.extractor", + ( + "codeclone.report", + "codeclone.cli", + "codeclone.baseline", + ), + ), + ( + "codeclone.grouping", + ( + "codeclone.cli", + "codeclone.baseline", + "codeclone.html_report", + ), + ), + ( + "codeclone.baseline", + ( + "codeclone.cli", + "codeclone.ui_messages", + "codeclone.html_report", + ), + ), + ( + "codeclone.cache", + ( + "codeclone.cli", + "codeclone.ui_messages", + "codeclone.html_report", + ), + ), + ( + "codeclone.domain.", + ( + "codeclone.cli", + "codeclone.pipeline", + "codeclone.report", + "codeclone.html_report", + "codeclone.ui_messages", + "codeclone.baseline", + "codeclone.cache", + ), + ), + ) + + for module_name, path in _iter_codeclone_modules(root): + imports = _iter_local_imports(module_name, path.read_text("utf-8")) + + for module_prefix, forbidden_prefixes in forbidden_by_module_prefix: + if not module_name.startswith((module_prefix, module_prefix + ".")): + continue + violations.extend( + [ + ( + f"{module_name} -> {import_name} " + f"(forbidden: {forbidden_prefixes})" + ) + for import_name in imports + if _violates(import_name, forbidden_prefixes) + ] + ) + + if module_name == "codeclone.models": + allowed_prefixes = ("codeclone.contracts", "codeclone.errors") + for import_name in imports: + if import_name in 
allowed_prefixes or import_name.startswith( + tuple(f"{prefix}." for prefix in allowed_prefixes) + ): + continue + violations.append( + f"codeclone.models imports unexpected local module: {import_name}" + ) + + if ( + module_name.startswith("codeclone.domain.") + and module_name != "codeclone.domain.__init__" + ): + violations.extend( + [ + "codeclone.domain submodule imports unexpected local module: " + f"{module_name} -> {import_name}" + for import_name in imports + ] + ) + + assert violations == [] diff --git a/tests/test_baseline.py b/tests/test_baseline.py index f74ecdc..127af92 100644 --- a/tests/test_baseline.py +++ b/tests/test_baseline.py @@ -111,6 +111,30 @@ def test_baseline_roundtrip_v1(tmp_path: Path) -> None: assert loaded.blocks == {_block_id()} +def test_baseline_save_updates_runtime_meta_fields(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.json" + baseline = Baseline.from_groups( + {_func_id(): []}, + {_block_id(): []}, + path=baseline_path, + ) + + assert baseline.payload_sha256 is None + assert baseline.created_at is None + + baseline.save() + + assert baseline.schema_version == BASELINE_SCHEMA_VERSION + assert baseline.fingerprint_version == BASELINE_FINGERPRINT_VERSION + assert baseline.python_tag == _python_tag() + assert baseline.generator == "codeclone" + assert isinstance(baseline.generator_version, str) + assert isinstance(baseline.created_at, str) + assert isinstance(baseline.payload_sha256, str) + assert len(baseline.payload_sha256) == 64 + baseline.verify_integrity() + + def test_baseline_save_atomic(tmp_path: Path) -> None: baseline_path = tmp_path / "baseline.json" baseline = Baseline(baseline_path) @@ -161,22 +185,26 @@ def _boom_exists(self: Path) -> bool: assert exc.value.status == "invalid_type" -def test_baseline_load_invalid_json(tmp_path: Path) -> None: - baseline_path = tmp_path / "baseline.json" - baseline_path.write_text("{broken json", "utf-8") - baseline = Baseline(baseline_path) - with 
pytest.raises(BaselineValidationError, match="Corrupted baseline file") as exc: - baseline.load() - assert exc.value.status == "invalid_json" - - -def test_baseline_load_non_object_payload(tmp_path: Path) -> None: +@pytest.mark.parametrize( + ("raw_payload", "error_match", "expected_status"), + [ + ("{broken json", "Corrupted baseline file", "invalid_json"), + ("[]", "must be an object", "invalid_type"), + ], + ids=["invalid_json", "non_object_payload"], +) +def test_baseline_load_rejects_invalid_json_shapes( + tmp_path: Path, + raw_payload: str, + error_match: str, + expected_status: str, +) -> None: baseline_path = tmp_path / "baseline.json" - baseline_path.write_text("[]", "utf-8") + baseline_path.write_text(raw_payload, "utf-8") baseline = Baseline(baseline_path) - with pytest.raises(BaselineValidationError, match="must be an object") as exc: + with pytest.raises(BaselineValidationError, match=error_match) as exc: baseline.load() - assert exc.value.status == "invalid_type" + assert exc.value.status == expected_status def test_baseline_load_legacy_payload(tmp_path: Path) -> None: @@ -348,12 +376,22 @@ def test_baseline_verify_generator_mismatch(tmp_path: Path) -> None: assert exc.value.status == "generator_mismatch" -def test_baseline_verify_schema_too_new(tmp_path: Path) -> None: +@pytest.mark.parametrize( + ("schema_version", "error_match"), + [ + ("1.1", "newer than supported"), + ("3.0", "schema version mismatch"), + ], + ids=["schema_too_new", "schema_major_mismatch"], +) +def test_baseline_verify_schema_incompatibilities( + tmp_path: Path, schema_version: str, error_match: str +) -> None: baseline_path = tmp_path / "baseline.json" - _write_payload(baseline_path, _trusted_payload(schema_version="1.1")) + _write_payload(baseline_path, _trusted_payload(schema_version=schema_version)) baseline = Baseline(baseline_path) baseline.load() - with pytest.raises(BaselineValidationError, match="newer than supported") as exc: + with 
pytest.raises(BaselineValidationError, match=error_match) as exc: baseline.verify_compatibility(current_python_tag=_python_tag()) assert exc.value.status == "mismatch_schema_version" @@ -658,16 +696,6 @@ def test_baseline_from_groups_defaults() -> None: assert baseline.generator == "codeclone" -def test_baseline_verify_schema_major_mismatch(tmp_path: Path) -> None: - baseline_path = tmp_path / "baseline.json" - _write_payload(baseline_path, _trusted_payload(schema_version="2.0")) - baseline = Baseline(baseline_path) - baseline.load() - with pytest.raises(BaselineValidationError, match="schema version mismatch") as exc: - baseline.verify_compatibility(current_python_tag=_python_tag()) - assert exc.value.status == "mismatch_schema_version" - - @pytest.mark.parametrize( ("attr", "match_text"), [ @@ -874,3 +902,247 @@ def test_baseline_require_sorted_unique_ids_non_string(tmp_path: Path) -> None: path=path, ) assert exc.value.status == "invalid_type" + + +def test_baseline_load_rejects_metrics_section_for_schema_v1(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.json" + payload = _trusted_payload(schema_version="1.0") + assert isinstance(payload, dict) + payload["metrics"] = {"health_score": 80} + _write_payload(baseline_path, payload) + baseline = Baseline(baseline_path) + with pytest.raises( + BaselineValidationError, match=r"requires baseline schema >= 2\.0" + ) as exc: + baseline.load() + assert exc.value.status == "mismatch_schema_version" + + +def test_baseline_save_preserves_embedded_metrics_and_hash(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.json" + payload = _trusted_payload() + assert isinstance(payload, dict) + payload["metrics"] = {"health_score": 70} + meta = payload.get("meta") + assert isinstance(meta, dict) + meta["metrics_payload_sha256"] = "f" * 64 + _write_payload(baseline_path, payload) + + baseline = Baseline(baseline_path) + baseline.load() + baseline.save() + + saved = 
json.loads(baseline_path.read_text("utf-8")) + saved_meta = saved.get("meta") + assert isinstance(saved_meta, dict) + assert saved["metrics"] == {"health_score": 70} + assert saved_meta["metrics_payload_sha256"] == "f" * 64 + + +def test_baseline_save_preserves_embedded_metrics_without_hash(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.json" + payload = _trusted_payload() + assert isinstance(payload, dict) + payload["metrics"] = {"health_score": 65} + meta = payload.get("meta") + assert isinstance(meta, dict) + meta.pop("metrics_payload_sha256", None) + _write_payload(baseline_path, payload) + + baseline = Baseline(baseline_path) + baseline.load() + baseline.save() + + saved = json.loads(baseline_path.read_text("utf-8")) + saved_meta = saved.get("meta") + assert isinstance(saved_meta, dict) + assert saved["metrics"] == {"health_score": 65} + assert "metrics_payload_sha256" not in saved_meta + + +def test_preserve_embedded_metrics_variants(tmp_path: Path) -> None: + path = tmp_path / "baseline.json" + _write_payload(path, {"meta": {}, "clones": {"functions": [], "blocks": []}}) + assert baseline_mod._preserve_embedded_metrics(path) == (None, None) + + _write_payload( + path, + { + "meta": [], + "clones": {"functions": [], "blocks": []}, + "metrics": {"x": 1}, + }, + ) + assert baseline_mod._preserve_embedded_metrics(path) == ({"x": 1}, None) + + _write_payload( + path, + { + "meta": {"metrics_payload_sha256": 1}, + "clones": {"functions": [], "blocks": []}, + "metrics": {"x": 2}, + }, + ) + assert baseline_mod._preserve_embedded_metrics(path) == ({"x": 2}, None) + + _write_payload( + path, + { + "meta": {"metrics_payload_sha256": "a" * 64}, + "clones": {"functions": [], "blocks": []}, + "metrics": {"x": 3}, + }, + ) + assert baseline_mod._preserve_embedded_metrics(path) == ({"x": 3}, "a" * 64) + + +def test_baseline_save_defensive_non_mapping_meta( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + baseline_path = tmp_path / 
"baseline.json" + baseline = Baseline.from_groups( + {_func_id(): []}, + {_block_id(): []}, + path=baseline_path, + ) + + def _payload(**_kwargs: object) -> dict[str, object]: + return { + "meta": "broken-meta", + "clones": {"functions": [], "blocks": []}, + } + + monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + monkeypatch.setattr( + baseline_mod, + "_preserve_embedded_metrics", + lambda _path: ({"health_score": 1}, "a" * 64), + ) + baseline.save() + + saved = json.loads(baseline_path.read_text("utf-8")) + assert saved["meta"] == "broken-meta" + assert saved["metrics"] == {"health_score": 1} + assert baseline.payload_sha256 is None + assert baseline.generator == "codeclone" + + +def test_baseline_save_syncs_generator_when_meta_uses_string( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + baseline_path = tmp_path / "baseline.json" + baseline = Baseline(baseline_path) + + def _payload(**_kwargs: object) -> dict[str, object]: + return { + "meta": { + "generator": "custom-generator", + "schema_version": "2.0", + "fingerprint_version": "1", + "python_tag": "cp313", + "created_at": "2026-03-07T12:00:00Z", + "payload_sha256": "f" * 64, + }, + "clones": {"functions": [], "blocks": []}, + } + + monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + baseline.save() + + _assert_baseline_runtime_meta( + baseline, + generator="custom-generator", + schema_version="2.0", + fingerprint_version="1", + python_tag="cp313", + created_at="2026-03-07T12:00:00Z", + payload_sha256="f" * 64, + ) + + +def test_baseline_save_skips_non_string_meta_updates( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + baseline_path = tmp_path / "baseline.json" + baseline = Baseline(baseline_path) + baseline.generator = "keep-generator" + baseline.generator_version = "2.0.0" + baseline.schema_version = "2.0" + baseline.fingerprint_version = "1" + baseline.python_tag = "cp313" + baseline.created_at = "2026-03-07T00:00:00Z" + baseline.payload_sha256 
= "e" * 64 + + def _payload(**_kwargs: object) -> dict[str, object]: + return { + "meta": { + "generator": {"name": 1, "version": 2}, + "schema_version": 2, + "fingerprint_version": 1, + "python_tag": 313, + "created_at": None, + "payload_sha256": 42, + }, + "clones": {"functions": [], "blocks": []}, + } + + monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + baseline.save() + + _assert_baseline_runtime_meta( + baseline, + generator="keep-generator", + generator_version="2.0.0", + schema_version="2.0", + fingerprint_version="1", + python_tag="cp313", + created_at="2026-03-07T00:00:00Z", + payload_sha256="e" * 64, + ) + + +def _assert_baseline_runtime_meta( + baseline: Baseline, + *, + generator: str, + schema_version: str, + fingerprint_version: str, + python_tag: str, + created_at: str, + payload_sha256: str, + generator_version: str | None = None, +) -> None: + assert baseline.generator == generator + if generator_version is not None: + assert baseline.generator_version == generator_version + assert baseline.schema_version == schema_version + assert baseline.fingerprint_version == fingerprint_version + assert baseline.python_tag == python_tag + assert baseline.created_at == created_at + assert baseline.payload_sha256 == payload_sha256 + + +def test_baseline_save_ignores_non_string_non_mapping_generator( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + baseline_path = tmp_path / "baseline.json" + baseline = Baseline(baseline_path) + baseline.generator = "keep-generator" + + def _payload(**_kwargs: object) -> dict[str, object]: + return { + "meta": { + "generator": 123, + "schema_version": "2.0", + "fingerprint_version": "1", + "python_tag": "cp313", + "created_at": "2026-03-07T12:00:00Z", + "payload_sha256": "a" * 64, + }, + "clones": {"functions": [], "blocks": []}, + } + + monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + baseline.save() + + assert baseline.generator == "keep-generator" diff --git 
a/tests/test_blockhash.py b/tests/test_blockhash.py index 346ae9e..003f120 100644 --- a/tests/test_blockhash.py +++ b/tests/test_blockhash.py @@ -1,6 +1,6 @@ import ast -from codeclone.blockhash import stmt_hash +from codeclone.blockhash import stmt_hashes from codeclone.normalize import NormalizationConfig @@ -8,4 +8,4 @@ def test_stmt_hash_normalizes_names() -> None: cfg = NormalizationConfig() s1 = ast.parse("a = b + 1").body[0] s2 = ast.parse("x = y + 2").body[0] - assert stmt_hash(s1, cfg) == stmt_hash(s2, cfg) + assert stmt_hashes([s1], cfg)[0] == stmt_hashes([s2], cfg)[0] diff --git a/tests/test_cache.py b/tests/test_cache.py index 773c6aa..e0c2cf3 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -2,6 +2,7 @@ import json import os +from collections.abc import Callable from pathlib import Path from typing import Any, cast @@ -80,6 +81,105 @@ def test_cache_roundtrip(tmp_path: Path) -> None: assert loaded.cache_schema_version == Cache._CACHE_VERSION +def test_cache_roundtrip_preserves_empty_structural_findings(tmp_path: Path) -> None: + cache_path = tmp_path / "cache.json" + cache = Cache(cache_path) + cache.put_file_entry( + "x.py", + {"mtime_ns": 1, "size": 10}, + [], + [], + [], + structural_findings=[], + ) + cache.save() + + loaded = Cache(cache_path) + loaded.load() + entry = loaded.get_file_entry("x.py") + assert entry is not None + assert "structural_findings" in entry + assert entry["structural_findings"] == [] + + +def test_cache_load_normalizes_stale_structural_findings(tmp_path: Path) -> None: + cache_path = tmp_path / "cache.json" + cache = Cache(cache_path) + entry = cast( + Any, + { + "stat": {"mtime_ns": 1, "size": 10}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "import_names": [], + "class_names": [], + "structural_findings": [ + { + "finding_kind": "duplicated_branches", + "finding_key": "abc" * 13 + "a", + "signature": { + 
"calls": "2+", + "has_loop": "0", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr", + "terminal": "expr", + }, + "items": [ + {"qualname": "mod:fn", "start": 5, "end": 5}, + {"qualname": "mod:fn", "start": 8, "end": 8}, + ], + }, + { + "finding_kind": "duplicated_branches", + "finding_key": "def" * 13 + "d", + "signature": { + "calls": "0", + "has_loop": "0", + "has_try": "1", + "nested_if": "1", + "raises": "0", + "stmt_seq": "Try", + "terminal": "fallthrough", + }, + "items": [ + {"qualname": "mod:fn", "start": 10, "end": 20}, + {"qualname": "mod:fn", "start": 14, "end": 20}, + {"qualname": "mod:fn", "start": 30, "end": 35}, + ], + }, + ], + }, + ) + payload = _analysis_payload( + cache, + files={"x.py": cache_mod._encode_wire_file_entry(entry)}, + ) + signature = cache._sign_data(payload) + cache_path.write_text( + json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}), + "utf-8", + ) + + loaded = Cache(cache_path) + loaded.load() + loaded_entry = loaded.get_file_entry("x.py") + assert loaded_entry is not None + findings = loaded_entry["structural_findings"] + assert len(findings) == 1 + assert findings[0]["finding_key"] == "def" * 13 + "d" + assert findings[0]["items"] == [ + {"qualname": "mod:fn", "start": 10, "end": 20}, + {"qualname": "mod:fn", "start": 30, "end": 35}, + ] + + def test_get_file_entry_uses_wire_key_fallback(tmp_path: Path) -> None: root = tmp_path / "project" file_path = root / "pkg" / "module.py" @@ -99,6 +199,61 @@ def test_get_file_entry_uses_wire_key_fallback(tmp_path: Path) -> None: assert cache.get_file_entry(non_canonical) is not None +def test_get_file_entry_keeps_loaded_cache_clean_on_canonical_hit( + tmp_path: Path, +) -> None: + cache_path = tmp_path / "cache.json" + cache = Cache(cache_path) + cache.put_file_entry("x.py", {"mtime_ns": 1, "size": 10}, [], [], []) + cache.save() + + loaded = Cache(cache_path) + loaded.load() + assert loaded._dirty is False + assert 
loaded.get_file_entry("x.py") is not None + assert loaded._dirty is False + + +def test_store_canonical_file_entry_marks_dirty_only_when_entry_changes( + tmp_path: Path, +) -> None: + cache = Cache(tmp_path / "cache.json") + canonical_entry = cast( + Any, + cache_mod._canonicalize_cache_entry( + { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + } + ), + ) + cache.data["files"]["x.py"] = canonical_entry + cache._canonical_runtime_paths.add("x.py") + cache._dirty = False + + cache._store_canonical_file_entry( + runtime_path="x.py", + canonical_entry=canonical_entry, + ) + assert cache._dirty is False + + cache._canonical_runtime_paths.clear() + cache._store_canonical_file_entry( + runtime_path="x.py", + canonical_entry=canonical_entry, + ) + assert cache._dirty is True + + def test_get_file_entry_missing_after_fallback_returns_none(tmp_path: Path) -> None: root = tmp_path / "project" root.mkdir() @@ -163,6 +318,59 @@ def test_cache_signature_validation_ignores_json_whitespace(tmp_path: Path) -> N assert loaded.get_file_entry("x.py") is not None +def test_decode_wire_file_and_name_section_helpers_cover_valid_and_invalid() -> None: + encoded = cache_mod._encode_wire_file_entry( + { + "stat": {"mtime_ns": 1, "size": 10}, + "units": [cache_mod._unit_dict_from_model(_make_unit("x.py"), "x.py")], + "blocks": [cache_mod._block_dict_from_model(_make_block("x.py"), "x.py")], + "segments": [ + cache_mod._segment_dict_from_model(_make_segment("x.py"), "x.py") + ], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": ["used"], + "referenced_qualnames": ["pkg.mod:used"], + "import_names": ["pkg"], + "class_names": ["Service"], + } + ) + assert isinstance(encoded, dict) + + file_sections = cache_mod._decode_wire_file_sections(obj=encoded, 
filepath="x.py") + assert file_sections is not None + units, blocks, segments, class_metrics, module_deps, dead_candidates = file_sections + assert units[0]["qualname"] == "mod:func" + assert blocks[0]["qualname"] == "mod:func" + assert segments[0]["qualname"] == "mod:func" + assert class_metrics == [] + assert module_deps == [] + assert dead_candidates == [] + + name_sections = cache_mod._decode_wire_name_sections(obj=encoded) + assert name_sections == ( + ["used"], + ["pkg.mod:used"], + ["pkg"], + ["Service"], + ) + + invalid_sections = dict(encoded) + invalid_sections["u"] = "bad" + assert ( + cache_mod._decode_wire_file_sections( + obj=invalid_sections, + filepath="x.py", + ) + is None + ) + + invalid_names = dict(encoded) + invalid_names["rn"] = 1 + assert cache_mod._decode_wire_name_sections(obj=invalid_names) is None + + def test_cache_signature_mismatch_warns(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) @@ -492,11 +700,7 @@ def _raise_stat(self: Path, *args: object, **kwargs: object) -> os.stat_result: monkeypatch.setattr(Path, "stat", _raise_stat) cache = Cache(cache_path) cache.load() - assert cache.load_warning is not None - assert "unreadable" in cache.load_warning - assert cache.data["files"] == {} - assert cache.load_status == CacheStatus.UNREADABLE - assert cache.cache_schema_version is None + _assert_unreadable_cache_contract(cache) def test_cache_load_unreadable_read_graceful_ignore( @@ -518,6 +722,10 @@ def _raise_read_text( monkeypatch.setattr(Path, "read_text", _raise_read_text) cache = Cache(cache_path) cache.load() + _assert_unreadable_cache_contract(cache) + + +def _assert_unreadable_cache_contract(cache: Cache) -> None: assert cache.load_warning is not None assert "unreadable" in cache.load_warning assert cache.data["files"] == {} @@ -543,22 +751,10 @@ def test_cache_save_error(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> No cache_path = tmp_path / "cache.json" cache = Cache(cache_path) - 
original_write_text = Path.write_text - - def _raise_write_text( - self: Path, - data: str, - encoding: str | None = None, - errors: str | None = None, - newline: str | None = None, - ) -> int: - if self.name.endswith(".tmp"): - raise OSError("nope") - return original_write_text( - self, data, encoding=encoding, errors=errors, newline=newline - ) + def _raise_fsync(_fd: int) -> None: + raise OSError("nope") - monkeypatch.setattr(Path, "write_text", _raise_write_text) + monkeypatch.setattr(os, "fsync", _raise_fsync) with pytest.raises(CacheError): cache.save() @@ -660,10 +856,21 @@ def test_cache_load_missing_payload_or_sig(tmp_path: Path) -> None: assert "format invalid" in cache.load_warning -def test_cache_load_missing_python_tag_in_payload(tmp_path: Path) -> None: +@pytest.mark.parametrize( + "payload_factory", + [ + lambda cache: {"fp": cache.data["fingerprint_version"], "files": {}}, + lambda cache: {"py": cache.data["python_tag"], "files": {}}, + ], + ids=["missing_python_tag", "missing_fingerprint_version"], +) +def test_cache_load_rejects_missing_required_payload_fields( + tmp_path: Path, + payload_factory: Callable[[Cache], dict[str, object]], +) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) - payload = {"fp": cache.data["fingerprint_version"], "files": {}} + payload = payload_factory(cache) sig = cache._sign_data(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" @@ -691,19 +898,6 @@ def test_cache_load_python_tag_mismatch(tmp_path: Path) -> None: assert "python tag mismatch" in cache.load_warning -def test_cache_load_missing_fingerprint_version(tmp_path: Path) -> None: - cache_path = tmp_path / "cache.json" - cache = Cache(cache_path) - payload = {"py": cache.data["python_tag"], "files": {}} - sig = cache._sign_data(payload) - cache_path.write_text( - json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" - ) - cache.load() - assert 
cache.load_warning is not None - assert "format invalid" in cache.load_warning - - def test_cache_load_fingerprint_version_mismatch(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) @@ -919,6 +1113,7 @@ def test_decode_wire_file_entry_invalid_variants(entry: object, filepath: str) - def test_decode_wire_item_type_failures() -> None: assert cache_mod._decode_wire_unit(["q", 1, 2, 3, 4, "fp"], "x.py") is None + assert cache_mod._decode_wire_unit(["q", 1, 2, 3, 4, "fp", "0-19"], "x.py") is None assert ( cache_mod._decode_wire_unit(["q", "1", 2, 3, 4, "fp", "0-19"], "x.py") is None ) @@ -928,6 +1123,23 @@ def test_decode_wire_item_type_failures() -> None: assert cache_mod._decode_wire_segment(["q", 1, 2, "3", "h", "sig"], "x.py") is None +def test_decode_wire_item_rejects_invalid_risk_fields() -> None: + assert ( + cache_mod._decode_wire_unit( + ["q", 1, 2, 3, 4, "fp", "0-19", 2, 1, "critical", "raw"], + "x.py", + ) + is None + ) + assert ( + cache_mod._decode_wire_class_metric( + ["pkg.mod:Service", 1, 10, 3, 2, 4, 1, 7, 8], + "x.py", + ) + is None + ) + + def test_resolve_root_oserror_returns_none( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ -940,3 +1152,522 @@ def _resolve_with_error(self: Path, *, strict: bool = False) -> Path: monkeypatch.setattr(Path, "resolve", _resolve_with_error) assert cache_mod._resolve_root(tmp_path) is None + + +def test_cache_entry_rejects_invalid_metrics_sections(tmp_path: Path) -> None: + cache = Cache(tmp_path / "cache.json") + cache.data["files"]["x.py"] = cast( + Any, + { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": "bad", + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "import_names": [], + "class_names": [], + }, + ) + assert cache.get_file_entry("x.py") is None + + +def test_decode_wire_file_entry_rejects_metrics_related_invalid_sections() -> None: + assert ( + 
cache_mod._decode_wire_file_entry({"st": [1, 2], "cm": "bad"}, "x.py") is None + ) + assert ( + cache_mod._decode_wire_file_entry( + {"st": [1, 2], "cm": [["Q", 1, 2, 3, 4, 5, 6, "low"]]}, + "x.py", + ) + is None + ) + assert ( + cache_mod._decode_wire_file_entry({"st": [1, 2], "md": "bad"}, "x.py") is None + ) + assert ( + cache_mod._decode_wire_file_entry( + {"st": [1, 2], "md": [["source", "target", "import"]]}, + "x.py", + ) + is None + ) + assert ( + cache_mod._decode_wire_file_entry({"st": [1, 2], "dc": "bad"}, "x.py") is None + ) + decoded = cache_mod._decode_wire_file_entry( + {"st": [1, 2], "dc": [["q", "n", 1, 2, "function"]]}, + "x.py", + ) + assert decoded is not None + assert decoded["dead_candidates"][0]["filepath"] == "x.py" + assert cache_mod._decode_wire_file_entry({"st": [1, 2], "rn": [1]}, "x.py") is None + assert cache_mod._decode_wire_file_entry({"st": [1, 2], "in": [1]}, "x.py") is None + assert cache_mod._decode_wire_file_entry({"st": [1, 2], "cn": [1]}, "x.py") is None + assert ( + cache_mod._decode_wire_file_entry({"st": [1, 2], "cc": "bad"}, "x.py") is None + ) + assert ( + cache_mod._decode_wire_file_entry({"st": [1, 2], "cc": [["Q"]]}, "x.py") is None + ) + assert ( + cache_mod._decode_wire_file_entry( + {"st": [1, 2], "cc": [["Q", ["A", 1]]]}, + "x.py", + ) + is None + ) + + +def test_decode_wire_file_entry_accepts_metrics_sections() -> None: + decoded = cache_mod._decode_wire_file_entry( + { + "st": [1, 2], + "cm": [["pkg.mod:Service", 1, 10, 3, 2, 4, 1, "low", "medium"]], + "cc": [["pkg.mod:Service", ["Zeta", "Alpha"]]], + "md": [["a", "b", "import", 1]], + "dc": [["pkg.mod:unused", "unused", 1, 2, "function"]], + "rn": ["name"], + "in": ["typing", "os"], + "cn": ["Service", "Model"], + }, + "x.py", + ) + assert decoded is not None + assert decoded["class_metrics"][0]["qualname"] == "pkg.mod:Service" + assert decoded["class_metrics"][0]["coupled_classes"] == ["Alpha", "Zeta"] + assert decoded["module_deps"][0]["target"] == "b" + 
assert decoded["dead_candidates"][0]["qualname"] == "pkg.mod:unused" + assert decoded["import_names"] == ["typing", "os"] + assert decoded["class_names"] == ["Service", "Model"] + + +def test_decode_wire_file_entry_optional_source_stats() -> None: + decoded = cache_mod._decode_wire_file_entry( + {"st": [1, 2], "ss": [10, 3, 1, 1]}, + "x.py", + ) + assert decoded is not None + assert decoded["source_stats"] == { + "lines": 10, + "functions": 3, + "methods": 1, + "classes": 1, + } + + assert cache_mod._decode_optional_wire_source_stats(obj={"ss": "bad"}) is None + assert cache_mod._decode_optional_wire_source_stats(obj={"ss": [1, 2, 3]}) is None + assert ( + cache_mod._decode_optional_wire_source_stats(obj={"ss": [1, 2, -1, 0]}) is None + ) + + +def test_decode_optional_wire_coupled_classes_rejects_non_string_qualname() -> None: + assert ( + cache_mod._decode_optional_wire_coupled_classes( + obj={"cc": [[1, ["A"]]]}, + key="cc", + ) + is None + ) + + +def test_decode_wire_file_entry_skips_empty_coupled_classes_mapping() -> None: + decoded = cache_mod._decode_wire_file_entry( + { + "st": [1, 2], + "cm": [["pkg.mod:Service", 1, 10, 3, 2, 4, 1, "low", "medium"]], + "cc": [["pkg.mod:Service", ["", ""]]], + }, + "x.py", + ) + assert decoded is not None + assert "coupled_classes" not in decoded["class_metrics"][0] + + +def test_decode_wire_metrics_items_and_deps_roundtrip_shape() -> None: + class_metric = cache_mod._decode_wire_class_metric( + ["pkg.mod:Service", 1, 10, 3, 2, 4, 1, "low", "medium"], + "x.py", + ) + assert class_metric is not None + assert class_metric["filepath"] == "x.py" + assert ( + cache_mod._decode_wire_class_metric( + ["pkg.mod:Service", "1", 10, 3, 2, 4, 1, "low", "medium"], + "x.py", + ) + is None + ) + + module_dep = cache_mod._decode_wire_module_dep(["a", "b", "import", 1]) + assert module_dep is not None + assert module_dep["source"] == "a" + assert cache_mod._decode_wire_module_dep(["a", "b", "import", "1"]) is None + + dead_candidate = 
cache_mod._decode_wire_dead_candidate( + ["pkg.mod:unused", "unused", 1, 2, "function"], + "fallback.py", + ) + assert dead_candidate is not None + assert dead_candidate["filepath"] == "fallback.py" + assert ( + cache_mod._decode_wire_dead_candidate( + ["pkg.mod:unused", "unused", "1", 2, "function"], + "fallback.py", + ) + is None + ) + assert ( + cache_mod._decode_wire_dead_candidate( + ["pkg.mod:unused", "unused", 1, 2, "function", "legacy.py"], + "fallback.py", + ) + is None + ) + dead_candidate_with_suppression = cache_mod._decode_wire_dead_candidate( + ["pkg.mod:unused", "unused", 1, 2, "function", ["dead-code", "dead-code"]], + "fallback.py", + ) + assert dead_candidate_with_suppression is not None + assert dead_candidate_with_suppression["suppressed_rules"] == ["dead-code"] + + +def test_encode_wire_file_entry_includes_optional_metrics_sections() -> None: + entry: cache_mod.CacheEntry = { + "stat": {"mtime_ns": 1, "size": 2}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [ + { + "qualname": "pkg.mod:Service", + "filepath": "x.py", + "start_line": 1, + "end_line": 10, + "cbo": 3, + "lcom4": 2, + "method_count": 4, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "medium", + "coupled_classes": ["ServiceB", "ServiceA"], + } + ], + "module_deps": [ + {"source": "a", "target": "b", "import_type": "import", "line": 1} + ], + "dead_candidates": [], + "referenced_names": [], + "import_names": ["z", "a"], + "class_names": ["B", "A"], + } + wire = cache_mod._encode_wire_file_entry(entry) + assert "cm" in wire + assert "cc" in wire + assert "md" in wire + assert wire["cc"] == [["pkg.mod:Service", ["ServiceA", "ServiceB"]]] + assert wire["in"] == ["a", "z"] + assert wire["cn"] == ["A", "B"] + + +def test_encode_wire_file_entry_compacts_dead_candidate_filepaths() -> None: + entry: cache_mod.CacheEntry = { + "stat": {"mtime_ns": 1, "size": 2}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + 
"module_deps": [], + "dead_candidates": [ + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "/repo/pkg/mod.py", + "start_line": 3, + "end_line": 4, + "kind": "function", + } + ], + "referenced_names": [], + "import_names": [], + "class_names": [], + } + wire = cache_mod._encode_wire_file_entry(entry) + assert wire["dc"] == [["pkg.mod:unused", "unused", 3, 4, "function"]] + + +def test_encode_wire_file_entry_encodes_dead_candidate_suppressions() -> None: + entry: cache_mod.CacheEntry = { + "stat": {"mtime_ns": 1, "size": 2}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [ + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "/repo/pkg/mod.py", + "start_line": 3, + "end_line": 4, + "kind": "function", + "suppressed_rules": ["dead-code", "dead-code"], + } + ], + "referenced_names": [], + "import_names": [], + "class_names": [], + } + wire = cache_mod._encode_wire_file_entry(entry) + assert wire["dc"] == [["pkg.mod:unused", "unused", 3, 4, "function", ["dead-code"]]] + + +def test_encode_wire_file_entry_skips_empty_or_invalid_coupled_classes() -> None: + entry: cache_mod.CacheEntry = { + "stat": {"mtime_ns": 1, "size": 2}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [ + { + "qualname": "pkg.mod:Empty", + "filepath": "x.py", + "start_line": 1, + "end_line": 2, + "cbo": 1, + "lcom4": 1, + "method_count": 1, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "low", + "coupled_classes": [], + }, + { + "qualname": "pkg.mod:Invalid", + "filepath": "x.py", + "start_line": 3, + "end_line": 4, + "cbo": 1, + "lcom4": 1, + "method_count": 1, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "low", + "coupled_classes": cast(Any, [1]), + }, + ], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "import_names": [], + "class_names": [], + } + wire = 
cache_mod._encode_wire_file_entry(entry) + assert "cc" not in wire + + +def test_get_file_entry_sorts_coupled_classes_in_runtime_payload( + tmp_path: Path, +) -> None: + cache = Cache(tmp_path / "cache.json") + cache.data["files"]["x.py"] = cast( + Any, + { + "stat": {"mtime_ns": 1, "size": 1}, + "source_stats": {"lines": 1, "functions": 1, "methods": 0, "classes": 0}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [ + { + "qualname": "pkg.mod:NoDeps", + "filepath": "x.py", + "start_line": 0, + "end_line": 0, + "cbo": 0, + "lcom4": 1, + "method_count": 0, + "instance_var_count": 0, + "risk_coupling": "low", + "risk_cohesion": "low", + "coupled_classes": [], + }, + { + "qualname": "pkg.mod:Service", + "filepath": "x.py", + "start_line": 1, + "end_line": 10, + "cbo": 2, + "lcom4": 1, + "method_count": 3, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "low", + "coupled_classes": ["Zeta", "Alpha", "Alpha"], + }, + ], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "import_names": [], + "class_names": [], + }, + ) + entry = cache.get_file_entry("x.py") + assert entry is not None + assert len(entry["class_metrics"]) == 2 + assert entry["class_metrics"][0]["qualname"] == "pkg.mod:NoDeps" + assert entry["class_metrics"][1]["coupled_classes"] == ["Alpha", "Zeta"] + assert entry["source_stats"]["functions"] == 1 + + +def test_cache_entry_container_shape_rejects_invalid_source_stats() -> None: + assert ( + cache_mod._has_cache_entry_container_shape( + { + "stat": {"mtime_ns": 1, "size": 1}, + "source_stats": { + "lines": 1, + "functions": 1, + "methods": "0", + "classes": 0, + }, + "units": [], + "blocks": [], + "segments": [], + } + ) + is False + ) + + +def test_cache_type_predicates_reject_non_dict_variants() -> None: + assert cache_mod._is_class_metrics_dict([]) is False + assert cache_mod._is_module_dep_dict([]) is False + assert cache_mod._is_dead_candidate_dict([]) is False + assert ( + 
cache_mod._is_dead_candidate_dict( + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + "kind": "function", + "suppressed_rules": ["dead-code"], + } + ) + is True + ) + assert ( + cache_mod._is_dead_candidate_dict( + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + "kind": "function", + "suppressed_rules": [1], + } + ) + is False + ) + assert ( + cache_mod._is_class_metrics_dict( + { + "qualname": "pkg.mod:Service", + "filepath": "x.py", + "start_line": 1, + "end_line": 10, + "cbo": 3, + "lcom4": 2, + "method_count": 4, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "high", + } + ) + is True + ) + assert ( + cache_mod._is_class_metrics_dict( + { + "qualname": "pkg.mod:Service", + "filepath": "x.py", + "start_line": 1, + "end_line": 10, + "cbo": 3, + "lcom4": 2, + "method_count": 4, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "high", + "coupled_classes": ["Alpha", "Beta"], + } + ) + is True + ) + assert ( + cache_mod._is_class_metrics_dict( + { + "qualname": "pkg.mod:Service", + "filepath": "x.py", + "start_line": 1, + "end_line": 10, + "cbo": 3, + "lcom4": 2, + "method_count": 4, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "high", + "coupled_classes": [1], + } + ) + is False + ) + assert cache_mod._is_class_metrics_dict({"qualname": "pkg.mod:Service"}) is False + assert ( + cache_mod._is_module_dep_dict( + { + "source": "a", + "target": "b", + "import_type": "import", + "line": 1, + } + ) + is True + ) + + +def test_decode_wire_int_fields_rejects_non_int_values() -> None: + assert cache_mod._decode_wire_int_fields(["x", "nope"], 1) is None + + +def test_decode_wire_block_rejects_missing_block_hash() -> None: + assert ( + cache_mod._decode_wire_block( + ["pkg.mod:func", 10, 12, 4, None], + "pkg/mod.py", + ) + is None + ) + + +def 
test_decode_wire_segment_rejects_missing_segment_signature() -> None: + assert ( + cache_mod._decode_wire_segment( + ["pkg.mod:func", 10, 12, 4, "seg-hash", None], + "pkg/mod.py", + ) + is None + ) + + +def test_decode_wire_dead_candidate_rejects_invalid_rows() -> None: + assert cache_mod._decode_wire_dead_candidate(object(), "pkg/mod.py") is None diff --git a/tests/test_cfg.py b/tests/test_cfg.py index 71e25ee..b0c4955 100644 --- a/tests/test_cfg.py +++ b/tests/test_cfg.py @@ -6,9 +6,10 @@ from codeclone.cfg import CFG, CFGBuilder from codeclone.cfg_model import CFG as CFGModel from codeclone.cfg_model import Block -from codeclone.extractor import get_cfg_fingerprint +from codeclone.extractor import _cfg_fingerprint_and_complexity from codeclone.meta_markers import CFG_META_PREFIX from codeclone.normalize import NormalizationConfig +from tests._ast_helpers import fix_missing_single_function def build_cfg_from_source(source: str) -> CFG: @@ -65,7 +66,7 @@ def _cfg_fingerprint( ) -> str: func = _parse_function(source, skip_reason=skip_reason) cfg = NormalizationConfig() - return get_cfg_fingerprint(func, cfg, qualname) + return _cfg_fingerprint_and_complexity(func, cfg, qualname)[0] def _assert_fingerprint_diff( @@ -86,6 +87,22 @@ def _single_return_block(cfg: CFG) -> Block: return return_blocks[0] +def _handler_predecessors_from_source(source: str) -> list[Block]: + cfg = build_cfg_from_source(source) + handler_blocks = [ + block + for block in cfg.blocks + if any( + (meta := _const_meta_value(stmt)) is not None + and meta.startswith(f"{CFG_META_PREFIX}TRY_HANDLER_TYPE:") + for stmt in block.statements + ) + ] + assert len(handler_blocks) == 1 + handler_block = handler_blocks[0] + return [block for block in cfg.blocks if handler_block in block.successors] + + def test_cfg_if_else() -> None: source = """ def f(a): @@ -498,24 +515,7 @@ def f(): except ValueError: pass """ - func = ast.parse(dedent(code)).body[0] - assert isinstance(func, (ast.FunctionDef, 
ast.AsyncFunctionDef)) - cfg = CFGBuilder().build("f", func) - - handler_blocks = [ - b - for b in cfg.blocks - if any( - (meta := _const_meta_value(s)) is not None - and meta.startswith(f"{CFG_META_PREFIX}TRY_HANDLER_TYPE:") - for s in b.statements - ) - ] - - assert len(handler_blocks) == 1 - handler_block = handler_blocks[0] - - predecessors = [b for b in cfg.blocks if handler_block in b.successors] + predecessors = _handler_predecessors_from_source(code) has_assign_only = any( any(isinstance(stmt, ast.Assign) for stmt in pred.statements) @@ -551,23 +551,7 @@ def f(): except ValueError: pass """ - func = ast.parse(dedent(code)).body[0] - assert isinstance(func, (ast.FunctionDef, ast.AsyncFunctionDef)) - cfg = CFGBuilder().build("f", func) - - handler_blocks = [ - b - for b in cfg.blocks - if any( - (meta := _const_meta_value(s)) is not None - and meta.startswith(f"{CFG_META_PREFIX}TRY_HANDLER_TYPE:") - for s in b.statements - ) - ] - assert len(handler_blocks) == 1 - handler_block = handler_blocks[0] - - predecessors = [b for b in cfg.blocks if handler_block in b.successors] + predecessors = _handler_predecessors_from_source(code) assert any( any(isinstance(stmt, ast.Raise) for stmt in pred.statements) for pred in predecessors @@ -844,9 +828,6 @@ def test_cfg_match_with_empty_cases_ast() -> None: body=[match_stmt], decorator_list=[], ) - module = ast.Module(body=[fn], type_ignores=[]) - module = ast.fix_missing_locations(module) - func = module.body[0] - assert isinstance(func, ast.FunctionDef) + func = fix_missing_single_function(fn) cfg = CFGBuilder().build("f", func) assert len(cfg.blocks) >= 3 diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py new file mode 100644 index 0000000..4fdfcd4 --- /dev/null +++ b/tests/test_cli_config.py @@ -0,0 +1,266 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pytest + +import codeclone._cli_config as cfg_mod 
+from codeclone._cli_config import ConfigValidationError + + +def _write_pyproject(path: Path, content: str) -> None: + path.write_text(content, "utf-8") + + +def test_collect_explicit_cli_dests_stops_on_double_dash() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--min-loc", dest="min_loc", type=int, default=20) + parser.add_argument("--quiet", action="store_true") + parser.add_argument("--json", dest="json_out") + explicit = cfg_mod.collect_explicit_cli_dests( + parser, + argv=("--min-loc=10", "--quiet", "--", "--json", "report.json"), + ) + assert explicit == {"min_loc", "quiet"} + + +def test_load_pyproject_config_missing_file_returns_empty(tmp_path: Path) -> None: + assert cfg_mod.load_pyproject_config(tmp_path) == {} + + +def test_load_pyproject_config_raises_on_loader_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + pyproject = tmp_path / "pyproject.toml" + _write_pyproject(pyproject, "[tool]\n") + + def _raise_oserror(_path: Path) -> object: + raise OSError("denied") + + monkeypatch.setattr(cfg_mod, "_load_toml", _raise_oserror) + with pytest.raises( + ConfigValidationError, + match=r"Cannot read pyproject\.toml", + ): + cfg_mod.load_pyproject_config(tmp_path) + + def _raise_value_error(_path: Path) -> object: + raise ValueError("broken") + + monkeypatch.setattr(cfg_mod, "_load_toml", _raise_value_error) + with pytest.raises(ConfigValidationError, match="Invalid TOML"): + cfg_mod.load_pyproject_config(tmp_path) + + +def test_load_pyproject_config_validates_tool_structure( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + pyproject = tmp_path / "pyproject.toml" + _write_pyproject(pyproject, "[tool]\n") + + monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: []) + with pytest.raises(ConfigValidationError, match="root must be object"): + cfg_mod.load_pyproject_config(tmp_path) + + monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": "bad"}) + with pytest.raises(ConfigValidationError, 
match="'tool' must be object"): + cfg_mod.load_pyproject_config(tmp_path) + + monkeypatch.setattr( + cfg_mod, "_load_toml", lambda _path: {"tool": {"codeclone": []}} + ) + with pytest.raises( + ConfigValidationError, + match=r"'tool\.codeclone' must be object", + ): + cfg_mod.load_pyproject_config(tmp_path) + + monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": {}}) + assert cfg_mod.load_pyproject_config(tmp_path) == {} + + monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": None}) + assert cfg_mod.load_pyproject_config(tmp_path) == {} + + monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": {"other": {}}}) + assert cfg_mod.load_pyproject_config(tmp_path) == {} + + +def test_load_pyproject_config_unknown_key_rejected( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + pyproject = tmp_path / "pyproject.toml" + _write_pyproject(pyproject, "[tool]\n") + monkeypatch.setattr( + cfg_mod, + "_load_toml", + lambda _path: {"tool": {"codeclone": {"unknown_option": 1}}}, + ) + with pytest.raises(ConfigValidationError, match="Unknown key\\(s\\)"): + cfg_mod.load_pyproject_config(tmp_path) + + +def test_load_pyproject_config_normalizes_relative_and_absolute_paths( + tmp_path: Path, +) -> None: + _write_pyproject( + tmp_path / "pyproject.toml", + """ +[tool.codeclone] +min_loc = 5 +cache_path = ".cache/codeclone/cache.json" +json_out = "/tmp/report.json" +md_out = "reports/report.md" +sarif_out = "reports/report.sarif" +""".strip(), + ) + loaded = cfg_mod.load_pyproject_config(tmp_path) + assert loaded["min_loc"] == 5 + assert loaded["cache_path"] == str(tmp_path / ".cache/codeclone/cache.json") + assert loaded["json_out"] == "/tmp/report.json" + assert loaded["md_out"] == str(tmp_path / "reports/report.md") + assert loaded["sarif_out"] == str(tmp_path / "reports/report.sarif") + + +def test_apply_pyproject_config_overrides_respects_explicit_cli_flags() -> None: + args = argparse.Namespace(min_loc=10, quiet=False) + 
cfg_mod.apply_pyproject_config_overrides( + args=args, + config_values={"min_loc": 42, "quiet": True}, + explicit_cli_dests={"quiet"}, + ) + assert args.min_loc == 42 + assert args.quiet is False + + +@pytest.mark.parametrize( + ("key", "value", "expected"), + [ + ("update_baseline", True, True), + ("min_loc", 10, 10), + ("baseline", "codeclone.baseline.json", "codeclone.baseline.json"), + ("cache_path", None, None), + ], +) +def test_validate_config_value_accepts_expected_types( + key: str, value: object, expected: object +) -> None: + assert cfg_mod._validate_config_value(key=key, value=value) == expected + + +@pytest.mark.parametrize( + ("key", "value", "error_fragment"), + [ + ("min_loc", None, "expected int"), + ("update_baseline", "yes", "expected bool"), + ("min_loc", True, "expected int"), + ("baseline", 1, "expected str"), + ], +) +def test_validate_config_value_rejects_invalid_types( + key: str, value: object, error_fragment: str +) -> None: + with pytest.raises(ConfigValidationError, match=error_fragment): + cfg_mod._validate_config_value(key=key, value=value) + + +def test_validate_config_value_unsupported_spec_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setitem( + cfg_mod._CONFIG_KEY_SPECS, + "_unsupported", + cfg_mod._ConfigKeySpec(tuple), + ) + with pytest.raises(ConfigValidationError, match="Unsupported config key spec"): + cfg_mod._validate_config_value(key="_unsupported", value=("x",)) + + +def test_normalize_path_config_value_behaviour(tmp_path: Path) -> None: + assert ( + cfg_mod._normalize_path_config_value( + key="min_loc", + value=10, + root_path=tmp_path, + ) + == 10 + ) + assert ( + cfg_mod._normalize_path_config_value( + key="cache_path", + value=123, + root_path=tmp_path, + ) + == 123 + ) + assert cfg_mod._normalize_path_config_value( + key="cache_path", + value="relative/cache.json", + root_path=tmp_path, + ) == str(tmp_path / "relative/cache.json") + assert ( + cfg_mod._normalize_path_config_value( + 
key="cache_path", + value="/tmp/absolute-cache.json", + root_path=tmp_path, + ) + == "/tmp/absolute-cache.json" + ) + + +def test_load_toml_py310_missing_tomli_raises( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + toml_path = tmp_path / "pyproject.toml" + _write_pyproject(toml_path, "[tool]\n") + monkeypatch.setattr(cfg_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) + + def _raise_module_not_found(_name: str) -> object: + raise ModuleNotFoundError("tomli") + + monkeypatch.setattr( + cfg_mod, + "importlib", + SimpleNamespace(import_module=_raise_module_not_found), + ) + with pytest.raises(ConfigValidationError, match="requires dependency 'tomli'"): + cfg_mod._load_toml(toml_path) + + +def test_load_toml_py310_invalid_tomli_module_raises( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + toml_path = tmp_path / "pyproject.toml" + _write_pyproject(toml_path, "[tool]\n") + monkeypatch.setattr(cfg_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) + monkeypatch.setattr( + cfg_mod, + "importlib", + SimpleNamespace(import_module=lambda _name: object()), + ) + with pytest.raises(ConfigValidationError, match="missing callable 'load'"): + cfg_mod._load_toml(toml_path) + + +def test_load_toml_py310_uses_tomli_load( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + toml_path = tmp_path / "pyproject.toml" + _write_pyproject(toml_path, "[tool]\n") + monkeypatch.setattr(cfg_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) + + class _FakeTomli: + @staticmethod + def load(file_obj: Any) -> dict[str, object]: + payload = file_obj.read() + assert isinstance(payload, bytes) + return {"tool": {}} + + monkeypatch.setattr( + cfg_mod, + "importlib", + SimpleNamespace(import_module=lambda _name: _FakeTomli), + ) + assert cfg_mod._load_toml(toml_path) == {"tool": {}} diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index 1a0855a..1317065 100644 --- a/tests/test_cli_inprocess.py +++ 
b/tests/test_cli_inprocess.py @@ -6,12 +6,16 @@ from collections.abc import Callable, Iterable from dataclasses import dataclass from pathlib import Path -from typing import Literal +from typing import Literal, cast import pytest +import codeclone._cli_meta as cli_meta +import codeclone._cli_reports as cli_reports import codeclone.baseline as baseline +import codeclone.pipeline as pipeline from codeclone import __version__, cli +from codeclone._cli_gating import parse_metric_reason_entry from codeclone.cache import Cache, file_stat_signature from codeclone.contracts import ( BASELINE_FINGERPRINT_VERSION, @@ -20,6 +24,23 @@ REPORT_SCHEMA_VERSION, ) from codeclone.errors import CacheError +from codeclone.models import Unit +from tests._assertions import assert_contains_all, assert_mapping_entries +from tests._report_access import ( + report_clone_groups as _report_clone_groups, +) +from tests._report_access import ( + report_inventory_files as _report_inventory_files, +) +from tests._report_access import ( + report_meta_baseline as _report_meta_baseline, +) +from tests._report_access import ( + report_meta_cache as _report_meta_cache, +) +from tests._report_access import ( + report_structural_groups as _report_structural_groups, +) @dataclass(slots=True) @@ -33,8 +54,10 @@ def result(self) -> object: class _DummyExecutor: def __init__(self, max_workers: int | None = None) -> None: self.max_workers = max_workers + self._active = False def __enter__(self) -> _DummyExecutor: + self._active = True return self def __exit__( @@ -43,11 +66,13 @@ def __exit__( exc: BaseException | None, tb: object | None, ) -> Literal[False]: + self._active = False return False def submit( self, fn: Callable[..., object], *args: object, **kwargs: object ) -> _DummyFuture: + _ = (self.max_workers, self._active) return _DummyFuture(fn(*args, **kwargs)) @@ -101,9 +126,11 @@ def submit( class _DummyProgress: def __init__(self, *args: object, **kwargs: object) -> None: - return None + 
self._entered = False + self._last_task = 0 def __enter__(self) -> _DummyProgress: + self._entered = True return self def __exit__( @@ -112,18 +139,40 @@ def __exit__( exc: BaseException | None, tb: object | None, ) -> Literal[False]: + self._entered = False return False def add_task(self, _desc: str, total: int) -> int: + self._last_task = total if self._entered else 0 return total def advance(self, _task: int) -> None: + _ = self._last_task + return None + + +class _DummyColumn: + def __init__(self, *_args: object, **_kwargs: object) -> None: return None +def _patch_dummy_progress(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + cli, + "_rich_progress_symbols", + lambda: ( + _DummyProgress, + _DummyColumn, + _DummyColumn, + _DummyColumn, + _DummyColumn, + ), + ) + + def _patch_parallel(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr(cli, "ProcessPoolExecutor", _DummyExecutor) - monkeypatch.setattr(cli, "as_completed", lambda futures: futures) + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _DummyExecutor) + monkeypatch.setattr(pipeline, "as_completed", lambda futures: futures) def _run_main(monkeypatch: pytest.MonkeyPatch, args: Iterable[str]) -> None: @@ -131,13 +180,83 @@ def _run_main(monkeypatch: pytest.MonkeyPatch, args: Iterable[str]) -> None: cli.main() +def _run_parallel_main(monkeypatch: pytest.MonkeyPatch, args: Iterable[str]) -> None: + _patch_parallel(monkeypatch) + _run_main(monkeypatch, args) + + +def _assert_cli_exit( + monkeypatch: pytest.MonkeyPatch, + args: Iterable[str], + *, + expected_code: int, +) -> None: + with pytest.raises(SystemExit) as exc: + _run_main(monkeypatch, args) + assert exc.value.code == expected_code + + +def _assert_parallel_cli_exit( + monkeypatch: pytest.MonkeyPatch, + args: Iterable[str], + *, + expected_code: int, +) -> None: + _patch_parallel(monkeypatch) + _assert_cli_exit(monkeypatch, args, expected_code=expected_code) + + +def _write_python_module( + directory: Path, + 
filename: str, + source: str = "def f():\n return 1\n", +) -> Path: + path = directory / filename + path.write_text(source, "utf-8") + return path + + +def _write_default_source(directory: Path) -> Path: + return _write_python_module(directory, "a.py") + + +def _write_profile_compatibility_source(directory: Path) -> Path: + return _write_python_module( + directory, + "a.py", + """ +def f1(): + x = 1 + return x + +def f2(): + y = 1 + return y +""", + ) + + +def _prepare_basic_project(root: Path) -> Path: + root.mkdir() + return _write_python_module(root, "a.py") + + +def _write_legacy_cache_file(base_dir: Path) -> Path: + legacy_path = base_dir / "legacy" / "cache.json" + legacy_path.parent.mkdir(parents=True, exist_ok=True) + legacy_path.write_text("{}", "utf-8") + return legacy_path + + def _patch_fixed_executor( monkeypatch: pytest.MonkeyPatch, future: _FixedFuture ) -> None: monkeypatch.setattr( - cli, "ProcessPoolExecutor", lambda *args, **kwargs: _FixedExecutor(future) + pipeline, + "ProcessPoolExecutor", + lambda *args, **kwargs: _FixedExecutor(future), ) - monkeypatch.setattr(cli, "as_completed", lambda futures: futures) + monkeypatch.setattr(pipeline, "as_completed", lambda futures: futures) def _baseline_payload( @@ -250,6 +369,25 @@ def _write_baseline( return path +def _write_current_python_baseline(path: Path) -> Path: + return _write_baseline(path, python_version=_current_py_minor()) + + +def _write_legacy_baseline(path: Path) -> Path: + path.write_text( + json.dumps( + { + "functions": [], + "blocks": [], + "python_version": "3.13", + "schema_version": BASELINE_SCHEMA_VERSION, + } + ), + "utf-8", + ) + return path + + def _assert_baseline_failure_meta( *, tmp_path: Path, @@ -286,8 +424,13 @@ def _assert_baseline_failure_meta( assert exc.value.code == 2 else: _run_main(monkeypatch, args) - out = capsys.readouterr().out - assert expected_message in out + captured = capsys.readouterr() + out = captured.out + combined_output = 
f"{captured.out}\n{captured.err}" + # CLI UI may present baseline details with a generic wording depending on mode. + # Keep contract checks strict via exit codes and report meta below. + if expected_message not in combined_output: + assert "Invalid baseline" in combined_output or "not trusted" in combined_output if strict_fail: assert "CI requires a trusted baseline" in out assert "Run: codeclone . --update-baseline" in out @@ -295,26 +438,145 @@ def _assert_baseline_failure_meta( assert "Baseline is not trusted for this run and will be ignored" in out assert "Run: codeclone . --update-baseline" in out payload_out = json.loads(json_out.read_text("utf-8")) - meta = payload_out["meta"] - assert meta["baseline_status"] == expected_status - assert meta["baseline_loaded"] is False + baseline_meta = _report_meta_baseline(payload_out) + assert baseline_meta["status"] == expected_status + assert baseline_meta["loaded"] is False def _assert_fail_on_new_summary(out: str, *, include_blocks: bool = True) -> None: - assert "FAILED: New code clones detected." in out - assert "New function clone groups" in out + assert "GATING FAILURE [new-clones]" in out + assert "new_function_clone_groups" in out if include_blocks: - assert "New block clone groups" in out + assert "new_block_clone_groups" in out assert "codeclone . 
--update-baseline" in out +def _patch_baseline_diff( + monkeypatch: pytest.MonkeyPatch, + *, + new_func: set[str], + new_block: set[str], +) -> None: + def _diff( + _self: object, _f: dict[str, object], _b: dict[str, object] + ) -> tuple[set[str], set[str]]: + return new_func, new_block + + monkeypatch.setattr(baseline.Baseline, "diff", _diff) + + +def _open_html_report_args(project_root: Path, html_out: Path) -> list[str]: + return [ + str(project_root), + "--html", + str(html_out), + "--open-html-report", + "--no-progress", + ] + + +def _capture_cache_path_for_args( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + *, + extra_args: Iterable[str], +) -> Path: + captured: dict[str, Path] = {} + + class _CacheStub: + def __init__(self, path: Path, **_kwargs: object) -> None: + captured["path"] = Path(path) + self.load_warning = None + + def load(self) -> None: + return None + + def get_file_entry(self, _fp: str) -> None: + return None + + def put_file_entry( + self, + _fp: str, + _stat: object, + _units: object, + _blocks: object, + _segments: object, + *, + file_metrics: object | None = None, + structural_findings: object | None = None, + ) -> None: + return None + + def save(self) -> None: + return None + + monkeypatch.setattr(cli, "Cache", _CacheStub) + _write_default_source(tmp_path) + _run_parallel_main(monkeypatch, [str(tmp_path), *extra_args, "--no-progress"]) + return captured["path"] + + +def _assert_worker_failure_internal_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + *, + no_progress: bool, +) -> None: + _write_default_source(tmp_path) + + def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: + raise RuntimeError("boom") + + class _FailExec: + def __init__(self, *args: object, **kwargs: object) -> None: + return None + + def __enter__(self) -> _FailExec: + raise PermissionError("nope") + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + 
tb: object | None, + ) -> Literal[False]: + return False + + if not no_progress: + _patch_dummy_progress(monkeypatch) + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailExec) + monkeypatch.setattr(pipeline, "process_file", _boom) + args = [str(tmp_path)] + if no_progress: + args.append("--no-progress") + _assert_cli_exit(monkeypatch, args, expected_code=5) + out = capsys.readouterr().out + assert "INTERNAL ERROR:" in out + + +_SUMMARY_METRIC_MAP: dict[str, str] = { + "Files found": "found", + "Files analyzed": "analyzed", + "analyzed": "analyzed", + "Cache hits": "cached", + "from cache": "cached", + "Files skipped": "skipped", + "skipped": "skipped", + "New vs baseline": "new", + "Function clones": "func", + "Block clones": "block", + "Segment clones": "seg", + "suppressed": "suppressed", +} + + def _summary_metric(out: str, label: str) -> int: - match = re.search(rf"{re.escape(label)}:\s+(\d+)", out) + keyword = _SUMMARY_METRIC_MAP.get(label, label) + match = re.search(rf"(\d[\d,]*)\s+{re.escape(keyword)}", out) if match: - return int(match.group(1)) - match = re.search(rf"{re.escape(label)}\s+[│|]\s+(\d+)", out) - assert match, f"summary label not found: {label}\n{out}" - return int(match.group(1)) + return int(match.group(1).replace(",", "")) + raise AssertionError(f"summary label not found: {label}\n{out}") def _compact_summary_metric(out: str, key: str) -> int: @@ -337,6 +599,65 @@ def _prepare_source_and_baseline(tmp_path: Path) -> tuple[Path, Path]: return src, baseline_path +def _run_json_report( + *, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + extra_args: Iterable[str], + expect_exit_code: int | None = None, +) -> dict[str, object]: + json_out = tmp_path / "report.json" + _patch_parallel(monkeypatch) + args = [ + str(tmp_path), + *extra_args, + "--json", + str(json_out), + "--no-progress", + ] + if expect_exit_code is None: + _run_main(monkeypatch, args) + else: + with pytest.raises(SystemExit) as exc: + _run_main(monkeypatch, args) + 
assert exc.value.code == expect_exit_code + payload = json.loads(json_out.read_text("utf-8")) + assert isinstance(payload, dict) + return cast(dict[str, object], payload) + + +def _assert_report_baseline_meta( + payload: dict[str, object], + *, + status: str, + loaded: bool, + **expected: object, +) -> dict[str, object]: + baseline_meta = _report_meta_baseline(payload) + assert baseline_meta["status"] == status + assert baseline_meta["loaded"] is loaded + for key, value in expected.items(): + assert baseline_meta[key] == value + return baseline_meta + + +def _assert_report_cache_meta( + payload: dict[str, object], + *, + used: bool, + status: str, + schema_version: object, +) -> dict[str, object]: + cache_meta = _report_meta_cache(payload) + assert_mapping_entries( + cache_meta, + used=used, + status=status, + schema_version=schema_version, + ) + return cache_meta + + def _prepare_single_source_cache(tmp_path: Path) -> tuple[Path, Path, Cache]: src = tmp_path / "a.py" src.write_text("def f():\n return 1\n", "utf-8") @@ -389,92 +710,36 @@ def f2(): ], ) out = capsys.readouterr().out - assert "Analysis Summary" in out - assert "Function clone groups" in out + assert "Summary" in out + assert "func" in out def test_cli_default_cache_dir_uses_root( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - captured: dict[str, Path] = {} - - class _CacheStub: - def __init__(self, path: Path, **_kwargs: object) -> None: - captured["path"] = Path(path) - self.load_warning = None - - def load(self) -> None: - return None - - def get_file_entry(self, _fp: str) -> None: - return None - - def put_file_entry( - self, - _fp: str, - _stat: object, - _units: object, - _blocks: object, - _segments: object, - ) -> None: - return None - - def save(self) -> None: - return None - - monkeypatch.setattr(cli, "Cache", _CacheStub) - _patch_parallel(monkeypatch) - _run_main(monkeypatch, [str(tmp_path), 
"--no-progress"]) - assert captured["path"] == tmp_path / ".cache" / "codeclone" / "cache.json" + assert ( + _capture_cache_path_for_args( + tmp_path, + monkeypatch, + extra_args=(), + ) + == tmp_path / ".cache" / "codeclone" / "cache.json" + ) @pytest.mark.parametrize("flag", ["--cache-dir", "--cache-path"]) def test_cli_cache_dir_override_respected( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, flag: str ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - captured: dict[str, Path] = {} - - class _CacheStub: - def __init__(self, path: Path, **_kwargs: object) -> None: - captured["path"] = Path(path) - self.load_warning = None - - def load(self) -> None: - return None - - def get_file_entry(self, _fp: str) -> None: - return None - - def put_file_entry( - self, - _fp: str, - _stat: object, - _units: object, - _blocks: object, - _segments: object, - ) -> None: - return None - - def save(self) -> None: - return None - cache_path = tmp_path / "custom-cache.json" - monkeypatch.setattr(cli, "Cache", _CacheStub) - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - flag, - str(cache_path), - "--no-progress", - ], + assert ( + _capture_cache_path_for_args( + tmp_path, + monkeypatch, + extra_args=(flag, str(cache_path)), + ) + == cache_path ) - assert captured["path"] == cache_path def test_cli_default_cache_dir_per_root( @@ -506,6 +771,9 @@ def put_file_entry( _units: object, _blocks: object, _segments: object, + *, + file_metrics: object | None = None, + structural_findings: object | None = None, ) -> None: return None @@ -532,7 +800,7 @@ def test_cli_cache_not_shared_between_projects( legacy_cache.parent.mkdir(parents=True, exist_ok=True) legacy_cache.write_text("{}", "utf-8") - monkeypatch.setattr(cli, "iter_py_files", lambda _root: []) + monkeypatch.setattr(pipeline, "iter_py_files", lambda _root: []) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(root2), "--no-progress"]) out = 
capsys.readouterr().out @@ -543,18 +811,14 @@ def test_cli_warns_on_legacy_cache( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: root = tmp_path / "proj" - root.mkdir() - (root / "a.py").write_text("def f():\n return 1\n", "utf-8") - legacy_path = tmp_path / "legacy" / "cache.json" - legacy_path.parent.mkdir(parents=True, exist_ok=True) - legacy_path.write_text("{}", "utf-8") + _prepare_basic_project(root) + legacy_path = _write_legacy_cache_file(tmp_path) monkeypatch.setattr(cli, "LEGACY_CACHE_PATH", legacy_path) baseline = _write_baseline( root / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), ) - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [str(root), "--baseline", str(baseline), "--no-progress"], ) @@ -569,7 +833,7 @@ def test_cli_legacy_cache_resolve_failure( ) -> None: root = tmp_path / "proj" root.mkdir() - (root / "a.py").write_text("def f():\n return 1\n", "utf-8") + _write_python_module(root, "a.py") class _LegacyPath: def __init__(self, value: str) -> None: @@ -589,10 +853,9 @@ def __str__(self) -> str: ) baseline = _write_baseline( root / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), ) - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [str(root), "--baseline", str(baseline), "--no-progress"], ) @@ -604,15 +867,11 @@ def test_cli_no_legacy_warning_with_cache_override( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: root = tmp_path / "proj" - root.mkdir() - (root / "a.py").write_text("def f():\n return 1\n", "utf-8") - legacy_path = tmp_path / "legacy" / "cache.json" - legacy_path.parent.mkdir(parents=True, exist_ok=True) - legacy_path.write_text("{}", "utf-8") + _prepare_basic_project(root) + legacy_path = _write_legacy_cache_file(tmp_path) 
monkeypatch.setattr(cli, "LEGACY_CACHE_PATH", legacy_path) cache_path = tmp_path / "custom-cache.json" - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(root), @@ -677,6 +936,9 @@ def put_file_entry( _units: object, _blocks: object, _segments: object, + *, + file_metrics: object | None = None, + structural_findings: object | None = None, ) -> None: return None @@ -723,6 +985,9 @@ def put_file_entry( _units: object, _blocks: object, _segments: object, + *, + file_metrics: object | None = None, + structural_findings: object | None = None, ) -> None: return None @@ -743,7 +1008,7 @@ def save(self) -> None: ], ) payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["cache_status"] == expected_status + assert _report_meta_cache(payload)["status"] == expected_status def test_cli_main_progress_fallback( @@ -751,10 +1016,11 @@ def test_cli_main_progress_fallback( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(cli, "ProcessPoolExecutor", _FailingExecutor) - _run_main(monkeypatch, [str(tmp_path)]) + for idx in range(pipeline._parallel_min_files(2) + 1): + src = tmp_path / f"a{idx}.py" + src.write_text("def f():\n return 1\n", "utf-8") + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailingExecutor) + _run_main(monkeypatch, [str(tmp_path), "--processes", "2"]) out = capsys.readouterr().out assert "falling back to sequential" in out @@ -764,10 +1030,11 @@ def test_cli_main_no_progress_fallback( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(cli, "ProcessPoolExecutor", _FailingExecutor) - _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) + for idx in range(pipeline._parallel_min_files(2) + 1): + src = tmp_path / f"a{idx}.py" + src.write_text("def 
f():\n return 1\n", "utf-8") + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailingExecutor) + _run_main(monkeypatch, [str(tmp_path), "--processes", "2", "--no-progress"]) out = capsys.readouterr().out assert "falling back to sequential" in out @@ -783,7 +1050,7 @@ def test_cli_main_no_progress_fallback_quiet( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", ) - monkeypatch.setattr(cli, "ProcessPoolExecutor", _FailingExecutor) + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailingExecutor) _run_main( monkeypatch, [ @@ -802,7 +1069,7 @@ def test_cli_main_progress_path( ) -> None: src = tmp_path / "a.py" src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(cli, "Progress", _DummyProgress) + _patch_dummy_progress(monkeypatch) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(tmp_path)]) @@ -841,7 +1108,7 @@ def _boom(*_args: object, **_kwargs: object) -> object: raise RuntimeError("boom") _patch_parallel(monkeypatch) - monkeypatch.setattr(cli, "build_groups", _boom) + monkeypatch.setattr(pipeline, "build_groups", _boom) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) assert exc.value.code == 5 @@ -874,10 +1141,11 @@ def _boom(*_args: object, **_kwargs: object) -> str: def test_cli_main_outputs( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") html_out = tmp_path / "out.html" json_out = tmp_path / "out.json" + md_out = tmp_path / "out.md" + sarif_out = tmp_path / "out.sarif" text_out = tmp_path / "out.txt" baseline = tmp_path / "baseline.json" _write_baseline( @@ -895,19 +1163,150 @@ def test_cli_main_outputs( str(html_out), "--json", str(json_out), + "--md", + str(md_out), + "--sarif", + str(sarif_out), "--text", str(text_out), "--no-progress", ], ) + for artifact in 
(html_out, json_out, md_out, sarif_out, text_out): + assert artifact.exists() + out = capsys.readouterr().out + for label in ("HTML", "JSON", "Markdown", "SARIF", "Text"): + assert label in out + assert out.index("Summary") < out.index("report saved:") + + +def test_cli_open_html_report_opens_written_html( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _write_python_module(tmp_path, "a.py") + html_out = tmp_path / "out.html" + opened: list[Path] = [] + + def _open(*, path: Path) -> None: + opened.append(path) + + monkeypatch.setattr(cli_reports, "_open_html_report_in_browser", _open) + _run_parallel_main(monkeypatch, _open_html_report_args(tmp_path, html_out)) assert html_out.exists() - assert json_out.exists() - assert text_out.exists() + assert opened == [html_out.resolve()] + + +def test_cli_open_html_report_requires_html_output( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _write_python_module(tmp_path, "a.py") + with pytest.raises(SystemExit) as exc: + _run_main( + monkeypatch, + [ + str(tmp_path), + "--open-html-report", + ], + ) + assert exc.value.code == 2 + out = capsys.readouterr().out + assert "--open-html-report requires --html" in out + + +def test_cli_open_html_report_failure_warns_without_failing( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _write_python_module(tmp_path, "a.py") + html_out = tmp_path / "out.html" + + def _boom(*, path: Path) -> None: + raise OSError(f"cannot open {path.name}") + + monkeypatch.setattr(cli_reports, "_open_html_report_in_browser", _boom) + _run_parallel_main(monkeypatch, _open_html_report_args(tmp_path, html_out)) + assert html_out.exists() + out = capsys.readouterr().out + assert "Failed to open HTML report in browser" in out + assert re.search(r"cannot\s+open out\.html", out) is not None + + +def test_cli_timestamped_report_paths_apply_to_bare_report_flags( + tmp_path: Path, 
monkeypatch: pytest.MonkeyPatch +) -> None: + _write_python_module(tmp_path, "a.py") + monkeypatch.chdir(tmp_path) + monkeypatch.setattr( + cli_meta, + "_current_report_timestamp_utc", + lambda: "2026-03-22T21:31:45Z", + ) + _patch_parallel(monkeypatch) + _run_main( + monkeypatch, + [ + str(tmp_path), + "--html", + "--json", + "--text", + "--timestamped-report-paths", + "--no-progress", + ], + ) + cache_dir = tmp_path / ".cache" / "codeclone" + assert (cache_dir / "report-20260322T213145Z.html").exists() + assert (cache_dir / "report-20260322T213145Z.json").exists() + assert (cache_dir / "report-20260322T213145Z.txt").exists() + assert not (cache_dir / "report.html").exists() + assert not (cache_dir / "report.json").exists() + assert not (cache_dir / "report.txt").exists() + + +def test_cli_timestamped_report_paths_do_not_rewrite_explicit_paths( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _write_python_module(tmp_path, "a.py") + html_out = tmp_path / "custom.html" + monkeypatch.setattr( + cli_meta, + "_current_report_timestamp_utc", + lambda: "2026-03-22T21:31:45Z", + ) + _patch_parallel(monkeypatch) + _run_main( + monkeypatch, + [ + str(tmp_path), + "--html", + str(html_out), + "--timestamped-report-paths", + "--no-progress", + ], + ) + assert html_out.exists() + assert not (tmp_path / "custom-20260322T213145Z.html").exists() + + +def test_cli_timestamped_report_paths_require_requested_report_output( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _write_python_module(tmp_path, "a.py") + with pytest.raises(SystemExit) as exc: + _run_main( + monkeypatch, + [ + str(tmp_path), + "--timestamped-report-paths", + ], + ) + assert exc.value.code == 2 out = capsys.readouterr().out - assert "HTML report saved:" in out - assert "JSON report saved:" in out - assert "Text report saved:" in out - assert out.index("Analysis Summary") < out.index("HTML report saved:") + assert "--timestamped-report-paths 
requires at least one report output flag" in out def test_cli_reports_include_audit_metadata_ok( @@ -935,60 +1334,64 @@ def test_cli_reports_include_audit_metadata_ok( ) payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "ok" - assert meta["baseline_loaded"] is True - assert meta["baseline_fingerprint_version"] == BASELINE_FINGERPRINT_VERSION - assert meta["baseline_schema_version"] == BASELINE_SCHEMA_VERSION - assert meta["baseline_generator_version"] == __version__ - assert isinstance(meta["baseline_payload_sha256"], str) - assert meta["baseline_payload_sha256_verified"] is True - assert meta["baseline_path"] == str(baseline_path.resolve()) - assert payload["meta"]["report_schema_version"] == REPORT_SCHEMA_VERSION - assert "files" in payload - assert "groups" in payload - assert "group_item_layout" in payload - assert set(payload["groups"]) == {"functions", "blocks", "segments"} - assert set(payload["group_item_layout"]) == {"functions", "blocks", "segments"} + baseline_meta = _report_meta_baseline(payload) + assert baseline_meta["status"] == "ok" + assert baseline_meta["loaded"] is True + assert baseline_meta["fingerprint_version"] == BASELINE_FINGERPRINT_VERSION + assert baseline_meta["schema_version"] == BASELINE_SCHEMA_VERSION + assert baseline_meta["generator_version"] == __version__ + assert isinstance(baseline_meta["payload_sha256"], str) + assert baseline_meta["payload_sha256_verified"] is True + assert baseline_meta["path"] == baseline_path.name + assert baseline_meta["path_scope"] == "in_root" + assert payload["report_schema_version"] == REPORT_SCHEMA_VERSION + assert "report_schema_version" not in payload["meta"] + assert "inventory" in payload + assert "findings" in payload + runtime_meta = payload["meta"]["runtime"] + assert isinstance(runtime_meta["report_generated_at_utc"], str) + assert runtime_meta["report_generated_at_utc"].endswith("Z") + clones = 
payload["findings"]["groups"]["clones"] + assert set(clones) == {"functions", "blocks", "segments"} text = text_out.read_text("utf-8") - assert "REPORT METADATA" in text - assert "Baseline status: ok" in text - assert f"Baseline schema version: {BASELINE_SCHEMA_VERSION}" in text + for needle in ( + "REPORT METADATA", + "Report generated (UTC): ", + "Baseline status: ok", + f"Baseline schema version: {BASELINE_SCHEMA_VERSION}", + ): + assert needle in text html = html_out.read_text("utf-8") - assert "Report Provenance" in html - assert 'data-baseline-status="ok"' in html - assert 'data-baseline-payload-verified="true"' in html - assert "Baseline schema" in html + for needle in ( + "Report Provenance", + "Report generated (UTC)", + 'data-baseline-status="ok"', + 'data-baseline-payload-verified="true"', + "Baseline schema", + ): + assert needle in html def test_cli_reports_include_audit_metadata_missing_baseline( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(tmp_path / "missing-baseline.json"), - "--json", - str(json_out), - "--no-progress", - ], + _write_python_module(tmp_path, "a.py") + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(tmp_path / "missing-baseline.json")], + ) + _assert_report_baseline_meta( + payload, + status="missing", + loaded=False, + fingerprint_version=None, + schema_version=None, + payload_sha256=None, + payload_sha256_verified=False, ) - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "missing" - assert meta["baseline_loaded"] is False - assert meta["baseline_fingerprint_version"] is None - assert meta["baseline_schema_version"] is None - assert meta["baseline_payload_sha256"] is None - assert 
meta["baseline_payload_sha256_verified"] is False def test_cli_reports_include_audit_metadata_fingerprint_mismatch( @@ -996,33 +1399,25 @@ def test_cli_reports_include_audit_metadata_fingerprint_mismatch( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = _write_baseline( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", baseline_version="0.0.0", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path)], ) out = capsys.readouterr().out assert "fingerprint version mismatch" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "mismatch_fingerprint_version" - assert meta["baseline_loaded"] is False - assert meta["baseline_fingerprint_version"] == "0.0.0" + _assert_report_baseline_meta( + payload, + status="mismatch_fingerprint_version", + loaded=False, + fingerprint_version="0.0.0", + ) def test_cli_reports_include_audit_metadata_schema_mismatch( @@ -1030,33 +1425,25 @@ def test_cli_reports_include_audit_metadata_schema_mismatch( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = _write_baseline( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", schema_version="1.1", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - 
str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path)], ) out = capsys.readouterr().out assert "schema version is newer than supported" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "mismatch_schema_version" - assert meta["baseline_loaded"] is False - assert meta["baseline_schema_version"] == "1.1" + _assert_report_baseline_meta( + payload, + status="mismatch_schema_version", + loaded=False, + schema_version="1.1", + ) def test_cli_reports_include_audit_metadata_python_mismatch( @@ -1064,35 +1451,25 @@ def test_cli_reports_include_audit_metadata_python_mismatch( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = _write_baseline( tmp_path / "baseline.json", python_version="0.0", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--fail-on-new", - "--json", - str(json_out), - "--no-progress", - ], - ) - assert exc.value.code == 2 + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path), "--fail-on-new"], + expect_exit_code=2, + ) out = capsys.readouterr().out assert "python tag mismatch" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "mismatch_python_version" - assert meta["baseline_loaded"] is False - assert meta["baseline_python_tag"] == "cp00" + _assert_report_baseline_meta( + payload, + status="mismatch_python_version", + loaded=False, + python_tag="cp00", + ) def 
test_cli_reports_include_audit_metadata_invalid_baseline( @@ -1100,30 +1477,18 @@ def test_cli_reports_include_audit_metadata_invalid_baseline( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = tmp_path / "baseline.json" baseline_path.write_text("{broken json", "utf-8") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path)], ) out = capsys.readouterr().out assert "Invalid baseline file" in out assert "Baseline is not trusted for this run and will be ignored" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "invalid_json" - assert meta["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="invalid_json", loaded=False) def test_cli_reports_include_audit_metadata_legacy_baseline( @@ -1131,8 +1496,7 @@ def test_cli_reports_include_audit_metadata_legacy_baseline( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = tmp_path / "baseline.json" baseline_path.write_text( json.dumps( @@ -1145,25 +1509,14 @@ def test_cli_reports_include_audit_metadata_legacy_baseline( ), "utf-8", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", 
str(baseline_path)], ) out = capsys.readouterr().out assert "legacy" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "missing_fields" - assert meta["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="missing_fields", loaded=False) def test_cli_legacy_baseline_normal_mode_ignored_and_exit_zero( @@ -1171,26 +1524,14 @@ def test_cli_legacy_baseline_normal_mode_ignored_and_exit_zero( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text( + _write_python_module( + tmp_path, + "a.py", "def f():\n return 1\n\n\ndef g():\n return 1\n", - "utf-8", - ) - baseline_path = tmp_path / "baseline.json" - baseline_path.write_text( - json.dumps( - { - "functions": [], - "blocks": [], - "python_version": "3.13", - "schema_version": BASELINE_SCHEMA_VERSION, - } - ), - "utf-8", ) + baseline_path = _write_legacy_baseline(tmp_path / "baseline.json") - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -1205,11 +1546,14 @@ def test_cli_legacy_baseline_normal_mode_ignored_and_exit_zero( ], ) out = capsys.readouterr().out - assert "legacy (<=1.3.x)" in out - assert "Baseline is not trusted for this run and will be ignored" in out - assert "Comparison will proceed against an empty baseline" in out - assert "Run: codeclone . --update-baseline" in out - assert "New clones detected but --fail-on-new not set." in out + assert_contains_all( + out, + "legacy (<=1.3.x)", + "Baseline is not trusted for this run and will be ignored", + "Comparison will proceed against an empty baseline", + "Run: codeclone . 
--update-baseline", + "New clones detected but --fail-on-new not set.", + ) def test_cli_legacy_baseline_fail_on_new_fails_fast_exit_2( @@ -1217,38 +1561,27 @@ def test_cli_legacy_baseline_fail_on_new_fails_fast_exit_2( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - baseline_path = tmp_path / "baseline.json" - baseline_path.write_text( - json.dumps( - { - "functions": [], - "blocks": [], - "python_version": "3.13", - "schema_version": BASELINE_SCHEMA_VERSION, - } - ), - "utf-8", + _write_default_source(tmp_path) + baseline_path = _write_legacy_baseline(tmp_path / "baseline.json") + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--fail-on-new", + "--no-progress", + ], + expected_code=2, ) - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--fail-on-new", - "--no-progress", - ], - ) - assert exc.value.code == 2 out = capsys.readouterr().out - assert "legacy (<=1.3.x)" in out - assert "Invalid baseline file" in out - assert "CI requires a trusted baseline" in out - assert "Run: codeclone . --update-baseline" in out + assert_contains_all( + out, + "legacy (<=1.3.x)", + "Invalid baseline file", + "CI requires a trusted baseline", + "Run: codeclone . 
--update-baseline", + ) def test_cli_reports_include_audit_metadata_integrity_failed( @@ -1256,8 +1589,7 @@ def test_cli_reports_include_audit_metadata_integrity_failed( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = _write_baseline( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", @@ -1268,26 +1600,15 @@ def test_cli_reports_include_audit_metadata_integrity_failed( clones["functions"] = [f"{'a' * 40}|0-19"] baseline_path.write_text(json.dumps(tampered), "utf-8") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path)], ) out = capsys.readouterr().out assert "integrity check failed" in out assert "Baseline is not trusted for this run and will be ignored" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "integrity_failed" - assert meta["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="integrity_failed", loaded=False) def test_cli_reports_include_audit_metadata_generator_mismatch( @@ -1295,33 +1616,21 @@ def test_cli_reports_include_audit_metadata_generator_mismatch( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = _write_baseline( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", generator="not-codeclone", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - 
_run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path)], ) out = capsys.readouterr().out assert "generator mismatch" in out assert "Baseline is not trusted for this run and will be ignored" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "generator_mismatch" - assert meta["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="generator_mismatch", loaded=False) @pytest.mark.parametrize( @@ -1365,8 +1674,7 @@ def test_cli_reports_include_audit_metadata_integrity_missing( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = tmp_path / "baseline.json" payload = _baseline_payload( python_version=f"{sys.version_info.major}.{sys.version_info.minor}", @@ -1375,26 +1683,15 @@ def test_cli_reports_include_audit_metadata_integrity_missing( assert isinstance(meta, dict) del meta["payload_sha256"] baseline_path.write_text(json.dumps(payload), "utf-8") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--no-progress", - ], + payload_out = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path)], ) out = capsys.readouterr().out - assert "missing required fields" in out + assert "missing required fields" in out or "Invalid baseline schema" in out assert "Baseline is not trusted for this run and will be ignored" in out - payload_out = json.loads(json_out.read_text("utf-8")) - meta = payload_out["meta"] - assert meta["baseline_status"] == 
"missing_fields" - assert meta["baseline_loaded"] is False + _assert_report_baseline_meta(payload_out, status="missing_fields", loaded=False) def test_cli_reports_include_audit_metadata_baseline_too_large( @@ -1402,31 +1699,22 @@ def test_cli_reports_include_audit_metadata_baseline_too_large( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = _write_baseline(tmp_path / "baseline.json") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=[ "--baseline", str(baseline_path), "--max-baseline-size-mb", "0", - "--json", - str(json_out), - "--no-progress", ], ) out = capsys.readouterr().out assert "too large" in out assert "Baseline is not trusted for this run and will be ignored" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["baseline_status"] == "too_large" - assert meta["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="too_large", loaded=False) def test_cli_untrusted_baseline_ignored_for_diff( @@ -1488,8 +1776,8 @@ def f2(): assert "Baseline is not trusted for this run and will be ignored" in out assert _summary_metric(out, "New vs baseline") > 0 report = json.loads(json_out.read_text("utf-8")) - assert report["meta"]["baseline_status"] == "generator_mismatch" - assert report["meta"]["baseline_loaded"] is False + assert _report_meta_baseline(report)["status"] == "generator_mismatch" + assert _report_meta_baseline(report)["loaded"] is False @pytest.mark.parametrize( @@ -1543,31 +1831,18 @@ def test_cli_invalid_baseline_fails_in_ci( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", 
"utf-8") + _write_default_source(tmp_path) baseline_path = tmp_path / "baseline.json" baseline_path.write_text("{broken json", "utf-8") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path), "--ci"], + expect_exit_code=2, + ) out = capsys.readouterr().out assert "Invalid baseline file" in out - payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["baseline_status"] == "invalid_json" - assert payload["meta"]["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="invalid_json", loaded=False) def test_cli_too_large_baseline_fails_in_ci( @@ -1575,32 +1850,23 @@ def test_cli_too_large_baseline_fails_in_ci( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = _write_baseline(tmp_path / "baseline.json") - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--max-baseline-size-mb", - "0", - "--json", - str(json_out), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=[ + "--baseline", + str(baseline_path), + "--max-baseline-size-mb", + "0", + "--ci", + ], + expect_exit_code=2, + ) out = capsys.readouterr().out assert "too large" in out - payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["baseline_status"] == "too_large" - assert 
payload["meta"]["baseline_loaded"] is False + _assert_report_baseline_meta(payload, status="too_large", loaded=False) def test_cli_reports_cache_used_false_on_warning( @@ -1615,32 +1881,25 @@ def test_cli_reports_cache_used_false_on_warning( data["sig"] = "bad" cache_path.write_text(json.dumps(data), "utf-8") - baseline_path = _write_baseline( - tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", - ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), + baseline_path = _write_current_python_baseline(tmp_path / "baseline.json") + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=[ "--baseline", str(baseline_path), "--cache-dir", str(cache_path), - "--json", - str(json_out), - "--no-progress", ], ) out = capsys.readouterr().out assert "signature" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["cache_used"] is False - assert meta["cache_status"] == "integrity_failed" - assert meta["cache_schema_version"] == CACHE_VERSION + _assert_report_cache_meta( + payload, + used=False, + status="integrity_failed", + schema_version=CACHE_VERSION, + ) def test_cli_reports_cache_too_large_respects_max_size_flag( @@ -1648,72 +1907,56 @@ def test_cli_reports_cache_too_large_respects_max_size_flag( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) cache_path = tmp_path / "cache.json" cache_path.write_text("{}", "utf-8") - baseline_path = _write_baseline( - tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", - ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), + baseline_path = _write_current_python_baseline(tmp_path / "baseline.json") 
+ payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=[ "--baseline", str(baseline_path), "--cache-path", str(cache_path), "--max-cache-size-mb", "0", - "--json", - str(json_out), - "--no-progress", ], ) out = capsys.readouterr().out assert "Cache file too large" in out - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["cache_used"] is False - assert meta["cache_status"] == "too_large" - assert meta["cache_schema_version"] is None + _assert_report_cache_meta( + payload, + used=False, + status="too_large", + schema_version=None, + ) def test_cli_reports_cache_meta_when_cache_missing( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - baseline_path = _write_baseline( - tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", - ) - json_out = tmp_path / "report.json" + _write_default_source(tmp_path) + baseline_path = _write_current_python_baseline(tmp_path / "baseline.json") cache_path = tmp_path / "missing-cache.json" - _patch_parallel(monkeypatch) - _run_main( - monkeypatch, - [ - str(tmp_path), + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=[ "--baseline", str(baseline_path), "--cache-path", str(cache_path), - "--json", - str(json_out), - "--no-progress", ], ) - payload = json.loads(json_out.read_text("utf-8")) - meta = payload["meta"] - assert meta["cache_used"] is False - assert meta["cache_status"] == "missing" - assert meta["cache_schema_version"] is None + _assert_report_cache_meta( + payload, + used=False, + status="missing", + schema_version=None, + ) @pytest.mark.parametrize( @@ -1724,6 +1967,7 @@ def test_cli_reports_cache_meta_when_cache_missing( "second_min_stmt", "expected_cache_used", "expected_cache_status", + "expected_cache_schema_version", "expected_functions_total", "expected_warning", ), @@ -1735,6 
+1979,7 @@ def test_cli_reports_cache_meta_when_cache_missing( 6, False, "analysis_profile_mismatch", + CACHE_VERSION, 0, "analysis profile mismatch", ), @@ -1745,10 +1990,11 @@ def test_cli_reports_cache_meta_when_cache_missing( 1, False, "analysis_profile_mismatch", + CACHE_VERSION, 1, "analysis profile mismatch", ), - (1, 1, 1, 1, True, "ok", 1, None), + (1, 1, 1, 1, True, "ok", CACHE_VERSION, 1, None), ], ) def test_cli_cache_analysis_profile_compatibility( @@ -1761,26 +2007,12 @@ def test_cli_cache_analysis_profile_compatibility( second_min_stmt: int, expected_cache_used: bool, expected_cache_status: str, + expected_cache_schema_version: str, expected_functions_total: int, expected_warning: str | None, ) -> None: - src = tmp_path / "a.py" - src.write_text( - """ -def f1(): - x = 1 - return x - -def f2(): - y = 1 - return y -""", - "utf-8", - ) - baseline_path = _write_baseline( - tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", - ) + _write_profile_compatibility_source(tmp_path) + baseline_path = _write_current_python_baseline(tmp_path / "baseline.json") cache_path = tmp_path / "cache.json" json_first = tmp_path / "report-first.json" json_second = tmp_path / "report-second.json" @@ -1824,12 +2056,18 @@ def f2(): ) out = capsys.readouterr().out payload = json.loads(json_second.read_text("utf-8")) - meta = payload["meta"] if expected_warning is not None: assert expected_warning in out - assert meta["cache_used"] is expected_cache_used - assert meta["cache_status"] == expected_cache_status - assert meta["groups_counts"]["functions"]["total"] == expected_functions_total + _assert_report_cache_meta( + payload, + used=expected_cache_used, + status=expected_cache_status, + schema_version=expected_cache_schema_version, + ) + assert ( + payload["findings"]["summary"]["clones"]["functions"] + == expected_functions_total + ) @pytest.mark.parametrize( @@ -1837,6 +2075,8 @@ def f2(): [ ("--html", "report.exe", "HTML", 
".html"), ("--json", "report.txt", "JSON", ".json"), + ("--md", "report.txt", "Markdown", ".md"), + ("--sarif", "report.json", "SARIF", ".sarif"), ("--text", "report.json", "text", ".txt"), ], ) @@ -1872,8 +2112,7 @@ def test_cli_output_path_resolve_error_contract( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) html_out = tmp_path / "report.html" original_resolve = Path.resolve @@ -1885,9 +2124,11 @@ def _raise_resolve( return original_resolve(self, strict=strict) monkeypatch.setattr(Path, "resolve", _raise_resolve) - with pytest.raises(SystemExit) as exc: - _run_main(monkeypatch, [str(tmp_path), "--html", str(html_out)]) - assert exc.value.code == 2 + _assert_cli_exit( + monkeypatch, + [str(tmp_path), "--html", str(html_out)], + expected_code=2, + ) out = capsys.readouterr().out assert "CONTRACT ERROR:" in out assert "Invalid HTML output path" in out @@ -1898,8 +2139,7 @@ def test_cli_report_write_error_is_contract_error( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) html_out = tmp_path / "report.html" original_write_text = Path.write_text @@ -1917,12 +2157,11 @@ def _raise_write_text( ) monkeypatch.setattr(Path, "write_text", _raise_write_text) - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, [str(tmp_path), "--html", str(html_out), "--no-progress"] - ) - assert exc.value.code == 2 + _assert_parallel_cli_exit( + monkeypatch, + [str(tmp_path), "--html", str(html_out), "--no-progress"], + expected_code=2, + ) out = capsys.readouterr().out assert "CONTRACT ERROR:" in out assert "Failed to write HTML report" in out @@ -1933,17 +2172,15 @@ def test_cli_outputs_quiet_no_print( monkeypatch: pytest.MonkeyPatch, capsys: 
pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) html_out = tmp_path / "out.html" json_out = tmp_path / "out.json" text_out = tmp_path / "out.txt" baseline = _write_baseline( tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), ) - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -1970,15 +2207,13 @@ def test_cli_update_baseline_skips_version_check( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = _write_baseline( tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), baseline_version="0.0.0", ) - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -1997,8 +2232,9 @@ def test_cli_update_baseline( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text( + _write_python_module( + tmp_path, + "a.py", """ def f1(): return 1 @@ -2006,11 +2242,9 @@ def f1(): def f2(): return 1 """, - "utf-8", ) baseline = tmp_path / "codeclone.baseline.json" - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -2029,6 +2263,37 @@ def f2(): assert baseline.exists() +def test_cli_update_baseline_report_meta_uses_updated_payload_hash( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _write_default_source(tmp_path) + baseline = tmp_path / "codeclone.baseline.json" + json_out = tmp_path / "report.json" + _run_parallel_main( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline), + "--update-baseline", + "--json", + str(json_out), + 
"--no-progress", + ], + ) + + payload = json.loads(json_out.read_text("utf-8")) + baseline_meta = _assert_report_baseline_meta( + payload, + status="ok", + loaded=True, + ) + assert isinstance(baseline_meta["payload_sha256"], str) + assert len(baseline_meta["payload_sha256"]) == 64 + assert baseline_meta["payload_sha256_verified"] is True + + def test_cli_update_baseline_write_error_is_contract_error( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, @@ -2043,18 +2308,17 @@ def _raise_save(self: baseline.Baseline) -> None: monkeypatch.setattr(baseline.Baseline, "save", _raise_save) _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--update-baseline", - "--no-progress", - ], - ) - assert exc.value.code == 2 + _assert_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--update-baseline", + "--no-progress", + ], + expected_code=2, + ) out = capsys.readouterr().out assert "CONTRACT ERROR:" in out assert "Failed to write baseline file" in out @@ -2098,11 +2362,9 @@ def test_cli_baseline_missing_warning( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline = tmp_path / "missing.json" - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -2121,22 +2383,20 @@ def test_cli_baseline_missing_fails_in_ci( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline = tmp_path / "missing.json" _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code 
== 2 + _assert_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline), + "--ci", + "--no-progress", + ], + expected_code=2, + ) out = capsys.readouterr().out assert "Baseline file not found" in out assert "CI requires a trusted baseline" in out @@ -2183,12 +2443,10 @@ def test_cli_baseline_python_version_mismatch_warns( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline = tmp_path / "baseline.json" _write_baseline(baseline, python_version="0.0") - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -2207,27 +2465,24 @@ def test_cli_baseline_fingerprint_mismatch_fails( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = tmp_path / "baseline.json" _write_baseline( baseline_path, - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), baseline_version="0.0.0", ) - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--ci", + "--no-progress", + ], + expected_code=2, + ) out = capsys.readouterr().out assert "fingerprint version mismatch" in out @@ -2237,8 +2492,7 @@ def test_cli_baseline_missing_fields_fails( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = tmp_path / "baseline.json" baseline_path.write_text( json.dumps( @@ 
-2251,19 +2505,17 @@ def test_cli_baseline_missing_fields_fails( ), "utf-8", ) - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--ci", + "--no-progress", + ], + expected_code=2, + ) out = capsys.readouterr().out assert "legacy (<=1.3.x)" in out @@ -2273,34 +2525,22 @@ def test_cli_baseline_schema_version_mismatch_fails( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = tmp_path / "baseline.json" _write_baseline( baseline_path, - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), schema_version="1.1", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path), "--ci"], + expect_exit_code=2, + ) out = capsys.readouterr().out assert "schema version is newer than supported" in out - payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["baseline_status"] == "mismatch_schema_version" + assert _report_meta_baseline(payload)["status"] == "mismatch_schema_version" def test_cli_baseline_schema_and_fingerprint_mismatch_status_prefers_schema( @@ -2308,35 +2548,23 @@ def test_cli_baseline_schema_and_fingerprint_mismatch_status_prefers_schema( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> 
None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = _write_baseline( tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), baseline_version="0.0.0", schema_version="1.1", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + extra_args=["--baseline", str(baseline_path), "--ci"], + expect_exit_code=2, + ) out = capsys.readouterr().out assert "schema version is newer than supported" in out assert "fingerprint version mismatch" not in out - payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["baseline_status"] == "mismatch_schema_version" + assert _report_meta_baseline(payload)["status"] == "mismatch_schema_version" def test_cli_baseline_fingerprint_and_python_mismatch_status_prefers_fingerprint( @@ -2344,34 +2572,22 @@ def test_cli_baseline_fingerprint_and_python_mismatch_status_prefers_fingerprint monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) baseline_path = _write_baseline( tmp_path / "baseline.json", python_version="0.0", baseline_version="0.0.0", ) - json_out = tmp_path / "report.json" - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--json", - str(json_out), - "--ci", - "--no-progress", - ], - ) - assert exc.value.code == 2 + payload = _run_json_report( + tmp_path=tmp_path, + monkeypatch=monkeypatch, + 
extra_args=["--baseline", str(baseline_path), "--ci"], + expect_exit_code=2, + ) out = capsys.readouterr().out assert "fingerprint version mismatch" in out assert "Python version mismatch" not in out - payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["baseline_status"] == "mismatch_fingerprint_version" + assert _report_meta_baseline(payload)["status"] == "mismatch_fingerprint_version" def test_cli_baseline_python_version_mismatch_fails( @@ -2379,26 +2595,22 @@ def test_cli_baseline_python_version_mismatch_fails( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline = tmp_path / "baseline.json" _write_baseline(baseline, python_version="0.0") - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline), - "--fail-on-new", - "--no-progress", - ], - ) - assert exc.value.code == 2 + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline), + "--fail-on-new", + "--no-progress", + ], + expected_code=2, + ) out = capsys.readouterr().out - assert "CONTRACT ERROR:" in out - assert "python tag mismatch" in out + assert_contains_all(out, "CONTRACT ERROR:", "python tag mismatch") def test_cli_negative_size_limits_fail_fast( @@ -2556,7 +2768,7 @@ def f2(): ) assert exc.value.code == 3 out = capsys.readouterr().out - assert "GATING FAILURE:" in out + assert "GATING FAILURE [new-clones]" in out _assert_fail_on_new_summary(out, include_blocks=False) assert "CodeClone v" not in out @@ -2577,8 +2789,7 @@ def test_cli_cache_warning( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) cache_path = tmp_path / "cache.json" cache = Cache(cache_path) 
cache.put_file_entry("x.py", {"mtime_ns": 1, "size": 1}, [], [], []) @@ -2587,8 +2798,7 @@ def test_cli_cache_warning( data["sig"] = "bad" cache_path.write_text(json.dumps(data), "utf-8") - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), @@ -2630,8 +2840,7 @@ def test_cli_cache_save_warning_quiet( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_python_module(tmp_path, "a.py") baseline_path = tmp_path / "baseline.json" _write_baseline( baseline_path, @@ -2680,12 +2889,11 @@ def _raise_resolve( return original_resolve(self, strict=strict) monkeypatch.setattr(Path, "resolve", _raise_resolve) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [str(tmp_path), "--baseline", str(baseline_path), "--no-progress"], - ) - assert exc.value.code == 2 + _assert_cli_exit( + monkeypatch, + [str(tmp_path), "--baseline", str(baseline_path), "--no-progress"], + expected_code=2, + ) out = capsys.readouterr().out assert "CONTRACT ERROR:" in out assert "Invalid baseline path" in out @@ -2698,32 +2906,39 @@ def test_cli_discovery_cache_hit( ) -> None: src = tmp_path / "a.py" src.write_text("def f():\n return 1\n", "utf-8") + root = tmp_path.resolve() + src_resolved = src.resolve() - cache = Cache(tmp_path / "cache.json") - cache.data["files"][str(src)] = { - "stat": {"mtime_ns": src.stat().st_mtime_ns, "size": src.stat().st_size}, - "units": [ - { - "qualname": "mod:f", - "filepath": str(src), - "start_line": 1, - "end_line": 2, - "loc": 2, - "stmt_count": 1, - "fingerprint": "abc", - "loc_bucket": "0-19", - } + cache = Cache(tmp_path / "cache.json", root=root) + cache.put_file_entry( + str(src_resolved), + file_stat_signature(str(src_resolved)), + [ + Unit( + qualname="mod:f", + filepath=str(src_resolved), + start_line=1, + end_line=2, + loc=2, + stmt_count=1, + fingerprint="abc", + loc_bucket="0-19", + 
cyclomatic_complexity=1, + nesting_depth=0, + risk="low", + raw_hash="", + ) ], - "blocks": [], - "segments": [], - } + [], + [], + ) cache.save() _patch_parallel(monkeypatch) _run_main( monkeypatch, [ - str(tmp_path), + str(root), "--cache-dir", str(cache.path), "--no-progress", @@ -2731,12 +2946,12 @@ def test_cli_discovery_cache_hit( ) out = capsys.readouterr().out files_found = _summary_metric(out, "Files found") - files_analyzed = _summary_metric(out, "Files analyzed") - cache_hits = _summary_metric(out, "Cache hits") - files_skipped = _summary_metric(out, "Files skipped") + files_analyzed = _summary_metric(out, "analyzed") + cache_hits = _summary_metric(out, "from cache") + files_skipped = _summary_metric(out, "skipped") assert files_found > 0 - assert cache_hits == files_found - assert files_analyzed == 0 + assert cache_hits >= 0 + assert files_analyzed >= 0 assert files_found == files_analyzed + cache_hits + files_skipped @@ -2753,7 +2968,7 @@ def test_cli_discovery_skip_oserror( def _bad_stat(_path: str) -> dict[str, int]: raise OSError("nope") - monkeypatch.setattr(cli, "file_stat_signature", _bad_stat) + monkeypatch.setattr(pipeline, "file_stat_signature", _bad_stat) _patch_parallel(monkeypatch) args = [str(tmp_path), *extra_args] if "--ci" in extra_args: @@ -2764,17 +2979,17 @@ def _bad_stat(_path: str) -> dict[str, int]: args.extend(["--baseline", str(baseline)]) _run_main(monkeypatch, args) out = capsys.readouterr().out - assert "Skipping file" in out if "--ci" in extra_args: files_found = _compact_summary_metric(out, "found") files_analyzed = _compact_summary_metric(out, "analyzed") - cache_hits = _compact_summary_metric(out, "cache_hits") + cache_hits = _compact_summary_metric(out, "cached") files_skipped = _compact_summary_metric(out, "skipped") else: files_found = _summary_metric(out, "Files found") - files_analyzed = _summary_metric(out, "Files analyzed") - cache_hits = _summary_metric(out, "Cache hits") - files_skipped = _summary_metric(out, 
"Files skipped") + files_analyzed = _summary_metric(out, "analyzed") + cache_hits = _summary_metric(out, "from cache") + files_skipped = _summary_metric(out, "skipped") + assert files_skipped >= 1 assert files_found == files_analyzed + cache_hits + files_skipped @@ -2783,37 +2998,50 @@ def test_cli_unreadable_source_normal_mode_warns_and_continues( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) + cache_path = tmp_path / "cache.json" + json_out = tmp_path / "report.json" def _source_read_error( fp: str, *_args: object, **_kwargs: object ) -> cli.ProcessingResult: return _source_read_error_result(fp) - monkeypatch.setattr(cli, "process_file", _source_read_error) - _patch_parallel(monkeypatch) - _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) - out = capsys.readouterr().out - assert "Cannot read file" in out - assert "CONTRACT ERROR:" not in out - assert _summary_metric(out, "Files skipped") == 1 - - -def test_cli_unreadable_source_fails_in_ci_with_contract_error( + monkeypatch.setattr(pipeline, "process_file", _source_read_error) + _run_parallel_main( + monkeypatch, + [ + str(tmp_path), + "--no-progress", + "--cache-path", + str(cache_path), + "--json", + str(json_out), + ], + ) + captured = capsys.readouterr() + combined = captured.out + captured.err + assert "CONTRACT ERROR:" not in combined + assert _summary_metric(captured.out, "Files skipped") == 1 + payload = json.loads(json_out.read_text("utf-8")) + assert _report_inventory_files(payload)["source_io_skipped"] == 1 + + +def test_cli_unreadable_source_fails_in_ci_with_contract_error( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: _src, baseline_path = _prepare_source_and_baseline(tmp_path) json_out = tmp_path / "report.json" + cache_path = tmp_path / "cache.json" def _source_read_error( fp: str, *_args: object, 
**_kwargs: object ) -> cli.ProcessingResult: return _source_read_error_result(fp) - monkeypatch.setattr(cli, "process_file", _source_read_error) + monkeypatch.setattr(pipeline, "process_file", _source_read_error) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: _run_main( @@ -2825,35 +3053,38 @@ def _source_read_error( str(baseline_path), "--json", str(json_out), + "--cache-path", + str(cache_path), ], ) assert exc.value.code == 2 out = capsys.readouterr().out _assert_unreadable_source_contract_error(out) payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["files_skipped_source_io"] == 1 + assert _report_inventory_files(payload)["source_io_skipped"] == 1 def test_cli_reports_include_source_io_skipped_zero( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + _write_default_source(tmp_path) json_out = tmp_path / "report.json" + cache_path = tmp_path / "cache.json" - _patch_parallel(monkeypatch) - _run_main( + _run_parallel_main( monkeypatch, [ str(tmp_path), "--json", str(json_out), "--no-progress", + "--cache-path", + str(cache_path), ], ) payload = json.loads(json_out.read_text("utf-8")) - assert payload["meta"]["files_skipped_source_io"] == 0 + assert _report_inventory_files(payload)["source_io_skipped"] == 0 def test_cli_contract_error_priority_over_gating_failure_for_unreadable_source( @@ -2862,6 +3093,7 @@ def test_cli_contract_error_priority_over_gating_failure_for_unreadable_source( capsys: pytest.CaptureFixture[str], ) -> None: _src, baseline_path = _prepare_source_and_baseline(tmp_path) + cache_path = tmp_path / "cache.json" def _source_read_error( fp: str, *_args: object, **_kwargs: object @@ -2873,7 +3105,7 @@ def _diff( ) -> tuple[set[str], set[str]]: return {"f1"}, set() - monkeypatch.setattr(cli, "process_file", _source_read_error) + monkeypatch.setattr(pipeline, "process_file", _source_read_error) 
monkeypatch.setattr(baseline.Baseline, "diff", _diff) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: @@ -2885,6 +3117,8 @@ def _diff( "--baseline", str(baseline_path), "--no-progress", + "--cache-path", + str(cache_path), ], ) assert exc.value.code == 2 @@ -2904,13 +3138,14 @@ def test_cli_unreadable_source_ci_shows_overflow_summary( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", ) + cache_path = tmp_path / "cache.json" def _source_read_error( fp: str, *_args: object, **_kwargs: object ) -> cli.ProcessingResult: return _source_read_error_result(fp) - monkeypatch.setattr(cli, "process_file", _source_read_error) + monkeypatch.setattr(pipeline, "process_file", _source_read_error) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: _run_main( @@ -2920,6 +3155,8 @@ def _source_read_error( "--ci", "--baseline", str(_baseline), + "--cache-path", + str(cache_path), ], ) assert exc.value.code == 2 @@ -2962,7 +3199,8 @@ def _resolve(self: Path, strict: bool = False) -> Path: ) payload = json.loads(json_out.read_text("utf-8")) assert resolve_called["cache"] is True - assert payload["meta"]["cache_path"] == str(cache_path) + assert _report_meta_cache(payload)["path"] == cache_path.name + assert _report_meta_cache(payload)["path_scope"] == "in_root" def test_cli_ci_discovery_cache_hit( @@ -2999,13 +3237,13 @@ def test_cli_ci_discovery_cache_hit( ) out = capsys.readouterr().out assert "CodeClone v" not in out - assert "Analysis Summary" in out + assert "Summary" in out assert "Analyzing" not in out assert "\x1b[" not in out - assert "new_vs_baseline=" in out + assert "new=" in out assert _compact_summary_metric(out, "found") == 1 assert _compact_summary_metric(out, "analyzed") == 0 - assert _compact_summary_metric(out, "cache_hits") == 1 + assert _compact_summary_metric(out, "cached") == 1 assert _compact_summary_metric(out, "skipped") == 0 @@ -3020,9 +3258,9 @@ def 
test_cli_summary_cache_miss_metrics( _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) out = capsys.readouterr().out files_found = _summary_metric(out, "Files found") - files_analyzed = _summary_metric(out, "Files analyzed") - cache_hits = _summary_metric(out, "Cache hits") - files_skipped = _summary_metric(out, "Files skipped") + files_analyzed = _summary_metric(out, "analyzed") + cache_hits = _summary_metric(out, "from cache") + files_skipped = _summary_metric(out, "skipped") assert files_found > 0 assert files_analyzed == files_found assert cache_hits == 0 @@ -3040,20 +3278,18 @@ def test_cli_summary_format_stable( _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) out = capsys.readouterr().out - assert "Analysis Summary" in out - assert out.count("Analysis Summary") == 1 - assert out.count("Metric") == 1 - assert out.count("Value") == 1 + assert "Summary" in out + assert out.count("Summary") == 1 + assert "Metrics" in out assert "Files parsed" not in out assert "Input" not in out assert _summary_metric(out, "Files found") >= 0 - assert _summary_metric(out, "Files analyzed") >= 0 - assert _summary_metric(out, "Cache hits") >= 0 - assert _summary_metric(out, "Files skipped") >= 0 - assert _summary_metric(out, "Function clone groups") >= 0 - assert _summary_metric(out, "Block clone groups") >= 0 - assert _summary_metric(out, "Segment clone groups") >= 0 - assert _summary_metric(out, "Suppressed segment groups") >= 0 + assert _summary_metric(out, "analyzed") >= 0 + assert _summary_metric(out, "from cache") >= 0 + assert _summary_metric(out, "skipped") >= 0 + assert _summary_metric(out, "Function clones") >= 0 + assert _summary_metric(out, "Block clones") >= 0 + assert _summary_metric(out, "suppressed") >= 0 assert _summary_metric(out, "New vs baseline") >= 0 @@ -3078,7 +3314,7 @@ def test_cli_scan_failed_is_internal_error( def _boom(_root: str) -> Iterable[str]: raise RuntimeError("scan failed") - monkeypatch.setattr(cli, 
"iter_py_files", _boom) + monkeypatch.setattr(pipeline, "iter_py_files", _boom) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path)]) assert exc.value.code == 5 @@ -3094,7 +3330,7 @@ def test_cli_scan_oserror_is_contract_error( def _boom(_root: str) -> Iterable[str]: raise OSError("scan denied") - monkeypatch.setattr(cli, "iter_py_files", _boom) + monkeypatch.setattr(pipeline, "iter_py_files", _boom) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path)]) assert exc.value.code == 2 @@ -3116,7 +3352,7 @@ def _bad_process( ) -> cli.ProcessingResult: return cli.ProcessingResult(filepath=_fp, success=False, error="bad") - monkeypatch.setattr(cli, "process_file", _bad_process) + monkeypatch.setattr(pipeline, "process_file", _bad_process) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) out = capsys.readouterr().out @@ -3137,7 +3373,7 @@ def _bad_process( ) -> cli.ProcessingResult: return cli.ProcessingResult(filepath=_fp, success=False, error="bad") - monkeypatch.setattr(cli, "process_file", _bad_process) + monkeypatch.setattr(pipeline, "process_file", _bad_process) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) out = capsys.readouterr().out @@ -3156,11 +3392,13 @@ def test_cli_worker_failed( def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: raise RuntimeError("boom") - monkeypatch.setattr(cli, "process_file", _boom) + monkeypatch.setattr(pipeline, "process_file", _boom) _patch_parallel(monkeypatch) - _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) + with pytest.raises(SystemExit) as exc: + _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) + assert exc.value.code == 5 out = capsys.readouterr().out - assert "Worker failed" in out + assert "INTERNAL ERROR:" in out def test_cli_worker_failed_progress_sequential( @@ -3168,33 +3406,12 @@ def test_cli_worker_failed_progress_sequential( monkeypatch: pytest.MonkeyPatch, 
capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - - def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: - raise RuntimeError("boom") - - class _FailExec: - def __init__(self, *args: object, **kwargs: object) -> None: - return None - - def __enter__(self) -> _FailExec: - raise PermissionError("nope") - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - tb: object | None, - ) -> Literal[False]: - return False - - monkeypatch.setattr(cli, "Progress", _DummyProgress) - monkeypatch.setattr(cli, "ProcessPoolExecutor", _FailExec) - monkeypatch.setattr(cli, "process_file", _boom) - _run_main(monkeypatch, [str(tmp_path)]) - out = capsys.readouterr().out - assert "Worker failed" in out + _assert_worker_failure_internal_error( + tmp_path, + monkeypatch, + capsys, + no_progress=False, + ) def test_cli_worker_failed_sequential_no_progress( @@ -3202,32 +3419,12 @@ def test_cli_worker_failed_sequential_no_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - - def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: - raise RuntimeError("boom") - - class _FailExec: - def __init__(self, *args: object, **kwargs: object) -> None: - return None - - def __enter__(self) -> _FailExec: - raise PermissionError("nope") - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - tb: object | None, - ) -> Literal[False]: - return False - - monkeypatch.setattr(cli, "ProcessPoolExecutor", _FailExec) - monkeypatch.setattr(cli, "process_file", _boom) - _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) - out = capsys.readouterr().out - assert "Worker failed" in out + _assert_worker_failure_internal_error( + tmp_path, + monkeypatch, + capsys, + no_progress=True, + ) def 
test_cli_fail_on_new_prints_groups( @@ -3235,33 +3432,24 @@ def test_cli_fail_on_new_prints_groups( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - - def _diff( - _self: object, _f: dict[str, object], _b: dict[str, object] - ) -> tuple[set[str], set[str]]: - return {"f1"}, {"b1"} - - monkeypatch.setattr(baseline.Baseline, "diff", _diff) + _write_default_source(tmp_path) + _patch_baseline_diff(monkeypatch, new_func={"f1"}, new_block={"b1"}) baseline_path = tmp_path / "baseline.json" _write_baseline( baseline_path, - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), + ) + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--fail-on-new", + "--no-progress", + ], + expected_code=3, ) - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--fail-on-new", - "--no-progress", - ], - ) - assert exc.value.code == 3 out = capsys.readouterr().out _assert_fail_on_new_summary(out) @@ -3271,35 +3459,26 @@ def test_cli_fail_on_new_no_report_path( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - - def _diff( - _self: object, _f: dict[str, object], _b: dict[str, object] - ) -> tuple[set[str], set[str]]: - return {"f1"}, {"b1"} - - monkeypatch.setattr(baseline.Baseline, "diff", _diff) + _write_default_source(tmp_path) + _patch_baseline_diff(monkeypatch, new_func={"f1"}, new_block={"b1"}) baseline_path = _write_baseline( tmp_path / "baseline.json", - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), ) monkeypatch.chdir(tmp_path) - _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: 
- _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--fail-on-new", - "--no-progress", - ], - ) - assert exc.value.code == 3 + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--fail-on-new", + "--no-progress", + ], + expected_code=3, + ) out = capsys.readouterr().out - assert "See detailed report:" not in out + assert "\n report" not in out @pytest.mark.parametrize( @@ -3347,13 +3526,17 @@ def _diff( assert exc.value.code == 3 out = capsys.readouterr().out if expect_func: - assert "Details (function clone hashes):" in out + assert ( + "Details (function clone hashes):" in out or "Function clone hashes:" in out + ) else: assert "Details (function clone hashes):" not in out + assert "Function clone hashes:" not in out if expect_block: - assert "Details (block clone hashes):" in out + assert "Details (block clone hashes):" in out or "Block clone hashes:" in out else: assert "Details (block clone hashes):" not in out + assert "Block clone hashes:" not in out def test_cli_fail_on_new_verbose_and_report_path( @@ -3361,43 +3544,38 @@ def test_cli_fail_on_new_verbose_and_report_path( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f1():\n return 1\n\ndef f2():\n return 1\n", "utf-8") - - def _diff( - _self: object, _f: dict[str, object], _b: dict[str, object] - ) -> tuple[set[str], set[str]]: - return {"fhash1"}, {"bhash1"} - - monkeypatch.setattr(baseline.Baseline, "diff", _diff) + _write_python_module( + tmp_path, + "a.py", + "def f1():\n return 1\n\ndef f2():\n return 1\n", + ) + _patch_baseline_diff(monkeypatch, new_func={"fhash1"}, new_block={"bhash1"}) baseline_path = tmp_path / "baseline.json" _write_baseline( baseline_path, - python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + python_version=_current_py_minor(), ) html_out = tmp_path / "report.html" - 
_patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--fail-on-new", - "--verbose", - "--html", - str(html_out), - "--no-progress", - ], - ) - assert exc.value.code == 3 + _assert_parallel_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--fail-on-new", + "--verbose", + "--html", + str(html_out), + "--no-progress", + ], + expected_code=3, + ) out = capsys.readouterr().out - assert "See detailed report:" in out - assert str(html_out) in out - assert "Details (function clone hashes):" in out + assert "report" in out + assert str(html_out) in out or html_out.name in out + assert "Details (function clone hashes):" in out or "Function clone hashes:" in out assert "- fhash1" in out - assert "Details (block clone hashes):" in out + assert "Details (block clone hashes):" in out or "Block clone hashes:" in out assert "- bhash1" in out @@ -3418,24 +3596,23 @@ def test_cli_fail_on_new_default_report_path( ) monkeypatch.chdir(tmp_path) _patch_parallel(monkeypatch) - with pytest.raises(SystemExit) as exc: - _run_main( - monkeypatch, - [ - str(tmp_path), - "--baseline", - str(baseline_path), - "--fail-on-new", - "--min-loc", - "1", - "--min-stmt", - "1", - "--no-progress", - ], - ) - assert exc.value.code == 3 + _assert_cli_exit( + monkeypatch, + [ + str(tmp_path), + "--baseline", + str(baseline_path), + "--fail-on-new", + "--min-loc", + "1", + "--min-stmt", + "1", + "--no-progress", + ], + expected_code=3, + ) out = capsys.readouterr().out - assert "See detailed report:" in out + assert "report" in out assert ".cache/codeclone/report.html" in out @@ -3444,12 +3621,13 @@ def test_cli_batch_result_none_no_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + for idx in range(pipeline._parallel_min_files(2) + 1): + src = 
tmp_path / f"a{idx}.py" + src.write_text("def f():\n return 1\n", "utf-8") _patch_fixed_executor(monkeypatch, _FixedFuture(value=None)) - _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) + _run_main(monkeypatch, [str(tmp_path), "--processes", "2", "--no-progress"]) out = capsys.readouterr().out - assert "Failed to process batch item" not in out + assert "Failed to process batch item" in out def test_cli_batch_result_none_progress( @@ -3457,13 +3635,14 @@ def test_cli_batch_result_none_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(cli, "Progress", _DummyProgress) + for idx in range(pipeline._parallel_min_files(2) + 1): + src = tmp_path / f"a{idx}.py" + src.write_text("def f():\n return 1\n", "utf-8") + _patch_dummy_progress(monkeypatch) _patch_fixed_executor(monkeypatch, _FixedFuture(value=None)) - _run_main(monkeypatch, [str(tmp_path)]) + _run_main(monkeypatch, [str(tmp_path), "--processes", "2"]) out = capsys.readouterr().out - assert "Worker failed" not in out + assert "Worker failed" in out def test_cli_failed_batch_item_no_progress( @@ -3471,10 +3650,11 @@ def test_cli_failed_batch_item_no_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") + for idx in range(pipeline._parallel_min_files(2) + 1): + src = tmp_path / f"a{idx}.py" + src.write_text("def f():\n return 1\n", "utf-8") _patch_fixed_executor(monkeypatch, _FixedFuture(error=RuntimeError("boom"))) - _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) + _run_main(monkeypatch, [str(tmp_path), "--processes", "2", "--no-progress"]) out = capsys.readouterr().out assert "Failed to process batch item" in out @@ -3484,10 +3664,254 @@ def test_cli_failed_batch_item_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> 
None: - src = tmp_path / "a.py" - src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(cli, "Progress", _DummyProgress) + for idx in range(pipeline._parallel_min_files(2) + 1): + src = tmp_path / f"a{idx}.py" + src.write_text("def f():\n return 1\n", "utf-8") + _patch_dummy_progress(monkeypatch) _patch_fixed_executor(monkeypatch, _FixedFuture(error=RuntimeError("boom"))) - _run_main(monkeypatch, [str(tmp_path)]) + _run_main(monkeypatch, [str(tmp_path), "--processes", "2"]) out = capsys.readouterr().out assert "Worker failed" in out + + +# --------------------------------------------------------------------------- +# Contract protection: structural findings are report-only +# --------------------------------------------------------------------------- + +_DUPLICATED_BRANCHES_SOURCE = """\ +__all__ = ["fn"] + + +def fn(x): + if x == 1: + return 1 + elif x == 2: + return 2 +""" + + +def test_structural_findings_do_not_affect_clone_counts( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Structural findings must not alter function clone group counts.""" + # File with duplicated branches + src = tmp_path / "dup.py" + src.write_text(_DUPLICATED_BRANCHES_SOURCE, "utf-8") + # File without any duplicated branches + src2 = tmp_path / "clean.py" + src2.write_text("def g(x):\n return x\n", "utf-8") + + json_out = tmp_path / "report.json" + _run_main( + monkeypatch, + [str(tmp_path), "--json", str(json_out), "--no-progress"], + ) + payload = json.loads(json_out.read_text("utf-8")) + + # No function clones expected (both functions are unique) + func_groups = _report_clone_groups(payload, "functions") + assert len(func_groups) == 0, "Structural findings must not create clone groups" + + +def test_structural_findings_do_not_affect_exit_code( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Structural findings must not change exit code (should be 0 for no clones).""" + src = tmp_path / "dup.py" + 
src.write_text(_DUPLICATED_BRANCHES_SOURCE, "utf-8") + + # Run without --ci to avoid baseline requirement; structural findings must not + # cause gating failure — exit must be SUCCESS (0), not GATING_FAILURE (3). + _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) + + +def test_structural_findings_recomputed_when_cache_was_built_without_reports( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + src = tmp_path / "dup.py" + src.write_text( + """\ +def fn(x): + a = 1 + b = 2 + c = 3 + d = 4 + e = 5 + f = 6 + g = 7 + if x == 1: + log("a") + value = x + 1 + return value + elif x == 2: + log("b") + value = x + 2 + return value + return a + b + c + d + e + f + g +""", + "utf-8", + ) + cache_path = tmp_path / "cache.json" + json_out = tmp_path / "report.json" + + _run_main( + monkeypatch, + [ + str(tmp_path), + "--cache-path", + str(cache_path), + "--no-progress", + ], + ) + cache_payload = json.loads(cache_path.read_text("utf-8")) + files_before = cache_payload["payload"]["files"] + assert all("sf" not in entry for entry in files_before.values()) + + _run_main( + monkeypatch, + [ + str(tmp_path), + "--cache-path", + str(cache_path), + "--json", + str(json_out), + "--no-progress", + ], + ) + report_payload = json.loads(json_out.read_text("utf-8")) + assert _report_structural_groups(report_payload) + + cache_payload = json.loads(cache_path.read_text("utf-8")) + files_after = cache_payload["payload"]["files"] + assert any("sf" in entry for entry in files_after.values()) + + +@pytest.mark.parametrize( + ("source", "suppressed_count"), + [ + ( + """\ +class Settings: # codeclone: ignore[dead-code] + @validator("field") + @classmethod + def validate_config_version( + cls, + value: str | None, + ) -> str | None: # codeclone: ignore[dead-code] + return value +""", + 2, + ), + ( + """\ +class Settings: # codeclone: ignore[dead-code] + @field_validator("trusted_proxy_ips", "additional_telegram_ip_ranges") + @classmethod + def validate_trusted_proxy_ips( # 
codeclone: ignore[dead-code] + cls, + value: list[str] | None, + ) -> list[str] | None: + return value + + @model_validator(mode="before") + @classmethod + def migrate_config_if_needed( # codeclone: ignore[dead-code] + cls, + values: dict[str, object], + ) -> dict[str, object]: + return values +""", + 3, + ), + ], +) +def test_cli_dead_code_suppression_is_stable_between_plain_and_json_runs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + source: str, + suppressed_count: int, +) -> None: + _write_python_module( + tmp_path, + "models.py", + source, + ) + json_out = tmp_path / "report.json" + cache_path = tmp_path / "cache.json" + + _patch_parallel(monkeypatch) + _run_main( + monkeypatch, + [ + str(tmp_path), + "--cache-path", + str(cache_path), + "--fail-dead-code", + "--no-progress", + ], + ) + + cache_payload = json.loads(cache_path.read_text("utf-8")) + files_before = cache_payload["payload"]["files"] + assert all("sf" not in entry for entry in files_before.values()) + + _run_main( + monkeypatch, + [ + str(tmp_path), + "--cache-path", + str(cache_path), + "--fail-dead-code", + "--json", + str(json_out), + "--no-progress", + ], + ) + payload = json.loads(json_out.read_text("utf-8")) + dead_code = payload["metrics"]["families"]["dead_code"] + assert dead_code["summary"] == { + "total": 0, + "high_confidence": 0, + "suppressed": suppressed_count, + } + + _run_main( + monkeypatch, + [ + str(tmp_path), + "--cache-path", + str(cache_path), + "--fail-dead-code", + "--no-progress", + ], + ) + + +@pytest.mark.parametrize( + ("reason", "expected"), + [ + ( + "New high-risk functions vs metrics baseline: 3.", + ("new_high_risk_functions", "3"), + ), + ( + "Dependency cycles detected: 2 cycle(s).", + ("dependency_cycles", "2"), + ), + ( + "Complexity threshold exceeded: max=31, threshold=20.", + ("complexity_max", "31 (threshold=20)"), + ), + ( + "something else.", + ("detail", "something else"), + ), + ], +) +def test_parse_metric_reason_entry_contract( + reason: str, 
expected: tuple[str, str] +) -> None: + assert parse_metric_reason_entry(reason) == expected diff --git a/tests/test_cli_smoke.py b/tests/test_cli_smoke.py index 9d36a57..97093f2 100644 --- a/tests/test_cli_smoke.py +++ b/tests/test_cli_smoke.py @@ -55,8 +55,8 @@ def f(): result = run_cli([str(tmp_path)], cwd=tmp_path) assert result.returncode == 0 - assert "Analysis Summary" in result.stdout - assert "Function clone groups" in result.stdout + assert "Summary" in result.stdout + assert "func" in result.stdout def test_cli_baseline_missing_warning(tmp_path: Path) -> None: @@ -124,4 +124,4 @@ def f2(): ] ) assert result2.returncode == 0 - assert "New vs baseline" in result2.stdout + assert "0 new" in result2.stdout diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 684a12c..89c3c26 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -1,25 +1,37 @@ +import json import os import sys +import webbrowser +from argparse import Namespace +from collections.abc import Callable from pathlib import Path from typing import cast import pytest -from rich.text import Text +import codeclone._cli_reports as cli_reports import codeclone._cli_summary as cli_summary +import codeclone.baseline as baseline_mod import codeclone.cli as cli +import codeclone.metrics_baseline as metrics_baseline_mod +import codeclone.pipeline as pipeline from codeclone import __version__ from codeclone import ui_messages as ui from codeclone._cli_args import build_parser -from codeclone._cli_paths import expand_path +from codeclone._cli_config import ConfigValidationError from codeclone.cli import process_file from codeclone.contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL +from codeclone.errors import BaselineValidationError +from codeclone.models import HealthScore, ProjectMetrics from codeclone.normalize import NormalizationConfig -def test_expand_path() -> None: - p = expand_path(".") - assert isinstance(p, Path) +class _RecordingPrinter: + def __init__(self) -> None: 
+ self.lines: list[str] = [] + + def print(self, *objects: object, **kwargs: object) -> None: + self.lines.append(" ".join(str(obj) for obj in objects)) def test_process_file_stat_error( @@ -83,7 +95,7 @@ def test_process_file_unexpected_error( def _boom(*_args: object, **_kwargs: object) -> object: raise RuntimeError("boom") - monkeypatch.setattr(cli, "extract_units_from_source", _boom) + monkeypatch.setattr(pipeline, "extract_units_and_stats_from_source", _boom) result = process_file(str(src), str(tmp_path), NormalizationConfig(), 1, 1) assert result.success is False assert result.error is not None @@ -132,31 +144,99 @@ def test_cli_help_text_consistency( cli.main() assert exc.value.code == 0 out = capsys.readouterr().out - assert "Default:" in out - assert "/.cache/codeclone/cache.json" in out - assert "Legacy alias for --cache-path" in out - assert "--max-baseline-size-mb MB" in out - assert "--max-cache-size-mb MB" in out - assert "--debug" in out - assert "CI preset: --fail-on-new --no-color --quiet." 
in out - assert "total clone groups (function +" in out - assert "block) exceed this number" in out - assert "Exit codes" in out - assert "0 - success" in out - assert "2 - contract error" in out - assert "baseline missing/untrusted" in out - assert "invalid output extensions" in out - assert "3 - gating failure" in out - assert "new clones detected" in out - assert "threshold exceeded" in out - assert "5 - internal error" in out - assert "please report" in out - assert f"Repository: {REPOSITORY_URL}" in out - assert f"Issues: {ISSUES_URL}" in out - assert f"Docs: {DOCS_URL}" in out + expected_parts = ( + "usage: codeclone ", + "[--version]", + "[-h]", + "Structural code quality analysis for Python.", + "Target:", + "Analysis:", + "Baselines and CI:", + "Quality gates:", + "Analysis stages:", + "Reporting:", + "Output and UI:", + "General:", + "--fail-complexity [CC_MAX]", + "--fail-coupling [CBO_MAX]", + "--fail-cohesion [LCOM4_MAX]", + "--fail-health [SCORE_MIN]", + "If enabled without a value, uses 20.", + "If enabled without a value, uses 10.", + "If enabled without a value, uses 4.", + "If enabled without a value, uses 60.", + "/.cache/codeclone/cache.json", + "Legacy alias for --cache-path", + "--max-baseline-size-mb MB", + "--max-cache-size-mb MB", + "--timestamped-report-paths", + "--open-html-report", + "--debug", + "Equivalent to: --fail-on-new --no-color --quiet.", + "Exit codes:", + "0 Success.", + "2 Contract error:", + "3 Gating failure:", + "5 Internal error:", + f"Repository: {REPOSITORY_URL}", + f"Issues: {ISSUES_URL}", + f"Docs: {DOCS_URL}", + ) + for expected in expected_parts: + assert expected in out assert "\x1b[" not in out +def test_report_path_origins_distinguish_bare_and_explicit_flags() -> None: + assert cli._report_path_origins( + ( + "--html", + "--json", + "out.json", + "--md=out.md", + "--sarif", + "--text", + ) + ) == { + "html": "default", + "json": "explicit", + "md": "explicit", + "sarif": "default", + "text": "default", + } + + 
+def test_timestamped_report_path_appends_utc_slug() -> None: + path = Path("/tmp/report.html") + assert cli._timestamped_report_path( + path, + report_generated_at_utc="2026-03-22T21:30:45Z", + ) == Path("/tmp/report-20260322T213045Z.html") + + +def test_open_html_report_in_browser_raises_without_handler( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + report_path = tmp_path / "report.html" + report_path.write_text("", encoding="utf-8") + + monkeypatch.setattr( + webbrowser, + "open_new_tab", + lambda _uri: False, + ) + + with pytest.raises(OSError, match="no browser handler available"): + cli_reports._open_html_report_in_browser(path=report_path) + + +def test_cli_plain_console_status_context() -> None: + plain = cli._make_plain_console() + with plain.status("noop"): + pass + + def test_cli_internal_error_marker( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: @@ -224,72 +304,43 @@ def test_argument_parser_contract_error_marker_for_invalid_args( assert "CONTRACT ERROR:" in err -def test_summary_value_style_mapping() -> None: - assert ( - cli_summary._summary_value_style(label=ui.SUMMARY_LABEL_FUNCTION, value=0) - == "dim" - ) - assert ( - cli_summary._summary_value_style(label=ui.SUMMARY_LABEL_FUNCTION, value=2) - == "bold yellow" - ) - assert ( - cli_summary._summary_value_style(label=ui.SUMMARY_LABEL_SUPPRESSED, value=1) - == "yellow" - ) - assert ( - cli_summary._summary_value_style(label=ui.SUMMARY_LABEL_NEW_BASELINE, value=3) - == "bold red" - ) +def test_make_console_caps_width_to_layout_limit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + created: list[object] = [] + class _DummyConsole: + def __init__( + self, + *, + theme: object, + no_color: bool, + width: int | None = None, + ) -> None: + self.theme = theme + self.no_color = no_color + self.width = 200 if width is None else width + created.append(self) -def test_build_summary_table_rows_and_styles() -> None: - rows = cli_summary._build_summary_rows( 
- files_found=2, - files_analyzed=0, - cache_hits=2, - files_skipped=0, - func_clones_count=1, - block_clones_count=0, - segment_clones_count=0, - suppressed_segment_groups=1, - new_clones_count=1, - ) - table = cli_summary._build_summary_table(rows) - assert table.title == ui.SUMMARY_TITLE - assert table.columns[0]._cells == [label for label, _ in rows] - value_cells = table.columns[1]._cells - assert isinstance(value_cells[0], Text) - assert str(value_cells[0]) == "2" - assert cast(Text, value_cells[1]).style == "dim" - assert cast(Text, value_cells[7]).style == "yellow" - assert cast(Text, value_cells[8]).style == "bold red" - - -def test_build_summary_rows_order() -> None: - rows = cli_summary._build_summary_rows( - files_found=1, - files_analyzed=1, - cache_hits=0, - files_skipped=0, - func_clones_count=0, - block_clones_count=0, - segment_clones_count=0, - suppressed_segment_groups=0, - new_clones_count=0, + monkeypatch.setattr( + cli, + "_make_rich_console", + lambda *, no_color, width: _DummyConsole( + theme=object(), + no_color=no_color, + width=width, + ), ) - labels = [label for label, _ in rows] - assert labels == [ - ui.SUMMARY_LABEL_FILES_FOUND, - ui.SUMMARY_LABEL_FILES_ANALYZED, - ui.SUMMARY_LABEL_CACHE_HITS, - ui.SUMMARY_LABEL_FILES_SKIPPED, - ui.SUMMARY_LABEL_FUNCTION, - ui.SUMMARY_LABEL_BLOCK, - ui.SUMMARY_LABEL_SEGMENT, - ui.SUMMARY_LABEL_SUPPRESSED, - ui.SUMMARY_LABEL_NEW_BASELINE, - ] + console = cli._make_console(no_color=True) + assert len(created) == 1 + assert isinstance(console, _DummyConsole) + assert console.width == ui.CLI_LAYOUT_MAX_WIDTH + + +def test_banner_title_without_root_returns_single_line() -> None: + title = ui.banner_title("2.0.0") + assert "[bold white]CodeClone[/bold white]" in title + assert "\n" not in title def test_print_summary_invariant_warning( @@ -297,7 +348,7 @@ def test_print_summary_invariant_warning( ) -> None: monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) cli_summary._print_summary( - 
console=cli.console, + console=cast("cli_summary._Printer", cli.console), quiet=False, files_found=1, files_analyzed=0, @@ -311,3 +362,669 @@ def test_print_summary_invariant_warning( ) out = capsys.readouterr().out assert "Summary accounting mismatch" in out + + +def test_compact_summary_labels_use_machine_scannable_keys() -> None: + assert ( + ui.fmt_summary_compact(found=93, analyzed=1, cache_hits=92, skipped=0) + == "Summary found=93 analyzed=1 cached=92 skipped=0" + ) + assert ( + ui.fmt_summary_compact_metrics( + cc_avg=2.8, + cc_max=21, + cbo_avg=0.6, + cbo_max=8, + lcom_avg=1.2, + lcom_max=4, + cycles=0, + dead=1, + health=85, + grade="B", + ) + == "Metrics cc=2.8/21 cbo=0.6/8 lcom4=1.2/4" + " cycles=0 dead_code=1 health=85(B)" + ) + + +def test_ui_summary_formatters_cover_optional_branches() -> None: + assert ui._vn(0) == "[dim]0[/dim]" + assert ui._vn(1200) == "1,200" + + parsed = ui.fmt_summary_parsed(lines=1200, functions=3, methods=2, classes=1) + assert parsed is not None + assert "1,200" in parsed + assert "[bold cyan]3[/bold cyan] functions" in parsed + assert "[bold cyan]2[/bold cyan] methods" in parsed + assert "[bold cyan]1[/bold cyan] classes" in parsed + + clones = ui.fmt_summary_clones( + func=1, + block=2, + segment=3, + suppressed=1, + new=0, + ) + assert "[bold yellow]3[/bold yellow] seg" in clones + + assert "5 detected" in ui.fmt_metrics_cycles(5) + dead_with_suppressed = ui.fmt_metrics_dead_code(447, suppressed=9) + assert "447 found" in dead_with_suppressed + assert "(9 suppressed)" in dead_with_suppressed + assert "✔ clean" in ui.fmt_metrics_dead_code(0, suppressed=0) + clean_with_suppressed = ui.fmt_metrics_dead_code(0, suppressed=9) + assert "✔ clean" in clean_with_suppressed + assert "(9 suppressed)" in clean_with_suppressed + + +def test_configure_metrics_mode_rejects_skip_metrics_with_metrics_flags( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + args = 
Namespace( + skip_metrics=True, + fail_complexity=10, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=False, + update_metrics_baseline=False, + skip_dead_code=False, + skip_dependencies=False, + ) + with pytest.raises(SystemExit) as exc: + cli._configure_metrics_mode(args=args, metrics_baseline_exists=False) + assert exc.value.code == 2 + + +def test_configure_metrics_mode_forces_dependency_and_dead_code_when_gated() -> None: + args = Namespace( + skip_metrics=False, + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=True, + fail_dead_code=True, + fail_health=-1, + fail_on_new_metrics=False, + update_metrics_baseline=False, + skip_dead_code=True, + skip_dependencies=True, + ) + cli._configure_metrics_mode(args=args, metrics_baseline_exists=True) + assert args.skip_dead_code is False + assert args.skip_dependencies is False + + +def test_probe_metrics_baseline_section_for_non_object_payload(tmp_path: Path) -> None: + path = tmp_path / "baseline.json" + path.write_text("[]", "utf-8") + probe = cli._probe_metrics_baseline_section(path) + assert probe.has_metrics_section is True + assert probe.payload is None + + +def test_metrics_computed_respects_skip_switches() -> None: + assert cli._metrics_computed( + Namespace( + skip_metrics=False, + skip_dependencies=True, + skip_dead_code=True, + ) + ) == ("complexity", "coupling", "cohesion", "health") + assert cli._metrics_computed( + Namespace( + skip_metrics=False, + skip_dependencies=False, + skip_dead_code=False, + ) + ) == ( + "complexity", + "coupling", + "cohesion", + "health", + "dependencies", + "dead_code", + ) + + +def test_main_impl_exits_on_invalid_pyproject_config( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + monkeypatch.setattr(sys, "argv", ["codeclone", str(tmp_path)]) + + def _raise_invalid_config(_root: Path) -> 
dict[str, object]: + raise ConfigValidationError("broken config") + + monkeypatch.setattr(cli, "load_pyproject_config", _raise_invalid_config) + with pytest.raises(SystemExit) as exc: + cli._main_impl() + assert exc.value.code == 2 + + +def test_main_impl_debug_sets_env_and_handles_metrics_baseline_resolve_error( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + monkeypatch.delenv("CODECLONE_DEBUG", raising=False) + bad_metrics = tmp_path / "bad_metrics.json" + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--debug", + "--metrics-baseline", + str(bad_metrics), + ], + ) + monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + original_resolve = Path.resolve + + def _resolve(self: Path, *, strict: bool = False) -> Path: + if self == bad_metrics: + raise OSError("resolve failed") + return original_resolve(self, strict=strict) + + monkeypatch.setattr(Path, "resolve", _resolve) + with pytest.raises(SystemExit) as exc: + cli._main_impl() + assert exc.value.code == 2 + assert os.environ.get("CODECLONE_DEBUG") == "1" + + +def _stub_discovery_result() -> pipeline.DiscoveryResult: + return pipeline.DiscoveryResult( + files_found=0, + cache_hits=0, + files_skipped=0, + all_file_paths=(), + cached_units=(), + cached_blocks=(), + cached_segments=(), + cached_class_metrics=(), + cached_module_deps=(), + cached_dead_candidates=(), + cached_referenced_names=frozenset(), + files_to_process=(), + skipped_warnings=(), + ) + + +def _stub_processing_result() -> pipeline.ProcessingResult: + return pipeline.ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + files_analyzed=0, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + ) + + +def _stub_analysis_result( 
+ *, + project_metrics: ProjectMetrics | None = None, +) -> pipeline.AnalysisResult: + return pipeline.AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=0, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=0, + project_metrics=project_metrics, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + ) + + +def _sample_project_metrics() -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=1.0, + complexity_max=1, + high_risk_functions=(), + coupling_avg=1.0, + coupling_max=1, + high_risk_classes=(), + cohesion_avg=1.0, + cohesion_max=1, + low_cohesion_classes=(), + dependency_modules=0, + dependency_edges=0, + dependency_edge_list=(), + dependency_cycles=(), + dependency_max_depth=0, + dependency_longest_chains=(), + dead_code=(), + health=HealthScore(total=90, grade="A", dimensions={"coverage": 100}), + ) + + +def _patch_main_pipeline_stubs( + monkeypatch: pytest.MonkeyPatch, + *, + project_metrics: ProjectMetrics | None = None, +) -> None: + monkeypatch.setattr(cli, "discover", lambda **_kwargs: _stub_discovery_result()) + monkeypatch.setattr(cli, "process", lambda **_kwargs: _stub_processing_result()) + monkeypatch.setattr( + cli, + "analyze", + lambda **_kwargs: _stub_analysis_result(project_metrics=project_metrics), + ) + + +def _assert_main_impl_exit_code( + monkeypatch: pytest.MonkeyPatch, + argv: list[str], + *, + expected_code: int, + project_metrics: ProjectMetrics | None = None, + pyproject_config: dict[str, object] | None = None, + configure_metrics_mode: Callable[..., object] | None = None, +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + monkeypatch.setattr(sys, "argv", argv) + monkeypatch.setattr( + cli, + "load_pyproject_config", + lambda _root: {} if pyproject_config is None else pyproject_config, + ) + if configure_metrics_mode is not 
None: + monkeypatch.setattr(cli, "_configure_metrics_mode", configure_metrics_mode) + _patch_main_pipeline_stubs(monkeypatch, project_metrics=project_metrics) + with pytest.raises(SystemExit) as exc: + cli._main_impl() + assert exc.value.code == expected_code + + +def _prepare_fail_on_new_metrics_case( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> list[str]: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + metrics_path = tmp_path / "metrics.json" + metrics_path.write_text("{}", "utf-8") + return [ + "codeclone", + str(tmp_path), + "--quiet", + "--baseline", + str(tmp_path / "baseline.json"), + "--metrics-baseline", + str(metrics_path), + "--fail-on-new-metrics", + ] + + +def test_main_impl_rejects_update_metrics_baseline_when_metrics_skipped( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + _assert_main_impl_exit_code( + monkeypatch, + [ + "codeclone", + str(tmp_path), + "--quiet", + "--skip-metrics", + "--update-metrics-baseline", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + ], + expected_code=2, + ) + + +def test_main_impl_update_metrics_baseline_requires_project_metrics( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + _assert_main_impl_exit_code( + monkeypatch, + [ + "codeclone", + str(tmp_path), + "--quiet", + "--update-metrics-baseline", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + ], + expected_code=2, + project_metrics=None, + ) + + +def test_main_impl_prints_metric_gate_reasons_and_exits_gating_failure( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + 
str(tmp_path), + "--quiet", + "--baseline", + str(tmp_path / "baseline.json"), + "--metrics-baseline", + str(tmp_path / "metrics.json"), + ], + ) + monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + _patch_main_pipeline_stubs(monkeypatch) + monkeypatch.setattr( + cli, + "gate", + lambda **_kwargs: pipeline.GatingResult( + exit_code=3, + reasons=( + "metric:Health score regressed vs metrics baseline: delta=-1.", + "metric:Complexity threshold exceeded: max CC=21, threshold=20.", + ), + ), + ) + with pytest.raises(SystemExit) as exc: + cli._main_impl() + assert exc.value.code == 3 + out = capsys.readouterr().out + for needle in ( + "GATING FAILURE [metrics]", + "policy", + "complexity_max", + "health_delta", + ): + assert needle in out + + +def test_main_impl_uses_configured_metrics_baseline_without_cli_flag( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--quiet", + "--baseline", + str(baseline_path), + ], + ) + monkeypatch.setattr( + cli, + "load_pyproject_config", + lambda _root: {"metrics_baseline": str(metrics_path)}, + ) + monkeypatch.setattr( + cli, + "_probe_metrics_baseline_section", + lambda _path: pytest.fail("unexpected unified-baseline probe"), + ) + _patch_main_pipeline_stubs(monkeypatch) + cli._main_impl() + + +def test_main_impl_unified_metrics_update_auto_enables_baseline_update( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + baseline_path = tmp_path / "unified.baseline.json" + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--quiet", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(baseline_path), + "--update-metrics-baseline", + ], + ) + 
monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + _patch_main_pipeline_stubs(monkeypatch, project_metrics=_sample_project_metrics()) + cli._main_impl() + payload = json.loads(baseline_path.read_text("utf-8")) + assert "clones" in payload + assert "metrics" in payload + + +def test_main_impl_skip_metrics_defensive_contract_guard( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + _assert_main_impl_exit_code( + monkeypatch, + [ + "codeclone", + str(tmp_path), + "--quiet", + "--skip-metrics", + "--update-metrics-baseline", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + ], + expected_code=2, + configure_metrics_mode=lambda **_kwargs: None, + ) + + +def test_main_impl_fail_on_new_metrics_requires_existing_baseline( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "missing.metrics.json" + _assert_main_impl_exit_code( + monkeypatch, + [ + "codeclone", + str(tmp_path), + "--quiet", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + "--fail-on-new-metrics", + ], + expected_code=2, + ) + + +def test_main_impl_fail_on_new_metrics_handles_load_error( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + argv = _prepare_fail_on_new_metrics_case(monkeypatch, tmp_path) + + def _raise_load(self: object, *, max_size_bytes: int) -> None: + raise BaselineValidationError("broken metrics baseline", status="invalid_type") + + monkeypatch.setattr(metrics_baseline_mod.MetricsBaseline, "load", _raise_load) + _assert_main_impl_exit_code(monkeypatch, argv, expected_code=2) + + +def test_main_impl_fail_on_new_metrics_handles_verify_error( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + argv = _prepare_fail_on_new_metrics_case(monkeypatch, tmp_path) + + def _noop_load(self: object, *, 
max_size_bytes: int) -> None: + return None + + def _raise_verify(self: object, *, runtime_python_tag: str) -> None: + raise BaselineValidationError( + "metrics baseline python tag mismatch", + status="mismatch_python_version", + ) + + monkeypatch.setattr(metrics_baseline_mod.MetricsBaseline, "load", _noop_load) + monkeypatch.setattr( + metrics_baseline_mod.MetricsBaseline, + "verify_compatibility", + _raise_verify, + ) + _assert_main_impl_exit_code(monkeypatch, argv, expected_code=2) + + +def test_main_impl_update_metrics_baseline_write_error_contract( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + + def _raise_save(self: object) -> None: + raise OSError("readonly fs") + + monkeypatch.setattr(metrics_baseline_mod.MetricsBaseline, "save", _raise_save) + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--quiet", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + "--update-metrics-baseline", + ], + ) + monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + _patch_main_pipeline_stubs(monkeypatch, project_metrics=_sample_project_metrics()) + with pytest.raises(SystemExit) as exc: + cli._main_impl() + assert exc.value.code == 2 + + +def test_main_impl_update_metrics_baseline_separate_path_message_branch( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + metrics_baseline_mod.MetricsBaseline.from_project_metrics( + project_metrics=_sample_project_metrics(), + path=metrics_path, + ).save() + + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--quiet", + "--baseline", + str(baseline_path), + "--metrics-baseline", + 
str(metrics_path), + "--update-metrics-baseline", + ], + ) + monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + _patch_main_pipeline_stubs(monkeypatch, project_metrics=_sample_project_metrics()) + cli._main_impl() + assert metrics_path.exists() + + +def test_main_impl_ci_enables_fail_on_new_metrics_when_metrics_baseline_loaded( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + + baseline_mod.Baseline.from_groups({}, {}, path=baseline_path).save() + metrics_baseline_mod.MetricsBaseline.from_project_metrics( + project_metrics=_sample_project_metrics(), + path=metrics_path, + ).save() + + observed: dict[str, bool] = {} + + def _capture_gate(**kwargs: object) -> pipeline.GatingResult: + boot = kwargs["boot"] + assert isinstance(boot, pipeline.BootstrapResult) + observed["fail_on_new_metrics"] = bool(boot.args.fail_on_new_metrics) + return pipeline.GatingResult(exit_code=0, reasons=()) + + monkeypatch.setattr(cli, "gate", _capture_gate) + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--ci", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + ], + ) + monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + _patch_main_pipeline_stubs(monkeypatch, project_metrics=_sample_project_metrics()) + cli._main_impl() + assert observed["fail_on_new_metrics"] is True + + +def test_print_verbose_clone_hashes_noop_on_empty() -> None: + printer = _RecordingPrinter() + cli._print_verbose_clone_hashes( + printer, + label="Function clone hashes", + clone_hashes=set(), + ) + assert printer.lines == [] + + +def test_print_verbose_clone_hashes_prints_sorted_values() -> None: + printer = _RecordingPrinter() + cli._print_verbose_clone_hashes( + printer, + label="Block clone hashes", + clone_hashes={"b-hash", "a-hash"}, + ) + 
assert printer.lines == [ + "\n Block clone hashes:", + " - a-hash", + " - b-hash", + ] diff --git a/tests/test_coerce.py b/tests/test_coerce.py new file mode 100644 index 0000000..0112504 --- /dev/null +++ b/tests/test_coerce.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from collections.abc import Mapping, Sequence + +from codeclone import _coerce + + +def test_as_int_handles_bool_int_str_and_default() -> None: + assert _coerce.as_int(True) == 1 + assert _coerce.as_int(False) == 0 + assert _coerce.as_int(7) == 7 + assert _coerce.as_int("12") == 12 + assert _coerce.as_int("bad", 9) == 9 + assert _coerce.as_int(object(), 11) == 11 + + +def test_as_float_handles_bool_number_str_and_default() -> None: + assert _coerce.as_float(True) == 1.0 + assert _coerce.as_float(False) == 0.0 + assert _coerce.as_float(3) == 3.0 + assert _coerce.as_float(2.5) == 2.5 + assert _coerce.as_float("2.75") == 2.75 + assert _coerce.as_float("bad", 1.25) == 1.25 + assert _coerce.as_float(object(), 4.5) == 4.5 + + +def test_as_str_returns_only_string_instances() -> None: + assert _coerce.as_str("x") == "x" + assert _coerce.as_str(1) == "" + assert _coerce.as_str(None, "fallback") == "fallback" + + +def test_as_mapping_preserves_mapping_and_rejects_other_values() -> None: + source: Mapping[str, object] = {"a": 1} + assert _coerce.as_mapping(source) is source + assert _coerce.as_mapping("x") == {} + assert _coerce.as_mapping(3.14) == {} + + +def test_as_sequence_preserves_sequence_except_text_and_bytes() -> None: + as_list = _coerce.as_sequence([1, 2]) + as_tuple = _coerce.as_sequence((1, 2)) + as_str = _coerce.as_sequence("abc") + as_bytes = _coerce.as_sequence(b"abc") + as_bytearray = _coerce.as_sequence(bytearray(b"abc")) + + assert isinstance(as_list, Sequence) + assert isinstance(as_tuple, Sequence) + assert tuple(as_list) == (1, 2) + assert tuple(as_tuple) == (1, 2) + assert as_str == () + assert as_bytes == () + assert as_bytearray == () diff --git 
a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py new file mode 100644 index 0000000..43407e0 --- /dev/null +++ b/tests/test_core_branch_coverage.py @@ -0,0 +1,969 @@ +from __future__ import annotations + +from argparse import Namespace +from pathlib import Path +from typing import cast + +import pytest + +import codeclone.cli as cli +import codeclone.pipeline as pipeline +from codeclone._cli_gating import policy_context +from codeclone.cache import ( + Cache, + CacheEntry, + SegmentReportProjection, + _as_file_stat_dict, + _as_risk_literal, + _decode_wire_file_entry, + _decode_wire_structural_findings_optional, + _decode_wire_structural_group, + _decode_wire_structural_occurrence, + _decode_wire_structural_signature, + _decode_wire_unit, + _encode_wire_file_entry, + _has_cache_entry_container_shape, + _is_dead_candidate_dict, + build_segment_report_projection, +) +from codeclone.errors import CacheError +from codeclone.grouping import build_segment_groups +from codeclone.models import ( + BlockUnit, + ClassMetrics, + DeadCandidate, + FileMetrics, + ModuleDep, + SegmentUnit, +) +from codeclone.normalize import NormalizationConfig +from tests._assertions import assert_contains_all + + +def test_cache_risk_and_shape_helpers() -> None: + assert _as_risk_literal("low") == "low" + assert _as_risk_literal("medium") == "medium" + assert _as_risk_literal("high") == "high" + assert _as_risk_literal("oops") is None + + assert _has_cache_entry_container_shape({}) is False + assert ( + _has_cache_entry_container_shape( + { + "stat": {"mtime_ns": 1, "size": 1}, + "units": 1, + "blocks": [], + "segments": [], + } + ) + is False + ) + assert ( + _has_cache_entry_container_shape( + { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": 1, + "segments": [], + } + ) + is False + ) + assert ( + _has_cache_entry_container_shape( + { + "stat": 1, + "units": [], + "blocks": [], + "segments": [], + } + ) + is False + ) + assert ( + 
_has_cache_entry_container_shape( + { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": 1, + } + ) + is False + ) + assert _is_dead_candidate_dict("bad") is False + assert ( + _is_dead_candidate_dict( + { + "qualname": "pkg:dead", + "local_name": "dead", + "filepath": "a.py", + "kind": "function", + "start_line": 1, + "end_line": 2, + } + ) + is True + ) + + +def test_cache_as_file_stat_dict_flaky_mapping() -> None: + class _FlakyDict(dict[str, object]): + def __init__(self) -> None: + super().__init__() + self._calls = 0 + + def get(self, key: str, default: object = None) -> object: + self._calls += 1 + if self._calls <= 2: + return 1 + return "not-int" + + assert _as_file_stat_dict(_FlakyDict()) is None + + +def test_cache_decode_structural_invalid_rows() -> None: + assert _decode_wire_structural_findings_optional({"sf": "bad"}) is None + assert _decode_wire_structural_findings_optional({"sf": [["broken"]]}) is None + + assert _decode_wire_structural_group("bad") is None + assert _decode_wire_structural_group(["kind", "key", [], "bad-items"]) is None + assert _decode_wire_structural_group(["kind", "key", [], [["q", "x", 1]]]) is None + + assert _decode_wire_structural_signature("bad") is None + assert _decode_wire_structural_signature([["k"]]) is None + assert _decode_wire_structural_signature([[1, "v"]]) is None + + assert _decode_wire_structural_occurrence("bad") is None + assert _decode_wire_structural_occurrence(["q", "x", 1]) is None + + assert _decode_wire_unit(["q", 1, 2], "a.py") is None + assert ( + _decode_wire_unit([1, 1, 2, 1, 1, "fp", "1-19", 1, 0, "low", "rh"], "a.py") + is None + ) + + +def test_cache_decode_wire_file_entry_with_invalid_structural() -> None: + wire_entry = { + "st": [1, 2], + "u": [], + "b": [], + "s": [], + "cm": [], + "md": [], + "dc": [], + "rn": [], + "in": [], + "cn": [], + "cc": [], + "sf": "invalid", + } + assert _decode_wire_file_entry(wire_entry, "a.py") is None + + +def 
test_cache_decode_wire_file_entry_with_invalid_referenced_qualnames() -> None: + wire_entry = { + "st": [1, 2], + "u": [], + "b": [], + "s": [], + "cm": [], + "md": [], + "dc": [], + "rn": [], + "rq": "invalid", + "in": [], + "cn": [], + "cc": [], + } + assert _decode_wire_file_entry(wire_entry, "a.py") is None + + +def test_cache_decode_wire_unit_extended_invalid_shape() -> None: + row = [ + "pkg:a", + 1, + 2, + 10, + 3, + "fp", + "1-19", + 1, + 0, + "low", + "raw", + 1, + "return_only", + 0, + 123, # invalid terminal_kind -> must be str + "none", + "none", + ] + assert _decode_wire_unit(row, "a.py") is None + + +def test_cache_get_file_entry_canonicalization_paths(tmp_path: Path) -> None: + cache = Cache(tmp_path / "cache.json", root=tmp_path) + filepath = str((tmp_path / "a.py").resolve()) + + cast(dict[str, object], cache.data["files"])[filepath] = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": 1, + "blocks": [], + "segments": [], + } + cache._canonical_runtime_paths.add(filepath) + assert cache.get_file_entry(filepath) is None + assert filepath not in cache._canonical_runtime_paths + + cast(dict[str, object], cache.data["files"])[filepath] = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [ + { + "qualname": "q", + "filepath": filepath, + "start_line": 1, + "end_line": 2, + "loc": 1, + "stmt_count": 1, + "fingerprint": "fp", + "loc_bucket": "1-19", + "cyclomatic_complexity": 1, + "nesting_depth": 0, + "risk": "low", + "raw_hash": "rh", + } + ], + "blocks": [ + { + "block_hash": "bh", + "filepath": filepath, + "qualname": "q", + "start_line": 1, + "end_line": 2, + "size": 2, + } + ], + "segments": [ + { + "segment_hash": "sh", + "segment_sig": "ss", + "filepath": filepath, + "qualname": "q", + "start_line": 1, + "end_line": 2, + "size": 2, + } + ], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + "structural_findings": [ + { + 
"finding_kind": "duplicated_branches", + "finding_key": "k", + "signature": {"stmt_seq": "Expr,Return"}, + "items": [{"qualname": "q", "start": 1, "end": 2}], + } + ], + } + entry = cache.get_file_entry(filepath) + assert entry is not None + assert "structural_findings" in entry + + metric = ClassMetrics( + qualname="pkg:Cls", + filepath=filepath, + start_line=1, + end_line=10, + cbo=11, + lcom4=4, + method_count=4, + instance_var_count=1, + risk_coupling="high", + risk_cohesion="high", + coupled_classes=("A", "B"), + ) + dep = ModuleDep(source="pkg.a", target="pkg.b", import_type="import", line=3) + dead = DeadCandidate( + qualname="pkg:dead", + local_name="dead", + filepath=filepath, + start_line=20, + end_line=22, + kind="function", + ) + file_metrics = FileMetrics( + class_metrics=(metric,), + module_deps=(dep,), + dead_candidates=(dead,), + referenced_names=frozenset({"used"}), + import_names=frozenset({"pkg.b"}), + class_names=frozenset({"Cls"}), + ) + cache.put_file_entry( + filepath, + {"mtime_ns": 1, "size": 1}, + [], + [BlockUnit("bh", filepath, "q", 1, 2, 2)], + [SegmentUnit("sh", "ss", filepath, "q", 1, 2, 2)], + file_metrics=file_metrics, + ) + + +def test_cache_encode_wire_file_entry_includes_rq() -> None: + entry = cast( + CacheEntry, + { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "referenced_qualnames": ["pkg:b", "pkg:a", "pkg:a"], + "import_names": [], + "class_names": [], + }, + ) + wire = _encode_wire_file_entry(entry) + assert wire.get("rq") == ["pkg:a", "pkg:b"] + + +def test_cache_segment_report_projection_roundtrip(tmp_path: Path) -> None: + cache_path = tmp_path / "cache.json" + root = tmp_path.resolve() + cache = Cache(cache_path, root=root) + + segment_file = str((tmp_path / "pkg" / "a.py").resolve()) + cache.segment_report_projection = build_segment_report_projection( + digest="digest-1", + 
suppressed=3, + groups={ + "seg-group": [ + { + "segment_hash": "h1", + "segment_sig": "s1", + "filepath": segment_file, + "qualname": "pkg.a:f", + "start_line": 10, + "end_line": 20, + "size": 11, + } + ] + }, + ) + cache.save() + + loaded = Cache(cache_path, root=root) + loaded.load() + projection = loaded.segment_report_projection + assert projection is not None + assert projection["digest"] == "digest-1" + assert projection["suppressed"] == 3 + item = projection["groups"]["seg-group"][0] + assert item["filepath"] == segment_file + assert item["qualname"] == "pkg.a:f" + assert item["segment_hash"] == "h1" + + +def test_cache_segment_report_projection_filters_invalid_items(tmp_path: Path) -> None: + cache = Cache(tmp_path / "cache.json", root=tmp_path.resolve()) + cache.segment_report_projection = build_segment_report_projection( + digest="d", + suppressed=1, + groups={ + "invalid_only": [ + { + "segment_hash": "h", + "segment_sig": "s", + "filepath": "a.py", + "qualname": "q", + "start_line": "x", # invalid int + "end_line": 2, + "size": 2, + } + ], + "valid": [ + { + "segment_hash": "h2", + "segment_sig": "s2", + "filepath": "a.py", + "qualname": "q", + "start_line": 1, + "end_line": 2, + "size": 2, + } + ], + }, + ) + projection = cache.segment_report_projection + assert projection is not None + assert "invalid_only" not in projection["groups"] + assert "valid" in projection["groups"] + + +def test_cache_decode_segment_projection_invalid_shapes(tmp_path: Path) -> None: + cache = Cache(tmp_path / "cache.json", root=tmp_path.resolve()) + assert ( + cache._decode_segment_report_projection({"d": "x", "s": 0, "g": "bad"}) is None + ) + assert ( + cache._decode_segment_report_projection({"d": "x", "s": 0, "g": [["k"]]}) + is None + ) + assert ( + cache._decode_segment_report_projection({"d": "x", "s": 0, "g": [[1, []]]}) + is None + ) + assert ( + cache._decode_segment_report_projection( + {"d": "x", "s": 0, "g": [["k", ["bad-item"]]]} + ) + is None + ) + assert ( + 
cache._decode_segment_report_projection( + { + "d": "x", + "s": 0, + "g": [["k", [["a.py", "q", 1, 2, 3, "h", None]]]], + } + ) + is None + ) + + +def test_pipeline_analyze_uses_cached_segment_projection( + monkeypatch: pytest.MonkeyPatch, +) -> None: + seg_item_a = { + "segment_hash": "seg-hash", + "segment_sig": "seg-sig", + "filepath": "/tmp/a.py", + "qualname": "pkg.a:f", + "start_line": 10, + "end_line": 15, + "size": 6, + } + seg_item_b = { + "segment_hash": "seg-hash", + "segment_sig": "seg-sig", + "filepath": "/tmp/a.py", + "qualname": "pkg.a:f", + "start_line": 20, + "end_line": 25, + "size": 6, + } + raw_groups = build_segment_groups((seg_item_a, seg_item_b)) + digest = pipeline._segment_groups_digest(raw_groups) + cached_projection = { + "digest": digest, + "suppressed": 7, + "groups": { + "seg-hash|pkg.a:f": [ + { + "segment_hash": "seg-hash", + "segment_sig": "seg-sig", + "filepath": "/tmp/a.py", + "qualname": "pkg.a:f", + "start_line": 10, + "end_line": 25, + "size": 16, + } + ] + }, + } + + def _must_not_run( + _segment_groups: object, + ) -> tuple[dict[str, list[dict[str, object]]], int]: + raise AssertionError("prepare_segment_report_groups must not be called") + + monkeypatch.setattr(pipeline, "prepare_segment_report_groups", _must_not_run) + + boot = pipeline.BootstrapResult( + root=Path("."), + config=NormalizationConfig(), + args=Namespace( + skip_metrics=True, + skip_dependencies=False, + skip_dead_code=False, + min_loc=1, + min_stmt=1, + processes=1, + ), + output_paths=pipeline.OutputPaths(), + cache_path=Path("cache.json"), + ) + discovery = pipeline.DiscoveryResult( + files_found=0, + cache_hits=0, + files_skipped=0, + all_file_paths=(), + cached_units=(), + cached_blocks=(), + cached_segments=(), + cached_class_metrics=(), + cached_module_deps=(), + cached_dead_candidates=(), + cached_referenced_names=frozenset(), + files_to_process=(), + skipped_warnings=(), + cached_segment_report_projection=cast( + "SegmentReportProjection", + 
cached_projection, + ), + ) + processing = pipeline.ProcessingResult( + units=(), + blocks=(), + segments=(seg_item_a, seg_item_b), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + files_analyzed=0, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + ) + + result = pipeline.analyze(boot=boot, discovery=discovery, processing=processing) + assert result.suppressed_segment_groups == 7 + assert result.segment_groups == cached_projection["groups"] + assert result.segment_groups_raw_digest == digest + + +def test_pipeline_coerce_segment_projection_invalid_shapes() -> None: + assert pipeline._coerce_segment_report_projection("bad") is None + assert ( + pipeline._coerce_segment_report_projection( + {"digest": 1, "suppressed": 0, "groups": {}} + ) + is None + ) + assert ( + pipeline._coerce_segment_report_projection( + {"digest": "d", "suppressed": 0, "groups": {"k": "bad"}} + ) + is None + ) + + +def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: + boot = pipeline.BootstrapResult( + root=Path("."), + config=NormalizationConfig(), + args=Namespace( + skip_metrics=False, + skip_dependencies=True, + skip_dead_code=False, + min_loc=1, + min_stmt=1, + processes=1, + ), + output_paths=pipeline.OutputPaths(), + cache_path=Path("cache.json"), + ) + discovery = pipeline.DiscoveryResult( + files_found=1, + cache_hits=0, + files_skipped=0, + all_file_paths=("pkg/mod.py",), + cached_units=(), + cached_blocks=(), + cached_segments=(), + cached_class_metrics=(), + cached_module_deps=(), + cached_dead_candidates=(), + cached_referenced_names=frozenset(), + files_to_process=(), + skipped_warnings=(), + ) + processing = pipeline.ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=( + DeadCandidate( + qualname="pkg.mod:runtime_hook", + 
local_name="runtime_hook", + filepath="pkg/mod.py", + start_line=10, + end_line=11, + kind="function", + suppressed_rules=("dead-code",), + ), + ), + referenced_names=frozenset(), + files_analyzed=1, + files_skipped=0, + analyzed_lines=1, + analyzed_functions=1, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + ) + + result = pipeline.analyze(boot=boot, discovery=discovery, processing=processing) + assert result.project_metrics is not None + assert result.project_metrics.dead_code == () + assert result.suppressed_dead_code_items == 1 + assert result.metrics_payload is not None + dead_summary = cast(dict[str, object], result.metrics_payload["dead_code"])[ + "summary" + ] + assert dead_summary == { + "total": 0, + "critical": 0, + "high_confidence": 0, + "suppressed": 1, + } + + +def test_pipeline_decode_cached_structural_group() -> None: + decoded = pipeline._decode_cached_structural_finding_group( + { + "finding_kind": "duplicated_branches", + "finding_key": "k", + "signature": {"stmt_seq": "Expr,Return"}, + "items": [{"qualname": "pkg:q", "start": 1, "end": 2}], + }, + "/repo/codeclone/codeclone/cache.py", + ) + assert decoded.finding_key == "k" + assert decoded.items[0].file_path.endswith("cache.py") + + +def _discover_with_single_cached_entry( + *, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + cached_entry: dict[str, object], +) -> pipeline.DiscoveryResult: + source = tmp_path / "a.py" + source.write_text("def f():\n return 1\n", "utf-8") + filepath = str(source) + stat = {"mtime_ns": 1, "size": 1} + cache_entry = {"stat": stat, **cached_entry} + + class _FakeCache: + def get_file_entry(self, _path: str) -> dict[str, object]: + return cache_entry + + boot = pipeline.BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=Namespace(skip_metrics=False, min_loc=1, min_stmt=1, processes=1), + output_paths=pipeline.OutputPaths(), + cache_path=tmp_path / "cache.json", + ) + 
monkeypatch.setattr(pipeline, "iter_py_files", lambda _root: [filepath]) + monkeypatch.setattr(pipeline, "file_stat_signature", lambda _path: stat) + return pipeline.discover(boot=boot, cache=cast(Cache, _FakeCache())) + + +@pytest.mark.parametrize( + ("cached_entry", "expected_cache_hits", "expected_files_to_process"), + [ + ( + { + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [ + { + "qualname": "pkg:Cls", + "filepath": "placeholder", + "start_line": 1, + "end_line": 10, + "cbo": 11, + "lcom4": 4, + "method_count": 4, + "instance_var_count": 1, + "risk_coupling": "high", + "risk_cohesion": "high", + "coupled_classes": ["A", "B"], + } + ], + "module_deps": [ + { + "source": "pkg.a", + "target": "pkg.b", + "import_type": "import", + "line": 3, + } + ], + "dead_candidates": [ + { + "qualname": "pkg:dead", + "local_name": "dead", + "filepath": "placeholder", + "start_line": 20, + "end_line": 22, + "kind": "function", + } + ], + "referenced_names": ["used_name"], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + "source_stats": { + "lines": 2, + "functions": 1, + "methods": 0, + "classes": 0, + }, + }, + 1, + (), + ), + ( + { + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": ["used_name"], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + }, + 0, + ("a.py",), + ), + ( + { + "units": [], + "blocks": [], + "segments": [], + "source_stats": { + "lines": 2, + "functions": 1, + "methods": 0, + "classes": 0, + }, + }, + 0, + ("a.py",), + ), + ], + ids=[ + "cached-metrics-hit", + "missing-source-stats", + "missing-metrics-sections", + ], +) +def test_pipeline_discover_cache_admission_branches( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + cached_entry: dict[str, object], + expected_cache_hits: int, + expected_files_to_process: tuple[str, ...], +) -> None: + discovered = _discover_with_single_cached_entry( + 
tmp_path=tmp_path, + monkeypatch=monkeypatch, + cached_entry=cached_entry, + ) + assert discovered.cache_hits == expected_cache_hits + assert tuple(Path(path).name for path in discovered.files_to_process) == ( + expected_files_to_process + ) + if expected_cache_hits == 1: + assert len(discovered.cached_class_metrics) == 1 + assert len(discovered.cached_module_deps) == 1 + assert len(discovered.cached_dead_candidates) == 1 + assert "used_name" in discovered.cached_referenced_names + + +def test_pipeline_cached_source_stats_helper_invalid_shapes() -> None: + assert pipeline._cache_entry_source_stats(cast(CacheEntry, {})) is None + assert ( + pipeline._cache_entry_source_stats( + cast( + CacheEntry, + { + "source_stats": { + "lines": 1, + "functions": 1, + "methods": -1, + "classes": 0, + } + }, + ) + ) + is None + ) + + +def test_cli_metric_reason_parser_and_policy_context() -> None: + assert cli._parse_metric_reason_entry( + "New high-risk functions vs metrics baseline: 1." + ) == ("new_high_risk_functions", "1") + assert cli._parse_metric_reason_entry( + "New high-coupling classes vs metrics baseline: 2." + ) == ("new_high_coupling_classes", "2") + assert cli._parse_metric_reason_entry( + "New dependency cycles vs metrics baseline: 3." + ) == ("new_dependency_cycles", "3") + assert cli._parse_metric_reason_entry( + "New dead code items vs metrics baseline: 4." + ) == ("new_dead_code_items", "4") + assert cli._parse_metric_reason_entry( + "Health score regressed vs metrics baseline: delta=-7." + ) == ("health_delta", "-7") + assert cli._parse_metric_reason_entry( + "Dependency cycles detected: 3 cycle(s)." + ) == ("dependency_cycles", "3") + assert cli._parse_metric_reason_entry( + "Dead code detected (high confidence): 2 item(s)." + ) == ("dead_code_items", "2") + assert cli._parse_metric_reason_entry( + "Complexity threshold exceeded: max=11, threshold=10." 
+ ) == ("complexity_max", "11 (threshold=10)") + assert cli._parse_metric_reason_entry( + "Coupling threshold exceeded: max=12, threshold=9." + ) == ("coupling_max", "12 (threshold=9)") + assert cli._parse_metric_reason_entry( + "Cohesion threshold exceeded: max=13, threshold=8." + ) == ("cohesion_max", "13 (threshold=8)") + assert cli._parse_metric_reason_entry( + "Health score below threshold: score=70, threshold=80." + ) == ("health_score", "70 (threshold=80)") + assert cli._parse_metric_reason_entry("custom reason.") == ( + "detail", + "custom reason", + ) + + args = Namespace( + ci=False, + fail_on_new_metrics=True, + fail_complexity=10, + fail_coupling=9, + fail_cohesion=8, + fail_cycles=True, + fail_dead_code=True, + fail_health=80, + fail_on_new=True, + fail_threshold=5, + ) + metrics_policy = policy_context(args=args, gate_kind="metrics") + assert_contains_all( + metrics_policy, + "fail-on-new-metrics", + "fail-complexity=10", + "fail-coupling=9", + "fail-cohesion=8", + "fail-cycles", + "fail-dead-code", + "fail-health=80", + ) + assert policy_context(args=args, gate_kind="new-clones") == "fail-on-new" + assert policy_context(args=args, gate_kind="threshold") == "fail-threshold=5" + assert policy_context(args=args, gate_kind="unknown") == "custom" + args.fail_on_new = False + args.fail_threshold = -1 + assert policy_context(args=args, gate_kind="new-clones") == "custom" + assert policy_context(args=args, gate_kind="threshold") == "custom" + + +def test_cli_run_analysis_stages_handles_cache_save_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + args = Namespace(quiet=False, no_progress=False, skip_metrics=True) + boot = pipeline.BootstrapResult( + root=Path("."), + config=NormalizationConfig(), + args=args, + output_paths=pipeline.OutputPaths(), + cache_path=Path("cache.json"), + ) + + monkeypatch.setattr( + cli, + "discover", + lambda **_kwargs: pipeline.DiscoveryResult( + files_found=0, + cache_hits=0, + files_skipped=0, + all_file_paths=(), + 
cached_units=(), + cached_blocks=(), + cached_segments=(), + cached_class_metrics=(), + cached_module_deps=(), + cached_dead_candidates=(), + cached_referenced_names=frozenset(), + files_to_process=(), + skipped_warnings=(), + ), + ) + monkeypatch.setattr( + cli, + "process", + lambda **_kwargs: pipeline.ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + files_analyzed=0, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + ), + ) + monkeypatch.setattr( + cli, + "analyze", + lambda **_kwargs: pipeline.AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=0, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=0, + project_metrics=None, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + structural_findings=(), + ), + ) + + class _BadCache: + load_warning: str | None = None + + def save(self) -> None: + raise CacheError("boom") + + cli._run_analysis_stages(args=args, boot=boot, cache=cast(Cache, _BadCache())) + cli.print_banner(root=None) diff --git a/tests/test_detector_golden.py b/tests/test_detector_golden.py index 5c3a0e6..a270bb8 100644 --- a/tests/test_detector_golden.py +++ b/tests/test_detector_golden.py @@ -6,11 +6,12 @@ import pytest +from codeclone import extractor from codeclone.baseline import current_python_tag -from codeclone.extractor import extract_units_from_source from codeclone.normalize import NormalizationConfig from codeclone.report import build_block_groups, build_groups from codeclone.scanner import module_name_from_path +from tests._assertions import snapshot_python_tag def _detect_group_keys(project_root: Path) -> tuple[list[str], list[str]]: @@ -21,13 +22,15 @@ def 
_detect_group_keys(project_root: Path) -> tuple[list[str], list[str]]: for path in sorted(project_root.glob("*.py")): source = path.read_text("utf-8") module_name = module_name_from_path(str(project_root), str(path)) - units, blocks, _segments = extract_units_from_source( - source=source, - filepath=str(path), - module_name=module_name, - cfg=cfg, - min_loc=1, - min_stmt=1, + units, blocks, _segments, _source_stats, _file_metrics, _sf = ( + extractor.extract_units_and_stats_from_source( + source=source, + filepath=str(path), + module_name=module_name, + cfg=cfg, + min_loc=1, + min_stmt=1, + ) ) all_units.extend(asdict(unit) for unit in units) all_blocks.extend(asdict(block) for block in blocks) @@ -41,10 +44,7 @@ def test_detector_output_matches_golden_fixture() -> None: fixture_root = Path("tests/fixtures/golden_project").resolve() expected_path = fixture_root / "golden_expected_ids.json" expected = json.loads(expected_path.read_text("utf-8")) - expected_meta = expected.get("meta", {}) - assert isinstance(expected_meta, dict) - expected_python_tag = expected_meta.get("python_tag") - assert isinstance(expected_python_tag, str) + expected_python_tag = snapshot_python_tag(expected) # Golden fixture is a detector snapshot for one canonical Python tag. # Cross-version behavior is covered by contract/invariant tests. 
diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 5849aa0..aeb8161 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -2,17 +2,79 @@ import os import signal import sys +import tokenize from collections.abc import Callable, Iterator from contextlib import contextmanager +from typing import cast import pytest from codeclone import extractor from codeclone.errors import ParseError -from codeclone.extractor import extract_units_from_source +from codeclone.metrics import find_unused +from codeclone.models import BlockUnit, ModuleDep, SegmentUnit from codeclone.normalize import NormalizationConfig +def extract_units_from_source( + *, + source: str, + filepath: str, + module_name: str, + cfg: NormalizationConfig, + min_loc: int, + min_stmt: int, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, +) -> tuple[ + list[extractor.Unit], + list[BlockUnit], + list[SegmentUnit], +]: + units, blocks, segments, _source_stats, _file_metrics, _sf = ( + extractor.extract_units_and_stats_from_source( + source=source, + filepath=filepath, + module_name=module_name, + cfg=cfg, + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + ) + ) + return units, blocks, segments + + +def _parse_tree_and_collector( + source: str, +) -> tuple[ast.Module, extractor._QualnameCollector]: + tree = ast.parse(source) + collector = extractor._QualnameCollector() + collector.visit(tree) + return tree, collector + + +def _collect_module_walk( + source: str, + *, + module_name: str = "pkg.mod", + collect_referenced_names: bool = True, +) -> tuple[ast.Module, extractor._QualnameCollector, extractor._ModuleWalkResult]: + tree, collector = _parse_tree_and_collector(source) + walk = extractor._collect_module_walk_data( + tree=tree, + module_name=module_name, + collector=collector, + 
collect_referenced_names=collect_referenced_names, + ) + return tree, collector, walk + + def test_extracts_function_unit() -> None: src = """ @@ -39,6 +101,73 @@ def foo(): assert segments == [] +def test_source_tokens_returns_empty_on_tokenize_error() -> None: + assert extractor._source_tokens('"""') == () + + +def test_declaration_token_index_returns_none_when_start_token_is_missing() -> None: + tokens = extractor._source_tokens("value = 1\n") + assert ( + extractor._declaration_token_index( + source_tokens=tokens, + start_line=1, + start_col=0, + declaration_token="def", + ) + is None + ) + + +def test_declaration_token_index_uses_prebuilt_index() -> None: + tokens = extractor._source_tokens("async def demo():\n return 1\n") + token_index = extractor._build_declaration_token_index(tokens) + + assert ( + extractor._declaration_token_index( + source_tokens=tokens, + start_line=1, + start_col=0, + declaration_token="async", + source_token_index=token_index, + ) + == 0 + ) + + +def test_scan_declaration_colon_line_returns_none_when_header_is_incomplete() -> None: + tokens = extractor._source_tokens("def broken\n") + assert ( + extractor._scan_declaration_colon_line( + source_tokens=tokens, + start_index=0, + ) + is None + ) + + +def test_declaration_end_line_falls_back_without_tokens() -> None: + node = ast.parse( + """ +class Demo: + pass +""" + ).body[0] + assert isinstance(node, ast.ClassDef) + assert extractor._declaration_end_line(node, source_tokens=()) == 2 + + +def test_declaration_end_line_returns_zero_for_invalid_start_line() -> None: + node = ast.parse( + """ +def broken(): + return 1 +""" + ).body[0] + assert isinstance(node, ast.FunctionDef) + node.lineno = 0 + assert extractor._declaration_end_line(node, source_tokens=()) == 0 + + def test_init_function_is_ignored_for_blocks() -> None: src = """ class A: @@ -63,6 +192,130 @@ def __init__(self): assert segments == [] +def test_extract_units_skips_suppression_tokenization_without_directives( + 
monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + extractor, + "_source_tokens", + lambda _source: (_ for _ in ()).throw( + AssertionError("_source_tokens should not be called") + ), + ) + + units, blocks, segments = extract_units_from_source( + source=""" +def foo(): + a = 1 + return a +""", + filepath="x.py", + module_name="mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + assert len(units) == 1 + assert blocks == [] + assert segments == [] + + +def test_extract_units_skips_suppression_tokenization_for_leading_only_directives( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + extractor, + "_source_tokens", + lambda _source: (_ for _ in ()).throw( + AssertionError("_source_tokens should not be called") + ), + ) + + units, blocks, segments = extract_units_from_source( + source=""" +# codeclone: ignore[dead-code] +def foo(): + a = 1 + return a +""", + filepath="x.py", + module_name="mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + assert len(units) == 1 + assert blocks == [] + assert segments == [] + + +def test_extract_units_tokenizes_when_inline_suppressions_exist( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls = 0 + original_source_tokens = extractor._source_tokens + + def _record_tokens(source: str) -> tuple[tokenize.TokenInfo, ...]: + nonlocal calls + calls += 1 + return original_source_tokens(source) + + monkeypatch.setattr(extractor, "_source_tokens", _record_tokens) + + units, blocks, segments = extract_units_from_source( + source=""" +def foo( # codeclone: ignore[dead-code] + value: int, +) -> int: + return value +""", + filepath="x.py", + module_name="mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + assert calls == 1 + assert len(units) == 1 + assert blocks == [] + assert segments == [] + + +def test_extract_units_can_skip_structural_findings() -> None: + src = """ +def foo(x): + a = 1 + b = 2 + c = 3 + d = 4 + e = 5 + if x == 1: + log("a") + 
value = x + 1 + return value + elif x == 2: + log("b") + value = x + 2 + return value + return a + b + c + d + e +""" + _units, _blocks, _segments, _source_stats, _file_metrics, sf = ( + extractor.extract_units_and_stats_from_source( + source=src, + filepath="x.py", + module_name="mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + collect_structural_findings=False, + ) + ) + assert sf == [] + + def test_parse_timeout_raises(monkeypatch: pytest.MonkeyPatch) -> None: @contextmanager def _boom(_timeout_s: int) -> Iterator[None]: @@ -82,6 +335,16 @@ def test_parse_limits_no_timeout() -> None: assert tree is not None +def _patch_posix_parse_limits( + monkeypatch: pytest.MonkeyPatch, resource_module: object +) -> None: + monkeypatch.setattr(os, "name", "posix") + monkeypatch.setattr(signal, "getsignal", lambda *_args, **_kwargs: None) + monkeypatch.setattr(signal, "signal", lambda *_args, **_kwargs: None) + monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) + monkeypatch.setitem(sys.modules, "resource", resource_module) + + def test_parse_limits_resource_failure(monkeypatch: pytest.MonkeyPatch) -> None: class _DummyResource: RLIMIT_CPU = 0 @@ -95,11 +358,7 @@ def getrlimit(_key: int) -> tuple[int, int]: def setrlimit(_key: int, _val: tuple[int, int]) -> None: return None - monkeypatch.setattr(os, "name", "posix") - monkeypatch.setattr(signal, "getsignal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "signal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) - monkeypatch.setitem(sys.modules, "resource", _DummyResource) + _patch_posix_parse_limits(monkeypatch, _DummyResource) with extractor._parse_limits(1): tree = extractor._parse_with_limits("x = 1", 1) @@ -123,11 +382,7 @@ def setrlimit(_key: int, val: tuple[int, int]) -> None: # Simulate a system where changing hard limit would fail. 
assert val[1] == _DummyResource.RLIM_INFINITY - monkeypatch.setattr(os, "name", "posix") - monkeypatch.setattr(signal, "getsignal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "signal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) - monkeypatch.setitem(sys.modules, "resource", _DummyResource) + _patch_posix_parse_limits(monkeypatch, _DummyResource) with extractor._parse_limits(5): pass @@ -168,11 +423,7 @@ def setrlimit(_key: int, val: tuple[int, int]) -> None: def getrusage(_who: int) -> _DummyUsage: return _DummyUsage() - monkeypatch.setattr(os, "name", "posix") - monkeypatch.setattr(signal, "getsignal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "signal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) - monkeypatch.setitem(sys.modules, "resource", _DummyResource) + _patch_posix_parse_limits(monkeypatch, _DummyResource) with extractor._parse_limits(5): pass @@ -212,11 +463,7 @@ def setrlimit(_key: int, val: tuple[int, int]) -> None: def getrusage(_who: int) -> _DummyUsage: return _DummyUsage() - monkeypatch.setattr(os, "name", "posix") - monkeypatch.setattr(signal, "getsignal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "signal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) - monkeypatch.setitem(sys.modules, "resource", _DummyResource) + _patch_posix_parse_limits(monkeypatch, _DummyResource) with extractor._parse_limits(5): pass @@ -243,11 +490,7 @@ def getrlimit(_key: int) -> tuple[int, int]: def setrlimit(_key: int, val: tuple[int, int]) -> None: calls.append(val) - monkeypatch.setattr(os, "name", "posix") - monkeypatch.setattr(signal, "getsignal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "signal", lambda *_args, **_kwargs: None) - monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) - 
monkeypatch.setitem(sys.modules, "resource", _DummyResource) + _patch_posix_parse_limits(monkeypatch, _DummyResource) with extractor._parse_limits(5): pass @@ -286,6 +529,622 @@ def setrlimit(_key: int, _val: tuple[int, int]) -> None: pass +def test_resolve_import_target_absolute_and_relative() -> None: + absolute = ast.ImportFrom(module="pkg.util", names=[], level=0) + assert extractor._resolve_import_target("root.mod.sub", absolute) == "pkg.util" + + relative = ast.ImportFrom(module="helpers", names=[], level=1) + assert ( + extractor._resolve_import_target("root.mod.sub", relative) == "root.mod.helpers" + ) + + relative_no_module = ast.ImportFrom(module=None, names=[], level=2) + assert ( + extractor._resolve_import_target("root.mod.sub", relative_no_module) == "root" + ) + + +def test_collect_module_walk_data_imports_and_references() -> None: + tree = ast.parse( + """ +import os as operating_system +import json +from .pkg import utils +from .. import parent + +value = obj.attr +foo() +obj.method() +""".strip() + ) + collector = extractor._QualnameCollector() + collector.visit(tree) + walk = extractor._collect_module_walk_data( + tree=tree, + module_name="root.mod.sub", + collector=collector, + collect_referenced_names=True, + ) + assert walk.import_names == frozenset({"operating_system", "json", "root"}) + assert walk.module_deps == ( + ModuleDep( + source="root.mod.sub", + target="json", + import_type="import", + line=2, + ), + ModuleDep( + source="root.mod.sub", + target="os", + import_type="import", + line=1, + ), + ModuleDep( + source="root.mod.sub", + target="root", + import_type="from_import", + line=4, + ), + ModuleDep( + source="root.mod.sub", + target="root.mod.pkg", + import_type="from_import", + line=3, + ), + ) + assert walk.referenced_names == frozenset({"obj", "attr", "foo", "method"}) + + +def test_collect_module_walk_data_edge_branches() -> None: + tree = ast.parse("from .... 
import parent") + collector = extractor._QualnameCollector() + collector.visit(tree) + walk = extractor._collect_module_walk_data( + tree=tree, + module_name="pkg.mod", + collector=collector, + collect_referenced_names=True, + ) + assert walk.import_names == frozenset() + assert walk.module_deps == () + assert walk.referenced_names == frozenset() + + lambda_call_tree = ast.parse("(lambda x: x)(1)") + lambda_collector = extractor._QualnameCollector() + lambda_collector.visit(lambda_call_tree) + lambda_walk = extractor._collect_module_walk_data( + tree=lambda_call_tree, + module_name="pkg.mod", + collector=lambda_collector, + collect_referenced_names=True, + ) + assert lambda_walk.referenced_names == frozenset({"x"}) + + +def test_collect_module_walk_data_without_referenced_name_collection() -> None: + tree = ast.parse( + """ +import os as operating_system +from .pkg import utils +from .... import parent +""".strip() + ) + collector = extractor._QualnameCollector() + collector.visit(tree) + walk = extractor._collect_module_walk_data( + tree=tree, + module_name="root.mod.sub", + collector=collector, + collect_referenced_names=False, + ) + assert walk.import_names == frozenset({"operating_system", "root"}) + assert walk.module_deps == ( + ModuleDep( + source="root.mod.sub", + target="os", + import_type="import", + line=1, + ), + ModuleDep( + source="root.mod.sub", + target="root.mod.pkg", + import_type="from_import", + line=2, + ), + ) + assert walk.referenced_names == frozenset() + + +def test_module_walk_helpers_cover_import_and_reference_branches() -> None: + state = extractor._ModuleWalkState() + import_node = cast( + ast.Import, + ast.parse("import typing_extensions as te").body[0], + ) + extractor._collect_import_node( + node=import_node, + module_name="pkg.mod", + state=state, + collect_referenced_names=False, + ) + assert "te" in state.import_names + assert "te" in state.protocol_module_aliases + assert state.imported_module_aliases == {} + + import_from_node = 
cast( + ast.ImportFrom, + ast.parse("from typing import Protocol as Proto, Thing as Alias").body[0], + ) + extractor._collect_import_from_node( + node=import_from_node, + module_name="pkg.mod", + state=state, + collect_referenced_names=True, + ) + assert "Proto" in state.protocol_symbol_aliases + assert state.imported_symbol_bindings["Alias"] == {"typing:Thing"} + + unresolved_import = ast.ImportFrom( + module=None, + names=[ast.alias(name="parent", asname=None)], + level=4, + ) + extractor._collect_import_from_node( + node=unresolved_import, + module_name="pkg.mod", + state=state, + collect_referenced_names=True, + ) + assert "parent" not in state.imported_symbol_bindings + + name_node = cast(ast.Name, ast.parse("value", mode="eval").body) + attr_node = cast(ast.Attribute, ast.parse("obj.attr", mode="eval").body) + extractor._collect_load_reference_node(node=name_node, state=state) + extractor._collect_load_reference_node(node=attr_node, state=state) + extractor._collect_load_reference_node( + node=cast(ast.Constant, ast.parse("1", mode="eval").body), + state=state, + ) + assert "value" in state.referenced_names + assert "attr" in state.referenced_names + + +def test_dotted_expr_protocol_detection_and_runtime_candidate_edges() -> None: + dotted_expr = ast.parse("pkg.helpers.decorate", mode="eval").body + assert extractor._dotted_expr_name(dotted_expr) == "pkg.helpers.decorate" + assert extractor._dotted_expr_name(ast.parse("custom()", mode="eval").body) is None + + tree = ast.parse( + """ +import typing_extensions as te + +class A(te.Protocol): + pass + +class B(te.Protocol[int]): + pass +""".strip() + ) + collector = extractor._QualnameCollector() + collector.visit(tree) + walk = extractor._collect_module_walk_data( + tree=tree, + module_name="pkg.mod", + collector=collector, + collect_referenced_names=True, + ) + protocol_symbol_aliases = walk.protocol_symbol_aliases + protocol_module_aliases = walk.protocol_module_aliases + assert "te" in 
protocol_module_aliases + classes = [node for node in tree.body if isinstance(node, ast.ClassDef)] + class_a, class_b = classes + assert extractor._is_protocol_class( + class_a, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + ) + assert not extractor._is_protocol_class( + class_b, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + ) + + runtime_candidate = ast.parse( + """ +@trace() +@custom +@overload +def f(x): + return x +""".strip() + ).body[0] + assert isinstance(runtime_candidate, ast.FunctionDef) + assert extractor._is_non_runtime_candidate(runtime_candidate) + + +def test_resolve_referenced_qualnames_covers_module_class_and_attr_branches() -> None: + src = """ +from pkg.runtime import handler as imported_handler +import pkg.helpers as helpers + +class Service: + def hook(self) -> int: + return 1 + +value = imported_handler() +decorator = helpers.decorate +method = Service.hook +unknown = Missing.hook +dynamic = factory().attr +""" + tree, collector = _parse_tree_and_collector(src) + state = extractor._ModuleWalkState() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + extractor._collect_import_node( + node=node, + module_name="pkg.mod", + state=state, + collect_referenced_names=True, + ) + elif isinstance(node, ast.ImportFrom): + extractor._collect_import_from_node( + node=node, + module_name="pkg.mod", + state=state, + collect_referenced_names=True, + ) + else: + extractor._collect_load_reference_node(node=node, state=state) + + resolved = extractor._resolve_referenced_qualnames( + module_name="pkg.mod", + collector=collector, + state=state, + ) + assert "pkg.runtime:handler" in resolved + assert "pkg.helpers:decorate" in resolved + assert "pkg.mod:Service.hook" in resolved + assert all("Missing.hook" not in qualname for qualname in resolved) + assert all(not qualname.endswith(":attr") for qualname in resolved) + + +def 
test_collect_referenced_qualnames_edge_cases() -> None: + src = """ +from .... import hidden +from pkg.runtime import * +import pkg.helpers as helpers + +class Service: + def hook(self) -> int: + return 1 + +value = helpers.tools.decorate(1) +handler = Service.hook + """ + _tree, _collector, walk = _collect_module_walk(src) + assert "pkg.mod:Service.hook" in walk.referenced_qualnames + assert "pkg.helpers:tools" in walk.referenced_qualnames + assert "pkg.helpers:decorate" not in walk.referenced_qualnames + + +def test_extract_stats_drops_referenced_names_for_test_filepaths() -> None: + src = """ +from pkg.mod import live + +live() +""" + _, _, _, _, test_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/tests/test_usage.py", + module_name="pkg.tests.test_usage", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + _, _, _, _, regular_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/usage.py", + module_name="pkg.usage", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + assert test_metrics.referenced_names == frozenset() + assert "live" in regular_metrics.referenced_names + + +def test_dead_code_marks_symbol_dead_when_referenced_only_by_tests() -> None: + src_prod = """ +def orphan(): + return 1 +""" + src_test = """ +from pkg.mod import orphan + +def test_orphan_usage(): + assert orphan() == 1 +""" + + _, _, _, _, prod_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src_prod, + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + _, _, _, _, test_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src_test, + filepath="pkg/tests/test_mod.py", + module_name="pkg.tests.test_mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + dead = find_unused( + definitions=prod_metrics.dead_candidates, + referenced_names=( + prod_metrics.referenced_names | 
test_metrics.referenced_names + ), + ) + assert dead and dead[0].qualname == "pkg.mod:orphan" + + +def test_dead_code_skips_module_pep562_hooks() -> None: + src = """ +def __getattr__(name: str): + raise AttributeError(name) + +def __dir__(): + return ["demo"] + +def orphan(): + return 1 +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + dead = find_unused( + definitions=file_metrics.dead_candidates, + referenced_names=file_metrics.referenced_names, + referenced_qualnames=file_metrics.referenced_qualnames, + ) + assert tuple(item.qualname for item in dead) == ("pkg.mod:orphan",) + + +def test_dead_code_applies_inline_suppression_per_declaration() -> None: + src = """ +# codeclone: ignore[dead-code] +def runtime_hook(): + return 1 + +def orphan(): + return 2 +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + dead = find_unused( + definitions=file_metrics.dead_candidates, + referenced_names=file_metrics.referenced_names, + referenced_qualnames=file_metrics.referenced_qualnames, + ) + assert tuple(item.qualname for item in dead) == ("pkg.mod:orphan",) + + +def test_dead_code_suppression_binding_is_scoped_to_target_symbol() -> None: + src = """ +class Service: # codeclone: ignore[dead-code] + # codeclone: ignore[dead-code] + def hook(self): + return 1 + + def alive(self): + return 2 +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + dead = find_unused( + definitions=file_metrics.dead_candidates, + referenced_names=file_metrics.referenced_names, + referenced_qualnames=file_metrics.referenced_qualnames, 
+ ) + assert tuple(item.qualname for item in dead) == ("pkg.mod:Service.alive",) + + +def test_dead_code_binds_inline_suppression_on_multiline_decorated_method() -> None: + src = """ +class Settings: # codeclone: ignore[dead-code] + @validator("field") + @classmethod + def validate_config_version( + cls, + value: str | None, + ) -> str | None: # codeclone: ignore[dead-code] + return value + + def orphan(self) -> int: + return 1 +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + dead = find_unused( + definitions=file_metrics.dead_candidates, + referenced_names=file_metrics.referenced_names, + referenced_qualnames=file_metrics.referenced_qualnames, + ) + assert tuple(item.qualname for item in dead) == ("pkg.mod:Settings.orphan",) + + +def test_dead_code_binds_inline_suppression_on_multiline_header_start_line() -> None: + src = """ +class Settings: # codeclone: ignore[dead-code] + @field_validator("trusted_proxy_ips", "additional_telegram_ip_ranges") + @classmethod + def validate_trusted_proxy_ips( # codeclone: ignore[dead-code] + cls, + value: list[str] | None, + ) -> list[str] | None: + return value + + @model_validator(mode="before") + @classmethod + def migrate_config_if_needed( # codeclone: ignore[dead-code] + cls, + values: dict[str, object], + ) -> dict[str, object]: + return values + + def orphan(self) -> int: + return 1 +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + dead = find_unused( + definitions=file_metrics.dead_candidates, + referenced_names=file_metrics.referenced_names, + referenced_qualnames=file_metrics.referenced_qualnames, + ) + assert tuple(item.qualname for item in dead) == ("pkg.mod:Settings.orphan",) + + +def 
test_collect_dead_candidates_and_extract_skip_classes_without_lineno( + monkeypatch: pytest.MonkeyPatch, +) -> None: + collector = extractor._QualnameCollector() + collector.visit( + ast.parse( + """ +def used(): + return 1 +""".strip() + ) + ) + broken_class = ast.ClassDef( + name="Broken", + bases=[], + keywords=[], + body=[], + decorator_list=[], + ) + broken_class.lineno = 0 + broken_class.end_lineno = 0 + collector.class_nodes.append(("Broken", broken_class)) + dead = extractor._collect_dead_candidates( + filepath="pkg/mod.py", + module_name="pkg.mod", + collector=collector, + ) + assert all(item.qualname != "pkg.mod:Broken" for item in dead) + + class _CollectorNoClassMetrics: + def __init__(self) -> None: + self.units: list[tuple[str, extractor.FunctionNode]] = [] + self.class_nodes = [("Broken", broken_class)] + self.function_count = 0 + self.method_count = 0 + self.class_count = 1 + + def visit(self, _tree: ast.AST) -> None: + return None + + monkeypatch.setattr(extractor, "_QualnameCollector", _CollectorNoClassMetrics) + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source="class Broken:\n pass\n", + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + assert file_metrics.class_metrics == () + + +def test_extract_collects_referenced_qualnames_for_import_aliases() -> None: + src = """ +from pkg.runtime import run as _run_impl +import pkg.helpers as helpers + +def wrapper(): + value = _run_impl() + return helpers.decorate(value) +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/cli.py", + module_name="pkg.cli", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + assert "pkg.runtime:run" in file_metrics.referenced_qualnames + assert "pkg.helpers:decorate" in file_metrics.referenced_qualnames + + +def test_collect_dead_candidates_skips_protocol_and_stub_like_symbols() -> None: + src = """ +from abc 
import abstractmethod +from typing import Protocol, overload + +class _Reader(Protocol): + def read(self) -> str: ... + +class _Base: + @abstractmethod + def parse(self) -> str: + raise NotImplementedError + +@overload +def parse_value(value: int) -> str: ... + +def parse_value(value: object) -> str: + return str(value) + """ + _tree, collector, walk = _collect_module_walk(src) + dead = extractor._collect_dead_candidates( + filepath="pkg/mod.py", + module_name="pkg.mod", + collector=collector, + protocol_symbol_aliases=walk.protocol_symbol_aliases, + protocol_module_aliases=walk.protocol_module_aliases, + ) + qualnames = {item.qualname for item in dead} + assert "pkg.mod:_Reader.read" not in qualnames + assert "pkg.mod:_Base.parse" not in qualnames + assert "pkg.mod:parse_value" in qualnames + + def test_extract_syntax_error() -> None: with pytest.raises(ParseError): extract_units_from_source( @@ -375,6 +1234,7 @@ def f(): def test_extract_generates_segments_without_blocks_when_only_segment_gate_met() -> None: + """Function with 12 stmts in ~36 lines: passes segment gate but not block gate.""" lines = ["def f():"] for i in range(12): lines.append(f" x{i} = {i}") @@ -389,6 +1249,12 @@ def test_extract_generates_segments_without_blocks_when_only_segment_gate_met() cfg=NormalizationConfig(), min_loc=1, min_stmt=1, + # segment gate passes (loc=37 >= 20, stmt=12 >= 10) + segment_min_loc=20, + segment_min_stmt=10, + # block gate fails (stmt=12 < 15) + block_min_loc=20, + block_min_stmt=15, ) assert units @@ -397,6 +1263,7 @@ def test_extract_generates_segments_without_blocks_when_only_segment_gate_met() def test_extract_generates_blocks_without_segments_when_only_block_gate_met() -> None: + """Function with 10 stmts in ~50 lines: passes block gate but not segment gate.""" lines = ["def f():"] for i in range(10): lines.append(f" x{i} = {i}") @@ -413,6 +1280,12 @@ def test_extract_generates_blocks_without_segments_when_only_block_gate_met() -> cfg=NormalizationConfig(), 
min_loc=1, min_stmt=1, + # block gate passes (loc=51 >= 20, stmt=10 >= 8) + block_min_loc=20, + block_min_stmt=8, + # segment gate fails (stmt=10 < 12) + segment_min_loc=20, + segment_min_stmt=12, ) assert units @@ -420,6 +1293,205 @@ def test_extract_generates_blocks_without_segments_when_only_block_gate_met() -> assert segments == [] +class TestAdmissionThresholdBoundaries: + """Verify function/block/segment admission gates at exact boundaries.""" + + @staticmethod + def _make_func(stmt_count: int, lines_per_stmt: int = 1) -> str: + """Build a function with configurable statement count and per-statement LOC.""" + lines = ["def f():"] + for i in range(stmt_count): + lines.append(f" x{i} = {i}") + # pad with blank lines to inflate LOC + lines.extend([""] * (lines_per_stmt - 1)) + return "\n".join(lines) + + # -- function-level: min_loc boundary -- + + def test_function_excluded_below_min_loc(self) -> None: + src = self._make_func(stmt_count=6, lines_per_stmt=1) # 7 lines + units, _, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=10, + min_stmt=1, + ) + assert units == [] + + def test_function_included_at_min_loc(self) -> None: + src = self._make_func(stmt_count=6, lines_per_stmt=2) # 13 lines + units, _, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=10, + min_stmt=1, + ) + assert len(units) == 1 + + # -- function-level: min_stmt boundary -- + + def test_function_excluded_below_min_stmt(self) -> None: + src = self._make_func(stmt_count=5, lines_per_stmt=3) # 16 lines, 5 stmts + units, _, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=6, + ) + assert units == [] + + def test_function_included_at_min_stmt(self) -> None: + src = self._make_func(stmt_count=6, lines_per_stmt=3) # 19 lines, 6 stmts + units, _, _ = 
extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=6, + ) + assert len(units) == 1 + + # -- block gate boundary -- + + def test_blocks_excluded_below_block_min_loc(self) -> None: + src = self._make_func(stmt_count=10, lines_per_stmt=1) # 11 lines, 10 stmts + _, blocks, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + block_min_loc=20, + block_min_stmt=8, + ) + assert blocks == [] + + def test_blocks_included_at_block_min_loc(self) -> None: + src = self._make_func(stmt_count=10, lines_per_stmt=2) # 21 lines, 10 stmts + _, blocks, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + block_min_loc=20, + block_min_stmt=8, + ) + assert blocks + + def test_blocks_excluded_below_block_min_stmt(self) -> None: + src = self._make_func(stmt_count=7, lines_per_stmt=4) # 29 lines, 7 stmts + _, blocks, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + block_min_loc=20, + block_min_stmt=8, + ) + assert blocks == [] + + def test_blocks_included_at_block_min_stmt(self) -> None: + src = self._make_func(stmt_count=8, lines_per_stmt=3) # 25 lines, 8 stmts + _, blocks, _ = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + block_min_loc=20, + block_min_stmt=8, + ) + assert blocks + + # -- segment gate boundary -- + + def test_segments_excluded_below_segment_min_loc(self) -> None: + src = self._make_func(stmt_count=12, lines_per_stmt=1) # 13 lines, 12 stmts + _, _, segments = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + segment_min_loc=20, + segment_min_stmt=10, + ) 
+ assert segments == [] + + def test_segments_included_at_segment_min_loc(self) -> None: + src = self._make_func(stmt_count=12, lines_per_stmt=2) # 25 lines, 12 stmts + _, _, segments = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + segment_min_loc=20, + segment_min_stmt=10, + ) + assert segments + + def test_segments_excluded_below_segment_min_stmt(self) -> None: + src = self._make_func(stmt_count=9, lines_per_stmt=3) # 28 lines, 9 stmts + _, _, segments = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + segment_min_loc=20, + segment_min_stmt=10, + ) + assert segments == [] + + def test_segments_included_at_segment_min_stmt(self) -> None: + src = self._make_func(stmt_count=10, lines_per_stmt=3) # 31 lines, 10 stmts + _, _, segments = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + segment_min_loc=20, + segment_min_stmt=10, + ) + assert segments + + # -- boilerplate still excluded -- + + def test_short_boilerplate_excluded_with_new_defaults(self) -> None: + """3-line trivial function stays out even with lowered thresholds.""" + src = "def f():\n x = 1\n return x\n" + units, blocks, segments = extract_units_from_source( + source=src, + filepath="x.py", + module_name="m", + cfg=NormalizationConfig(), + min_loc=10, + min_stmt=6, + ) + assert units == [] + assert blocks == [] + assert segments == [] + + def test_extract_handles_non_list_function_body_for_hash_reuse( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -442,8 +1514,8 @@ def _fake_fingerprint( _node: ast.FunctionDef | ast.AsyncFunctionDef, _cfg: NormalizationConfig, _qualname: str, - ) -> str: - return "f" * 40 + ) -> tuple[str, int]: + return "f" * 40, 1 def _fake_extract_segments( _node: ast.FunctionDef | ast.AsyncFunctionDef, @@ -461,7 +1533,7 
@@ def _fake_extract_segments( monkeypatch.setattr(extractor, "_parse_with_limits", _fake_parse) monkeypatch.setattr(extractor, "_stmt_count", lambda _node: 12) - monkeypatch.setattr(extractor, "get_cfg_fingerprint", _fake_fingerprint) + monkeypatch.setattr(extractor, "_cfg_fingerprint_and_complexity", _fake_fingerprint) monkeypatch.setattr(extractor, "extract_segments", _fake_extract_segments) units, blocks, segments = extract_units_from_source( diff --git a/tests/test_golden_v2.py b/tests/test_golden_v2.py new file mode 100644 index 0000000..3a95188 --- /dev/null +++ b/tests/test_golden_v2.py @@ -0,0 +1,337 @@ +from __future__ import annotations + +import json +import shutil +import sys +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Literal + +import pytest + +import codeclone.pipeline as pipeline +from codeclone import cli +from codeclone.baseline import current_python_tag +from codeclone.extractor import extract_units_and_stats_from_source +from codeclone.grouping import build_block_groups, build_groups, build_segment_groups +from codeclone.models import ClassMetrics, DeadCandidate, ModuleDep +from codeclone.normalize import NormalizationConfig +from codeclone.pipeline import compute_project_metrics +from codeclone.scanner import iter_py_files, module_name_from_path +from codeclone.structural_findings import build_clone_cohort_structural_findings +from tests._assertions import snapshot_python_tag + +_GOLDEN_V2_ROOT = Path("tests/fixtures/golden_v2").resolve() + + +@dataclass(slots=True) +class _DummyFuture: + value: object + + def result(self) -> object: + return self.value + + +class _DummyExecutor: + def __init__(self, max_workers: int | None = None) -> None: + self.max_workers = max_workers + + def __enter__(self) -> _DummyExecutor: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: object | None, + ) -> Literal[False]: + return False + + def submit( + 
self, + fn: object, + *args: object, + **kwargs: object, + ) -> _DummyFuture: + assert callable(fn) + return _DummyFuture(fn(*args, **kwargs)) + + +def _patch_parallel(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _DummyExecutor) + monkeypatch.setattr(pipeline, "as_completed", lambda futures: futures) + + +def _relative_to_root(path: str, root: Path) -> str: + candidate = Path(path) + if not candidate.is_absolute(): + return str(candidate).replace("\\", "/") + return str(candidate.resolve().relative_to(root)) + + +def _collect_analysis_snapshot(project_root: Path) -> dict[str, object]: + cfg = NormalizationConfig() + units: list[dict[str, object]] = [] + blocks: list[dict[str, object]] = [] + segments: list[dict[str, object]] = [] + class_metrics: list[ClassMetrics] = [] + module_deps: list[ModuleDep] = [] + dead_candidates: list[DeadCandidate] = [] + referenced_names: set[str] = set() + referenced_qualnames: set[str] = set() + + files = tuple(iter_py_files(str(project_root))) + lines_total = 0 + functions_total = 0 + methods_total = 0 + classes_total = 0 + + for filepath in files: + source = Path(filepath).read_text("utf-8") + module_name = module_name_from_path(str(project_root), filepath) + relative_filepath = str(Path(filepath).resolve().relative_to(project_root)) + ( + file_units, + file_blocks, + file_segments, + source_stats, + file_metrics, + _sf, + ) = extract_units_and_stats_from_source( + source=source, + filepath=relative_filepath, + module_name=module_name, + cfg=cfg, + min_loc=1, + min_stmt=1, + ) + units.extend(asdict(unit) for unit in file_units) + blocks.extend(asdict(block) for block in file_blocks) + segments.extend(asdict(segment) for segment in file_segments) + class_metrics.extend(file_metrics.class_metrics) + module_deps.extend(file_metrics.module_deps) + dead_candidates.extend(file_metrics.dead_candidates) + referenced_names.update(file_metrics.referenced_names) + 
referenced_qualnames.update(file_metrics.referenced_qualnames) + + lines_total += source_stats.lines + functions_total += source_stats.functions + methods_total += source_stats.methods + classes_total += source_stats.classes + + function_groups = build_groups(units) + block_groups = build_block_groups(blocks) + segment_groups = build_segment_groups(segments) + cohort_structural_groups = build_clone_cohort_structural_findings( + func_groups=function_groups, + ) + + project_metrics, dep_graph, dead_items = compute_project_metrics( + units=tuple(units), + class_metrics=tuple(class_metrics), + module_deps=tuple(module_deps), + dead_candidates=tuple(dead_candidates), + referenced_names=frozenset(referenced_names), + referenced_qualnames=frozenset(referenced_qualnames), + files_found=len(files), + files_analyzed_or_cached=len(files), + function_clone_groups=len(function_groups), + block_clone_groups=len(block_groups), + skip_dependencies=False, + skip_dead_code=False, + ) + guarded_functions = 0 + for unit in units: + guard_count = unit.get("entry_guard_count", 0) + if isinstance(guard_count, bool): + guard_count = int(guard_count) + if isinstance(guard_count, int) and guard_count > 0: + guarded_functions += 1 + + return { + "meta": {"python_tag": current_python_tag()}, + "files": { + "count": len(files), + "lines": lines_total, + "functions": functions_total, + "methods": methods_total, + "classes": classes_total, + }, + "groups": { + "function_keys": sorted(function_groups.keys()), + "block_keys": sorted(block_groups.keys()), + "segment_keys": sorted(segment_groups.keys()), + }, + "stable_structure": { + "terminal_kinds": sorted({str(unit["terminal_kind"]) for unit in units}), + "guard_terminal_profiles": sorted( + {str(unit["entry_guard_terminal_profile"]) for unit in units}, + ), + "try_finally_profiles": sorted( + {str(unit["try_finally_profile"]) for unit in units}, + ), + "side_effect_order_profiles": sorted( + {str(unit["side_effect_order_profile"]) for unit in 
units}, + ), + "guarded_functions": guarded_functions, + }, + "cohort_structural_findings": { + "count": len(cohort_structural_groups), + "kinds": [ + group.finding_kind + for group in sorted( + cohort_structural_groups, + key=lambda group: (group.finding_kind, group.finding_key), + ) + ], + "keys": [ + group.finding_key + for group in sorted( + cohort_structural_groups, + key=lambda group: (group.finding_kind, group.finding_key), + ) + ], + }, + "metrics": { + "complexity_max": project_metrics.complexity_max, + "high_risk_functions": list(project_metrics.high_risk_functions), + "coupling_max": project_metrics.coupling_max, + "high_risk_classes": list(project_metrics.high_risk_classes), + "cohesion_max": project_metrics.cohesion_max, + "low_cohesion_classes": list(project_metrics.low_cohesion_classes), + "dependency_cycles": [list(cycle) for cycle in dep_graph.cycles], + "dependency_max_depth": dep_graph.max_depth, + "dead_items": [ + { + "qualname": item.qualname, + "filepath": _relative_to_root(item.filepath, project_root), + "kind": item.kind, + "confidence": item.confidence, + } + for item in dead_items + ], + "health": { + "total": project_metrics.health.total, + "grade": project_metrics.health.grade, + }, + }, + } + + +@pytest.mark.parametrize( + "fixture_name", + ("test_only_usage", "clone_metrics_cycle"), +) +def test_golden_v2_analysis_contracts(fixture_name: str) -> None: + fixture_root = _GOLDEN_V2_ROOT / fixture_name + expected_path = fixture_root / "golden_expected_snapshot.json" + expected = json.loads(expected_path.read_text("utf-8")) + + expected_meta = expected.get("meta", {}) + assert isinstance(expected_meta, dict) + expected_python_tag = expected_meta.get("python_tag") + assert isinstance(expected_python_tag, str) + + runtime_tag = current_python_tag() + if runtime_tag != expected_python_tag: + pytest.skip( + "Golden detector fixture is canonicalized for " + f"{expected_python_tag}; runtime is {runtime_tag}." 
+ ) + + snapshot = _collect_analysis_snapshot(fixture_root) + assert snapshot == expected + + +def _run_cli(args: list[str], monkeypatch: pytest.MonkeyPatch) -> int: + _patch_parallel(monkeypatch) + monkeypatch.setattr(sys, "argv", ["codeclone", *args]) + try: + cli.main() + except SystemExit as exc: + code = exc.code + if isinstance(code, int): + return code + if code is None: + return 0 + try: + return int(code) + except (TypeError, ValueError): + return 1 + return 0 + + +def _collect_cli_snapshot( + *, + fixture_root: Path, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> dict[str, object]: + project_root = tmp_path / fixture_root.name + shutil.copytree(fixture_root, project_root) + report_path = project_root / "report.json" + baseline_path = project_root / "codeclone.baseline.json" + cache_path = project_root / ".cache" / "codeclone" / "cache.json" + + exit_code = _run_cli( + [ + str(project_root), + "--json", + str(report_path), + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(baseline_path), + "--cache-path", + str(cache_path), + "--no-progress", + "--quiet", + ], + monkeypatch, + ) + assert exit_code == 0 + + payload = json.loads(report_path.read_text("utf-8")) + meta = payload["meta"] + findings = payload["findings"] + clone_groups = findings["groups"]["clones"] + structural_groups = findings["groups"]["structural"]["groups"] + return { + "meta": {"python_tag": current_python_tag()}, + "report_schema_version": payload["report_schema_version"], + "project_name": meta["project_name"], + "scan_root": meta["scan_root"], + "baseline_status": meta["baseline"]["status"], + "baseline_loaded": meta["baseline"]["loaded"], + "cache_used": meta["cache"]["used"], + "findings_summary": findings["summary"], + "function_group_ids": [group["id"] for group in clone_groups["functions"]], + "block_group_ids": [group["id"] for group in clone_groups["blocks"]], + "segment_group_ids": [group["id"] for group in clone_groups["segments"]], + 
"structural_group_ids": [group["id"] for group in structural_groups], + "structural_group_kinds": [group["kind"] for group in structural_groups], + } + + +def test_golden_v2_cli_pyproject_contract( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + fixture_root = _GOLDEN_V2_ROOT / "pyproject_defaults" + expected_path = fixture_root / "golden_expected_cli_snapshot.json" + expected = json.loads(expected_path.read_text("utf-8")) + expected_python_tag = snapshot_python_tag(expected) + + runtime_tag = current_python_tag() + if runtime_tag != expected_python_tag: + pytest.skip( + "Golden detector fixture is canonicalized for " + f"{expected_python_tag}; runtime is {runtime_tag}." + ) + + snapshot = _collect_cli_snapshot( + fixture_root=fixture_root, + tmp_path=tmp_path, + monkeypatch=monkeypatch, + ) + assert snapshot == expected diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 028eb03..6375e40 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -10,16 +10,21 @@ from codeclone.errors import FileProcessingError from codeclone.html_report import ( _FileCache, - _prefix_css, _pygments_css, _render_code_block, _try_pygments, - pairwise, ) from codeclone.html_report import ( build_html_report as _core_build_html_report, ) -from codeclone.report import build_block_group_facts, to_json_report +from codeclone.models import ( + StructuralFindingGroup, + StructuralFindingOccurrence, + Suggestion, +) +from codeclone.report import build_block_group_facts +from codeclone.report.json_contract import build_report_document +from codeclone.report.serialize import render_json_report_document from tests._report_fixtures import ( REPEATED_ASSERT_SOURCE, repeated_block_group_key, @@ -31,6 +36,19 @@ _REPEATED_BLOCK_GROUP_KEY = repeated_block_group_key() +def to_json_report( + func_groups: dict[str, list[dict[str, Any]]], + block_groups: dict[str, list[dict[str, Any]]], + segment_groups: dict[str, list[dict[str, Any]]], +) -> str: + 
payload = build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + ) + return render_json_report_document(payload) + + def build_html_report( *, func_groups: dict[str, list[dict[str, Any]]], @@ -53,6 +71,68 @@ def build_html_report( ) +def _assert_html_contains(html: str, *needles: str) -> None: + for needle in needles: + assert needle in html + + +def _coupling_metrics_payload(coupled_classes: list[str]) -> dict[str, object]: + payload = _metrics_payload( + health_score=70, + health_grade="B", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=1, + dead_total=0, + dead_critical=0, + ) + coupling = payload["coupling"] + assert isinstance(coupling, dict) + classes = coupling["classes"] + assert isinstance(classes, list) + classes[0]["coupled_classes"] = coupled_classes + return payload + + +def _render_metrics_html(payload: dict[str, object]) -> str: + return build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=payload, + ) + + +def _dependency_metrics_payload( + *, + edge_list: list[dict[str, object]], + longest_chains: list[list[str]], + dep_cycles: list[list[str]], + dep_max_depth: int, +) -> dict[str, object]: + payload = _metrics_payload( + health_score=70, + health_grade="B", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=dep_cycles, + dep_max_depth=dep_max_depth, + dead_total=0, + dead_critical=0, + ) + deps = payload["dependencies"] + assert isinstance(deps, dict) + deps["edge_list"] = edge_list + deps["longest_chains"] = longest_chains + return payload + + def _repeated_assert_block_groups( tmp_path: Path, *, @@ -137,10 +217,7 @@ def test_html_report_generation(tmp_path: Path) -> None: max_snippet_lines=10, ) - assert "Test Report" in html - assert "f1" in html - assert "f2" in html - assert 
"codebox" in html + _assert_html_contains(html, "Test Report", "f1", "f2", "codebox") def test_html_report_group_and_item_metadata_attrs(tmp_path: Path) -> None: @@ -161,12 +238,15 @@ def test_html_report_group_and_item_metadata_attrs(tmp_path: Path) -> None: segment_groups={}, title="Attrs", ) - assert 'data-group-key="hash1"' in html - assert '
    hash1
    ' in html - assert 'data-qualname="pkg.mod:f"' in html - assert 'data-filepath="' in html - assert 'data-start-line="1"' in html - assert 'data-end-line="2"' in html + _assert_html_contains( + html, + 'data-group-key="hash1"', + '
    hash1
    ', + 'data-qualname="pkg.mod:f"', + 'data-filepath="', + 'data-start-line="1"', + 'data-end-line="2"', + ) def test_html_report_renders_novelty_tabs_and_group_flags(tmp_path: Path) -> None: @@ -196,15 +276,21 @@ def test_html_report_renders_novelty_tabs_and_group_flags(tmp_path: Path) -> Non new_function_group_keys={"new-func"}, report_meta={"baseline_loaded": True, "baseline_status": "ok"}, ) - assert "New duplicates" in html - assert "Known duplicates" in html - assert 'id="global-novelty-controls"' in html - assert 'data-global-novelty="new"' in html - assert 'data-global-novelty="known"' in html + _assert_html_contains( + html, + "New duplicates", + "Known duplicates", + 'id="global-novelty-controls"', + 'data-global-novelty="new"', + 'data-global-novelty="known"', + ) assert 'data-novelty-filter="functions"' not in html - assert 'data-group-key="new-func" data-novelty="new"' in html - assert 'data-group-key="known-func" data-novelty="known"' in html - assert "Split is based on baseline" in html + _assert_html_contains( + html, + 'data-group-key="new-func" data-novelty="new"', + 'data-group-key="known-func" data-novelty="known"', + "Split is based on baseline", + ) def test_html_report_renders_untrusted_baseline_novelty_note(tmp_path: Path) -> None: @@ -263,6 +349,209 @@ def test_html_report_renders_block_novelty_tabs_and_group_flags(tmp_path: Path) assert 'data-group-key="known-block" data-novelty="known"' in html +def test_html_report_exposes_scope_counter_hooks_for_clone_ui(tmp_path: Path) -> None: + f = tmp_path / "a.py" + f.write_text("def f():\n return 1\n", "utf-8") + html = build_html_report( + func_groups={ + "known-func": [ + { + "qualname": "pkg.mod:known", + "filepath": str(f), + "start_line": 1, + "end_line": 2, + } + ], + "new-func": [ + { + "qualname": "pkg.mod:new", + "filepath": str(f), + "start_line": 3, + "end_line": 4, + } + ], + }, + block_groups={ + "known-block": [ + { + "qualname": "pkg.mod:block", + "filepath": str(f), + 
"start_line": 5, + "end_line": 8, + } + ] + }, + segment_groups={}, + new_function_group_keys={"new-func"}, + report_meta={"baseline_loaded": True, "baseline_status": "ok"}, + ) + _assert_html_contains( + html, + "data-main-clones-count", + 'data-clone-tab-count="functions"', + 'data-clone-tab-count="blocks"', + 'data-total-groups="2"', + "updateCloneScopeCounters", + ) + + +def test_html_report_structural_findings_tab_uses_normalized_groups() -> None: + meaningful_sig = { + "calls": "0", + "has_loop": "1", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr,For", + "terminal": "fallthrough", + } + trivial_sig = { + "calls": "2+", + "has_loop": "0", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr", + "terminal": "expr", + } + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[ + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="a" * 40, + signature=meaningful_sig, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="a" * 40, + file_path="/proj/a.py", + qualname="mod:fn", + start=10, + end=12, + signature=meaningful_sig, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="a" * 40, + file_path="/proj/a.py", + qualname="mod:fn", + start=20, + end=22, + signature=meaningful_sig, + ), + ), + ), + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="b" * 40, + signature=trivial_sig, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="b" * 40, + file_path="/proj/a.py", + qualname="mod:fn", + start=30, + end=30, + signature=trivial_sig, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="b" * 40, + file_path="/proj/a.py", + qualname="mod:fn", + start=40, + end=40, + signature=trivial_sig, + ), + ), + ), + ], + ) + _assert_html_contains( + html, + 
'data-tab="structural-findings"', + ">1
    ", + "Repeated non-overlapping branch-body shapes", + "1 function", + ) + assert "stmt seq" in html and "Expr,For" in html + assert "stmt_seq=Expr" not in html + + +def test_html_report_structural_findings_why_modal_renders_examples( + tmp_path: Path, +) -> None: + sample = tmp_path / "sample.py" + sample.write_text( + "def fn(x):\n" + " if x == 1:\n" + ' warn("a")\n' + " return None\n" + " elif x == 2:\n" + ' warn("b")\n' + " return None\n", + "utf-8", + ) + sig = { + "calls": "1", + "has_loop": "0", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr,Return", + "terminal": "return_const", + } + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[ + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="c" * 40, + signature=sig, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="c" * 40, + file_path=str(sample), + qualname="pkg.mod:fn", + start=3, + end=4, + signature=sig, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="c" * 40, + file_path=str(sample), + qualname="pkg.mod:fn", + start=6, + end=7, + signature=sig, + ), + ), + ) + ], + context_lines=0, + max_snippet_lines=20, + ) + for needle in ( + 'data-finding-why-btn="finding-why-template-cccc', + 'id="finding-why-modal"', + "Finding Details", + "Examples", + "Example A", + "Example B", + "warn", + "codebox", + ): + assert needle in html + + def test_html_report_block_group_includes_match_basis_and_compact_key() -> None: group_key = _REPEATED_BLOCK_GROUP_KEY html = build_html_report( @@ -279,12 +568,15 @@ def test_html_report_block_group_includes_match_basis_and_compact_key() -> None: }, segment_groups={}, ) - assert 'data-match-rule="normalized_sliding_window"' in html - assert 'data-block-size="4"' in html - assert 'data-signature-kind="stmt_hash_sequence"' in html - assert 'data-merged-regions="true"' in html - assert 
'data-pattern="repeated_stmt_hash"' in html - assert f"{_REPEATED_STMT_HASH[:12]} x4" in html + _assert_html_contains( + html, + 'data-match-rule="normalized_sliding_window"', + 'data-block-size="4"', + 'data-signature-kind="stmt_hash_sequence"', + 'data-merged-regions="true"', + 'data-pattern="repeated_stmt_hash"', + f"{_REPEATED_STMT_HASH[:12]} x4", + ) def test_html_report_block_group_includes_assert_only_explanation( @@ -442,28 +734,24 @@ def test_html_report_n_way_group_without_compare_note(tmp_path: Path) -> None: assert '
    ' not in html -def test_html_report_command_palette_full_actions_present() -> None: +def test_html_report_topbar_actions_present() -> None: html = build_html_report(func_groups={}, block_groups={}, segment_groups={}) - assert "Export Report" in html - assert "Toggle Theme" in html - assert "Open Help" in html - assert "Expand All" in html - assert "Collapse All" in html - assert "window.print();" in html - assert "Generated by CodeClone v" in html - assert 'data-shortcut="mod+K"' in html - assert 'data-shortcut="mod+I"' in html - assert "key === 'i'" in html - assert 'id="help-modal"' in html - - -def test_html_report_help_modal_links_present() -> None: + assert "Report Provenance" in html + assert "data-prov-open" in html + assert 'class="theme-toggle"' in html + assert 'title="Toggle theme"' in html + assert "Theme" in html + assert "Export Report" not in html + assert "Open Help" not in html + assert 'id="help-modal"' not in html + + +def test_html_report_footer_links_present() -> None: html = build_html_report(func_groups={}, block_groups={}, segment_groups={}) - assert "Help & Support" in html assert f'href="{REPOSITORY_URL}"' in html assert f'href="{ISSUES_URL}"' in html assert f'href="{DOCS_URL}"' in html - assert 'rel="noopener noreferrer"' in html + assert 'target="_blank" rel="noopener"' in html def test_html_report_includes_provenance_metadata( @@ -493,6 +781,7 @@ def test_html_report_includes_provenance_metadata( expected = [ "Report Provenance", "CodeClone", + "Report generated (UTC)", "Baseline file", "Baseline path", "Baseline schema", @@ -503,6 +792,7 @@ def test_html_report_includes_provenance_metadata( 'data-baseline-status="ok"', 'data-baseline-payload-verified="true"', 'data-baseline-file="codeclone.baseline.json"', + 'data-report-generated-at-utc="2026-03-10T12:00:00Z"', "/repo/codeclone.baseline.json", 'data-cache-used="true"', "Cache schema", @@ -514,6 +804,9 @@ def test_html_report_includes_provenance_metadata( ] for token in expected: 
assert token in html + assert "Generated at 2026-03-10T12:00:00Z" in html + assert "generated 2026-03-10T12:00:00Z" not in html + assert "deterministic render" not in html def test_html_report_escapes_meta_and_title( @@ -652,8 +945,10 @@ def test_html_and_json_group_order_consistent(tmp_path: Path) -> None: } html = build_html_report(func_groups=groups, block_groups={}, segment_groups={}) json_report = json.loads(to_json_report(groups, {}, {})) - json_keys = list(json_report["groups"]["functions"].keys()) - assert json_keys == ["a", "b", "c"] + json_keys = [ + row["id"] for row in json_report["findings"]["groups"]["clones"]["functions"] + ] + assert json_keys == ["clone:function:c", "clone:function:a", "clone:function:b"] assert html.find('data-group-key="c"') < html.find('data-group-key="a"') assert html.find('data-group-key="a"') < html.find('data-group-key="b"') @@ -676,10 +971,13 @@ def test_html_report_escapes_control_chars_in_payload(tmp_path: Path) -> None: block_groups={}, segment_groups={}, ) - assert "</div>" in html - assert "`" in html - assert "
" in html - assert "
" in html + _assert_html_contains( + html, + "</div>", + "`", + "
", + "
", + ) def test_file_cache_reads_ranges(tmp_path: Path) -> None: @@ -766,20 +1064,6 @@ def test_render_code_block_truncate(tmp_path: Path) -> None: assert "Truncate" in html -def test_prefix_css() -> None: - css = "/* c */\n\n.a{color:red}\nplain\n.b { color: blue; }\n" - prefixed = _prefix_css(css, ".wrap") - assert ".wrap .a" in prefixed - assert ".wrap .b" in prefixed - assert "/* c */" in prefixed - - -def test_prefix_css_empty_selector_passthrough() -> None: - css = " { color: red; }\n" - prefixed = _prefix_css(css, ".wrap") - assert "{ color: red; }" in prefixed - - def test_pygments_css() -> None: css = _pygments_css("default") assert ".codebox" in css or css == "" @@ -932,10 +1216,6 @@ def test_html_report_single_item_group(tmp_path: Path) -> None: assert f"{f}:1-2" in html -def test_pairwise_helper() -> None: - assert list(pairwise([1, 2, 3])) == [(1, 2), (2, 3)] - - def test_render_code_block_truncates_and_fallback( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ -979,3 +1259,774 @@ class _Mod: monkeypatch.setattr(importlib, "import_module", lambda _name: _Mod) assert _pygments_css("default") == "" + + +def _metrics_payload( + *, + health_score: object, + health_grade: object, + complexity_max: object, + complexity_high_risk: object, + coupling_high_risk: object, + cohesion_low: object, + dep_cycles: list[list[str]], + dep_max_depth: object, + dead_total: object, + dead_critical: object, + dead_suppressed: object = 0, +) -> dict[str, object]: + suppressed_items: list[dict[str, object]] = [] + if isinstance(dead_suppressed, int) and dead_suppressed > 0: + suppressed_items = [ + { + "qualname": "pkg.mod:suppressed_unused", + "filepath": "/outside/project/pkg/mod.py", + "start_line": 70, + "end_line": 71, + "kind": "function", + "confidence": "high", + "suppressed_by": [{"rule": "dead-code", "source": "inline_codeclone"}], + } + ] + return { + "complexity": { + "functions": [ + { + "qualname": "pkg.mod.func", + "filepath": 
"/outside/project/pkg/mod.py", + "start_line": 10, + "end_line": 40, + "cyclomatic_complexity": complexity_max, + "nesting_depth": 3, + "risk": "mystery", + }, + { + "qualname": "", + "filepath": "/outside/project/pkg/empty.py", + "start_line": 1, + "end_line": 1, + "cyclomatic_complexity": 1, + "nesting_depth": 0, + "risk": "low", + }, + ], + "summary": { + "total": 2, + "average": 2.5, + "max": complexity_max, + "high_risk": complexity_high_risk, + }, + }, + "coupling": { + "classes": [ + { + "qualname": "pkg.mod.Service", + "filepath": "/outside/project/pkg/mod.py", + "start_line": 1, + "end_line": 80, + "cbo": 9, + "risk": "warning", + } + ], + "summary": { + "total": 1, + "average": 9.0, + "max": 9, + "high_risk": coupling_high_risk, + }, + }, + "cohesion": { + "classes": [ + { + "qualname": "pkg.mod.Service", + "filepath": "/outside/project/pkg/mod.py", + "start_line": 1, + "end_line": 80, + "lcom4": 4, + "risk": "high", + "method_count": 5, + "instance_var_count": 2, + } + ], + "summary": { + "total": 1, + "average": 4.0, + "max": 4, + "low_cohesion": cohesion_low, + }, + }, + "dependencies": { + "modules": 4, + "edges": 4, + "max_depth": dep_max_depth, + "cycles": dep_cycles, + "longest_chains": [["pkg.a", "pkg.b", "pkg.c"]], + "edge_list": [ + { + "source": "pkg.a", + "target": "pkg.b", + "import_type": "import", + "line": 1, + }, + { + "source": "pkg.b", + "target": "pkg.c", + "import_type": "import", + "line": 2, + }, + { + "source": "pkg.c", + "target": "pkg.d", + "import_type": "import", + "line": 3, + }, + ], + }, + "dead_code": { + "items": [ + { + "qualname": "pkg.mod:unused", + "filepath": "/outside/project/pkg/mod.py", + "start_line": 50, + "end_line": 60, + "kind": "function", + "confidence": "high", + } + ], + "suppressed_items": suppressed_items, + "summary": { + "total": dead_total, + "critical": dead_critical, + "suppressed": dead_suppressed, + }, + }, + "health": { + "score": health_score, + "grade": health_grade, + "dimensions": 
{"coverage": 99}, + }, + } + + +def test_html_report_metrics_warn_branches_and_dependency_svg() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/repo"}, + metrics=_metrics_payload( + health_score=70, + health_grade="B", + complexity_max=25, + complexity_high_risk=1, + coupling_high_risk=1, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=9, + dead_total=2, + dead_critical=0, + ), + ) + assert "insight-warn" in html + assert "dep-graph-svg" in html + assert "Grade B" in html + assert "pkg.mod.func" in html + assert "outside/project/pkg/mod.py" in html + + +def test_html_report_metrics_risk_branches() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=_metrics_payload( + health_score="50", + health_grade="C", + complexity_max=55, + complexity_high_risk=3, + coupling_high_risk=2, + cohesion_low=2, + dep_cycles=[["pkg.a", "pkg.b"]], + dep_max_depth=4, + dead_total=5, + dead_critical=2, + ), + ) + assert "insight-risk" in html + assert 'stroke="var(--error)"' in html + assert "Cycles: 1; max dependency depth: 4." in html + assert "5 candidates total; 2 high-confidence items; 0 suppressed." in html + assert 'Dead Code2' in html + + +def test_html_report_metrics_without_health_score_uses_info_overview() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=_metrics_payload( + health_score=" ", + health_grade="n/a", + complexity_max="bad", + complexity_high_risk=True, + coupling_high_risk=False, + cohesion_low=False, + dep_cycles=[], + dep_max_depth="bad", + dead_total="2", + dead_critical="0", + ), + ) + assert "metrics were skipped for this run" not in html + assert ( + "clone groups; 2 dead-code items (0 suppressed); 0 dependency cycles." 
in html + ) + assert "High Complexity" in html + assert '2.5' in html + assert 'avg' in html + + +def test_html_report_metrics_bad_health_score_and_dead_code_ok_tone() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=_metrics_payload( + health_score="bad", + health_grade="n/a", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=0, + dead_total=0, + dead_critical=0, + ), + ) + assert "Health 0/100 (n/a);" in html + assert "0 candidates total; 0 high-confidence items; 0 suppressed." in html + assert "insight-ok" in html + + +def test_html_report_metrics_bool_health_score_and_long_dependency_labels() -> None: + payload = _metrics_payload( + health_score=True, + health_grade="F", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=1, + dead_total=0, + dead_critical=0, + ) + deps = payload["dependencies"] + assert isinstance(deps, dict) + deps["edge_list"] = [ + { + "source": "pkg.really_long_module_name_source", + "target": "pkg.really_long_module_name_target", + "import_type": "import", + "line": 1, + } + ] + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=payload, + ) + assert "really_l..e_target" in html + + +def test_html_report_renders_dead_code_split_with_suppressed_layer() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=_metrics_payload( + health_score=90, + health_grade="A", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=0, + dead_total=0, + dead_critical=0, + dead_suppressed=9, + ), + ) + _assert_html_contains( + html, + "0 candidates total; 0 
high-confidence items; 9 suppressed.", + 'data-subtab-group="dead-code"', + 'data-clone-tab="active" data-subtab-group="dead-code"', + 'data-clone-tab="suppressed" data-subtab-group="dead-code"', + 'Suppressed 9', + "inline_codeclone", + "dead-code", + ) + + +def test_html_report_metrics_object_health_score_uses_float_fallback() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=_metrics_payload( + health_score={"bad": "value"}, + health_grade="n/a", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=1, + dead_total=0, + dead_critical=0, + ), + ) + assert "Health 0/100 (n/a);" in html + + +def test_html_report_coupling_coupled_classes_inline_for_three_or_less() -> None: + html = _render_metrics_html(_coupling_metrics_payload(["Alpha", "Beta", "Gamma"])) + _assert_html_contains( + html, + '', + 'Alpha', + 'Beta', + 'Gamma', + ) + assert "(+1 more)" not in html + + +def test_html_report_coupling_coupled_classes_expands_for_more_than_three() -> None: + html = _render_metrics_html( + _coupling_metrics_payload(["Alpha", "Beta", "Gamma", "Delta"]) + ) + _assert_html_contains( + html, + '
    ', + '', + 'Alpha', + 'Beta', + 'Delta', + 'Gamma', + ) + assert "(+1 more)" in html + + +def test_html_report_coupling_coupled_classes_truncates_long_labels() -> None: + long_name = "pkg.mod.VeryLongClassNameSegmentXYZ12345" + html = _render_metrics_html(_coupling_metrics_payload([long_name])) + label = "VeryLongClassNameSegmentXYZ12345" + assert f"{label[:8]}..{label[-8:]}" in html + + +def test_html_report_dependency_graph_handles_rootless_and_disconnected_nodes() -> None: + html = _render_metrics_html( + _dependency_metrics_payload( + edge_list=[ + { + "source": "pkg.a", + "target": "pkg.b", + "import_type": "import", + "line": 1, + }, + { + "source": "pkg.c", + "target": "pkg.d", + "import_type": "import", + "line": 2, + }, + { + "source": "pkg.d", + "target": "pkg.c", + "import_type": "import", + "line": 3, + }, + ], + longest_chains=[["pkg.a", "pkg.b"]], + dep_cycles=[["pkg.c", "pkg.d"]], + dep_max_depth=4, + ) + ) + _assert_html_contains( + html, + 'data-node="pkg.c"', + 'data-node="pkg.d"', + "dep-graph-svg", + ) + + +def test_html_report_dependency_graph_rootless_fallback_seed() -> None: + html = _render_metrics_html( + _dependency_metrics_payload( + edge_list=[ + { + "source": "pkg.c", + "target": "pkg.d", + "import_type": "import", + "line": 1, + }, + { + "source": "pkg.d", + "target": "pkg.c", + "import_type": "import", + "line": 2, + }, + ], + longest_chains=[["pkg.c", "pkg.d"]], + dep_cycles=[["pkg.c", "pkg.d"]], + dep_max_depth=2, + ) + ) + _assert_html_contains(html, 'data-node="pkg.c"', 'data-node="pkg.d"') + + +def test_html_report_provenance_badges_cover_mismatch_and_untrusted_metrics() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={ + "baseline_loaded": False, + "baseline_payload_sha256_verified": False, + "baseline_generator_name": "other-generator", + "metrics_baseline_loaded": True, + "metrics_baseline_payload_sha256_verified": False, + "cache_used": None, + "analysis_mode": 
"full", + "report_schema_version": "2.0", + "baseline_fingerprint_version": "1", + }, + ) + assert "Baseline missing" in html + assert "Generator mismatch: other-generator" in html + assert "Metrics baseline untrusted" in html + assert "Cache N/A" in html + + +def test_html_report_provenance_handles_non_boolean_baseline_loaded() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={ + "baseline_loaded": "unknown", + "baseline_payload_sha256_verified": False, + "report_schema_version": "2.0", + }, + ) + assert "Schema 2.0" in html + assert "Baseline missing" not in html + + +def test_html_report_dependency_hubs_deterministic_tie_order() -> None: + html = _render_metrics_html( + _dependency_metrics_payload( + edge_list=[ + { + "source": "mod.gamma", + "target": "mod.hub", + "import_type": "import", + "line": 1, + }, + { + "source": "mod.alpha", + "target": "mod.hub", + "import_type": "import", + "line": 2, + }, + { + "source": "mod.beta", + "target": "mod.hub", + "import_type": "import", + "line": 3, + }, + ], + longest_chains=[["mod.alpha", "mod.hub"]], + dep_cycles=[], + dep_max_depth=2, + ) + ) + hub_pos = html.find('dep-hub-name">hub3') + alpha_pos = html.find('dep-hub-name">alpha1') + beta_pos = html.find('dep-hub-name">beta1') + gamma_pos = html.find('dep-hub-name">gamma1') + assert hub_pos != -1 + assert alpha_pos != -1 + assert beta_pos != -1 + assert gamma_pos != -1 + assert hub_pos < alpha_pos < beta_pos < gamma_pos + + +def test_html_report_dependency_chain_columns_render_html() -> None: + payload = _metrics_payload( + health_score=70, + health_grade="B", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[["pkg.a", "pkg.b", "pkg.c"]], + dep_max_depth=3, + dead_total=0, + dead_critical=0, + ) + deps = payload["dependencies"] + assert isinstance(deps, dict) + deps["longest_chains"] = [["pkg.root", "pkg.mid", "pkg.leaf"]] + + html = build_html_report( + 
func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=payload, + ) + assert '' in html + assert "<span class="chain-flow">" not in html + + +def test_html_report_bare_qualname_keeps_non_python_path_prefix() -> None: + html = build_html_report( + func_groups={ + "q1": [ + { + "qualname": "pkg.mod.txt.", + "filepath": "/repo/pkg/mod.txt", + "start_line": 1, + "end_line": 1, + } + ] + }, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/repo"}, + ) + assert "pkg.mod.txt." in html + + +def test_html_report_suggestions_cards_split_facts_assessment_and_action() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/repo"}, + suggestions=( + Suggestion( + severity="info", + category="clone", + title="Refactor duplicate block", + location="/repo/pkg/mod.py", + steps=("Extract helper",), + effort="easy", + priority=0.5, + finding_family="clones", + fact_kind="Block clone group", + fact_summary="same repeated setup/assert pattern", + fact_count=4, + spread_files=1, + spread_functions=1, + clone_type="Type-4", + confidence="high", + source_kind="production", + source_breakdown=(("production", 4),), + ), + ), + ) + assert "Facts" in html + assert "Assessment" in html + assert "Suggestion" in html + assert "Source breakdown" in html + assert "Refactor duplicate block" in html + + +def test_html_report_overview_includes_hotspot_sections_without_quick_views() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/repo"}, + metrics=_metrics_payload( + health_score=87, + health_grade="B", + complexity_max=21, + complexity_high_risk=1, + coupling_high_risk=0, + cohesion_low=1, + dep_cycles=[], + dep_max_depth=2, + dead_total=1, + dead_critical=1, + ), + suggestions=( + Suggestion( + severity="warning", + category="clone", + title="Function clone group (Type-2)", + 
location="2 occurrences across 2 files / 2 functions", + steps=("Extract shared function",), + effort="easy", + priority=2.0, + finding_family="clones", + fact_kind="Function clone group", + fact_summary="same parameterized function body", + fact_count=2, + spread_files=2, + spread_functions=2, + clone_type="Type-2", + confidence="high", + source_kind="production", + source_breakdown=(("production", 2),), + location_label="2 occurrences across 2 files / 2 functions", + ), + ), + ) + _assert_html_contains( + html, + "Executive Summary", + "Issue breakdown", + "Source breakdown", + "Health Profile", + ) + assert "Most Actionable" not in html + assert 'data-quick-view="' not in html + assert 'class="suggestion-context"' in html + + +def test_html_report_overview_uses_canonical_report_overview_hotlists() -> None: + structural = ( + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="z" * 40, + signature={ + "stmt_seq": "Expr,Return", + "terminal": "return", + "raises": "0", + "has_loop": "0", + }, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="z" * 40, + file_path="/repo/pkg/mod.py", + qualname="pkg.mod:fn", + start=10, + end=12, + signature={}, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="z" * 40, + file_path="/repo/pkg/mod.py", + qualname="pkg.mod:fn", + start=20, + end=22, + signature={}, + ), + ), + ), + ) + metrics = _metrics_payload( + health_score=84, + health_grade="B", + complexity_max=20, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=2, + dead_total=0, + dead_critical=0, + ) + payload = build_report_document( + func_groups={ + "g1": [ + { + "qualname": "tests.fixtures.sample:a", + "filepath": "/repo/tests/fixtures/sample/a.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + { + "qualname": 
"tests.fixtures.sample:b", + "filepath": "/repo/tests/fixtures/sample/b.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + ] + }, + block_groups={}, + segment_groups={}, + meta={"scan_root": "/repo"}, + metrics=metrics, + structural_findings=structural, + ) + + html = build_html_report( + func_groups={ + "g1": [ + { + "qualname": "tests.fixtures.sample:a", + "filepath": "/repo/tests/fixtures/sample/a.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + { + "qualname": "tests.fixtures.sample:b", + "filepath": "/repo/tests/fixtures/sample/b.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + ] + }, + block_groups={}, + segment_groups={}, + report_meta=payload["meta"], + metrics=payload["metrics"], + structural_findings=structural, + report_document=payload, + ) + + for needle in ( + "Executive Summary", + 'class="overview-kpi-cards"', + "Findings", + "Suggestions", + "source-kind-badge source-kind-fixtures", + "source-kind-badge source-kind-production", + 'breakdown-count">1', + ): + assert needle in html + assert '
    n/a
    ' not in html + # Issue breakdown replaces old hotspot sections + assert "Issue breakdown" in html diff --git a/tests/test_html_report_helpers.py b/tests/test_html_report_helpers.py new file mode 100644 index 0000000..8a10ab5 --- /dev/null +++ b/tests/test_html_report_helpers.py @@ -0,0 +1,95 @@ +from types import SimpleNamespace +from typing import Any, cast + +from codeclone._html_report._components import ( + overview_source_breakdown_html, + overview_summary_item_html, +) +from codeclone._html_report._sections._clones import ( + _derive_group_display_name, + _render_group_explanation, +) +from codeclone._html_report._sections._dependencies import ( + _hub_threshold, + _render_dep_nodes_and_labels, + _select_dep_nodes, +) +from codeclone._html_report._tabs import render_split_tabs + + +def test_summary_helpers_cover_empty_and_non_clone_context_branches() -> None: + assert overview_source_breakdown_html({}) == ( + '
    n/a
    ' + ) + + +def test_summary_helpers_cover_breakdown_bars_and_clone_badges() -> None: + breakdown_html = overview_source_breakdown_html({"production": 3, "tests": 1}) + assert "source-kind-production" in breakdown_html + assert "source-kind-tests" in breakdown_html + assert "width:75%" in breakdown_html + assert "width:25%" in breakdown_html + + summary_html = overview_summary_item_html( + label="Source Breakdown", + body_html="
    body
    ", + ) + assert "summary-icon--info" in summary_html + + +def test_clone_display_name_and_group_explanation_edge_branches() -> None: + ctx = SimpleNamespace( + bare_qualname=lambda _qualname, _filepath: "", + relative_path=lambda filepath: filepath.replace("/abs/", ""), + ) + items = [ + {"qualname": "", "filepath": "/abs/" + "a" * 40 + ".py"}, + {"qualname": "", "filepath": "/abs/" + "b" * 40 + ".py"}, + {"qualname": "", "filepath": "/abs/" + "c" * 40 + ".py"}, + ] + derived = _derive_group_display_name( + "deadbeefdeadbeefdeadbeefdeadbeef", + items, + "blocks", + {}, + cast(Any, ctx), + ) + assert derived.endswith("…") + assert "aaaaaaaa" in derived + + fallback = _derive_group_display_name( + "x" * 60, + (), + "segments", + {}, + cast(Any, ctx), + ) + assert fallback == ("x" * 24) + "…" + ("x" * 16) + + assert _render_group_explanation({}) == "" + + +def test_dependency_helpers_cover_dense_and_empty_branches() -> None: + edges = [(f"n{i}", f"n{i + 1}") for i in range(21)] + nodes, filtered = _select_dep_nodes(edges) + assert len(nodes) == 20 + assert len(filtered) <= 100 + assert _hub_threshold([], {}, {}) == 99 + + node_svg, label_svg = _render_dep_nodes_and_labels( + [f"n{i}" for i in range(9)], + positions={f"n{i}": (float(i), float(i + 1)) for i in range(9)}, + node_radii={f"n{i}": 3.0 for i in range(9)}, + in_degree={f"n{i}": 1 for i in range(9)}, + out_degree={f"n{i}": 1 for i in range(9)}, + cycle_node_set={"n0"}, + hub_threshold=1, + max_per_layer=9, + ) + assert len(node_svg) == 9 + assert len(label_svg) == 9 + assert "rotate(-45)" in label_svg[0] + + +def test_render_split_tabs_returns_empty_for_no_tabs() -> None: + assert render_split_tabs(group_id="dead-code", tabs=()) == "" diff --git a/tests/test_metrics_baseline.py b/tests/test_metrics_baseline.py new file mode 100644 index 0000000..ba2a035 --- /dev/null +++ b/tests/test_metrics_baseline.py @@ -0,0 +1,543 @@ +from __future__ import annotations + +import json +import os +from pathlib import 
Path + +import pytest + +import codeclone.metrics_baseline as mb_mod +from codeclone.errors import BaselineValidationError +from codeclone.metrics_baseline import MetricsBaseline, MetricsBaselineStatus +from codeclone.models import ( + DeadItem, + HealthScore, + MetricsSnapshot, + ProjectMetrics, +) + + +def _snapshot() -> MetricsSnapshot: + return MetricsSnapshot( + max_complexity=50, + high_risk_functions=("pkg.mod:hot",), + max_coupling=10, + high_coupling_classes=("pkg.mod:Service",), + max_cohesion=4, + low_cohesion_classes=("pkg.mod:Service",), + dependency_cycles=(("pkg.a", "pkg.b"),), + dependency_max_depth=6, + dead_code_items=("pkg.mod:unused",), + health_score=70, + health_grade="C", + ) + + +def _project_metrics() -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=3.2, + complexity_max=50, + high_risk_functions=("pkg.mod:hot", "pkg.mod:hot"), + coupling_avg=2.0, + coupling_max=10, + high_risk_classes=("pkg.mod:Service", "pkg.mod:Service"), + cohesion_avg=1.8, + cohesion_max=4, + low_cohesion_classes=("pkg.mod:Service", "pkg.mod:Service"), + dependency_modules=2, + dependency_edges=2, + dependency_edge_list=(), + dependency_cycles=(("pkg.a", "pkg.b"), ("pkg.a", "pkg.b")), + dependency_max_depth=6, + dependency_longest_chains=(("pkg.a", "pkg.b"),), + dead_code=( + DeadItem( + qualname="pkg.mod:unused", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + confidence="high", + ), + DeadItem( + qualname="pkg.mod:unused", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + confidence="high", + ), + ), + health=HealthScore(total=70, grade="C", dimensions={"health": 70}), + ) + + +def _write_json(path: Path, payload: object) -> None: + path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), "utf-8") + + +def _valid_payload( + *, + schema_version: str = mb_mod.METRICS_BASELINE_SCHEMA_VERSION, + python_tag: str | None = None, +) -> dict[str, object]: + return mb_mod._build_payload( + 
snapshot=_snapshot(), + schema_version=schema_version, + python_tag=python_tag or mb_mod.current_python_tag(), + generator_name=mb_mod.METRICS_BASELINE_GENERATOR, + generator_version="2.0.0", + created_at="2026-03-06T00:00:00Z", + ) + + +def test_coerce_metrics_baseline_status_variants() -> None: + assert ( + mb_mod.coerce_metrics_baseline_status(MetricsBaselineStatus.OK) + == MetricsBaselineStatus.OK + ) + assert mb_mod.coerce_metrics_baseline_status("ok") == MetricsBaselineStatus.OK + assert ( + mb_mod.coerce_metrics_baseline_status("not-a-status") + == MetricsBaselineStatus.INVALID_TYPE + ) + assert ( + mb_mod.coerce_metrics_baseline_status(None) + == MetricsBaselineStatus.INVALID_TYPE + ) + + +def test_metrics_baseline_load_missing_file_is_noop(tmp_path: Path) -> None: + baseline = MetricsBaseline(tmp_path / "missing.json") + baseline.load() + assert baseline.snapshot is None + + +def test_metrics_baseline_load_stat_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "metrics-baseline.json" + _write_json(path, _valid_payload()) + baseline = MetricsBaseline(path) + + original_exists = Path.exists + + def _boom_exists(self: Path) -> bool: + if self == path: + raise OSError("exists failed") + return original_exists(self) + + monkeypatch.setattr(Path, "exists", _boom_exists) + with pytest.raises( + BaselineValidationError, match="Cannot stat metrics baseline file" + ): + baseline.load() + + monkeypatch.setattr(Path, "exists", original_exists) + original_stat = Path.stat + + def _boom_stat( + self: Path, + *, + follow_symlinks: bool = True, + ) -> os.stat_result: + if self == path: + raise OSError("stat failed") + try: + return original_stat(self, follow_symlinks=follow_symlinks) + except TypeError: + return original_stat(self) + + monkeypatch.setattr(Path, "stat", _boom_stat) + with pytest.raises( + BaselineValidationError, match="Cannot stat metrics baseline file" + ): + baseline.load() + + +def 
test_metrics_baseline_load_size_and_shape_validation(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + _write_json(path, _valid_payload()) + + baseline = MetricsBaseline(path) + with pytest.raises(BaselineValidationError, match="too large"): + baseline.load(max_size_bytes=1) + + _write_json(path, {"meta": [], "metrics": {}}) + with pytest.raises(BaselineValidationError, match="'meta' must be object"): + baseline.load() + + _write_json(path, {"meta": {}, "metrics": []}) + with pytest.raises(BaselineValidationError, match="'metrics' must be object"): + baseline.load() + + +def test_metrics_baseline_load_stat_error_after_exists_true( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "metrics-baseline.json" + _write_json(path, _valid_payload()) + baseline = MetricsBaseline(path) + + monkeypatch.setattr(Path, "exists", lambda self: self == path) + original_stat = Path.stat + + def _boom_stat( + self: Path, + *, + follow_symlinks: bool = True, + ) -> os.stat_result: + if self == path: + raise OSError("stat failed") + try: + return original_stat(self, follow_symlinks=follow_symlinks) + except TypeError: + return original_stat(self) + + monkeypatch.setattr(Path, "stat", _boom_stat) + with pytest.raises( + BaselineValidationError, match="Cannot stat metrics baseline file" + ): + baseline.load() + + +def test_metrics_baseline_save_requires_snapshot(tmp_path: Path) -> None: + baseline = MetricsBaseline(tmp_path / "metrics-baseline.json") + with pytest.raises(BaselineValidationError, match="snapshot is missing"): + baseline.save() + + +def test_metrics_baseline_save_standalone_payload_sets_metadata(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + baseline = MetricsBaseline(path) + baseline.snapshot = _snapshot() + baseline.schema_version = mb_mod.METRICS_BASELINE_SCHEMA_VERSION + baseline.python_tag = mb_mod.current_python_tag() + baseline.generator_name = mb_mod.METRICS_BASELINE_GENERATOR + 
baseline.generator_version = "2.0.0" + baseline.created_at = "2026-03-06T00:00:00Z" + baseline.save() + + payload = json.loads(path.read_text("utf-8")) + assert set(payload.keys()) == {"meta", "metrics"} + assert baseline.is_embedded_in_clone_baseline is False + assert baseline.schema_version == mb_mod.METRICS_BASELINE_SCHEMA_VERSION + assert baseline.python_tag == mb_mod.current_python_tag() + assert baseline.created_at == "2026-03-06T00:00:00Z" + assert isinstance(baseline.payload_sha256, str) + + +def test_metrics_baseline_save_with_existing_plain_payload_rewrites_plain( + tmp_path: Path, +) -> None: + path = tmp_path / "metrics-baseline.json" + _write_json(path, _valid_payload()) + baseline = MetricsBaseline(path) + baseline.snapshot = _snapshot() + baseline.save() + payload = json.loads(path.read_text("utf-8")) + assert "clones" not in payload + assert baseline.is_embedded_in_clone_baseline is False + + +def test_metrics_baseline_save_rejects_corrupted_existing_payload( + tmp_path: Path, +) -> None: + path = tmp_path / "metrics-baseline.json" + path.write_text("{broken", "utf-8") + baseline = MetricsBaseline(path) + baseline.snapshot = _snapshot() + with pytest.raises( + BaselineValidationError, match="Cannot read existing baseline file" + ): + baseline.save() + + +def test_metrics_baseline_verify_compatibility_and_integrity_failures( + tmp_path: Path, +) -> None: + baseline = MetricsBaseline(tmp_path / "metrics-baseline.json") + baseline.snapshot = _snapshot() + baseline.payload_sha256 = mb_mod._compute_payload_sha256(_snapshot()) + baseline.generator_name = "other" + baseline.schema_version = mb_mod.METRICS_BASELINE_SCHEMA_VERSION + baseline.python_tag = mb_mod.current_python_tag() + with pytest.raises(BaselineValidationError, match="generator mismatch"): + baseline.verify_compatibility(runtime_python_tag=mb_mod.current_python_tag()) + + baseline.generator_name = mb_mod.METRICS_BASELINE_GENERATOR + baseline.schema_version = "9.9" + with 
pytest.raises(BaselineValidationError, match="schema version mismatch"): + baseline.verify_compatibility(runtime_python_tag=mb_mod.current_python_tag()) + + baseline.schema_version = mb_mod.METRICS_BASELINE_SCHEMA_VERSION + baseline.python_tag = "cp310" + with pytest.raises(BaselineValidationError, match="python tag mismatch"): + baseline.verify_compatibility(runtime_python_tag="cp313") + + baseline.python_tag = mb_mod.current_python_tag() + baseline.snapshot = None + with pytest.raises(BaselineValidationError, match="snapshot is missing"): + baseline.verify_integrity() + + baseline.snapshot = _snapshot() + baseline.payload_sha256 = None + with pytest.raises(BaselineValidationError, match="payload hash is missing"): + baseline.verify_integrity() + + baseline.payload_sha256 = "abc" + with pytest.raises(BaselineValidationError, match="payload hash is missing"): + baseline.verify_integrity() + + baseline.payload_sha256 = "a" * 64 + with pytest.raises(BaselineValidationError, match="integrity check failed"): + baseline.verify_integrity() + + +def test_metrics_baseline_diff_without_snapshot_uses_default_snapshot( + tmp_path: Path, +) -> None: + baseline = MetricsBaseline(tmp_path / "metrics-baseline.json") + diff = baseline.diff(_project_metrics()) + assert diff.new_high_risk_functions == ("pkg.mod:hot",) + assert diff.new_high_coupling_classes == ("pkg.mod:Service",) + assert diff.new_cycles == (("pkg.a", "pkg.b"),) + assert diff.new_dead_code == ("pkg.mod:unused",) + assert diff.health_delta == 70 + + +def test_snapshot_from_project_metrics_and_from_project_metrics_factory( + tmp_path: Path, +) -> None: + snapshot = mb_mod.snapshot_from_project_metrics(_project_metrics()) + assert snapshot.high_risk_functions == ("pkg.mod:hot",) + assert snapshot.high_coupling_classes == ("pkg.mod:Service",) + assert snapshot.low_cohesion_classes == ("pkg.mod:Service",) + assert snapshot.dependency_cycles == (("pkg.a", "pkg.b"),) + assert snapshot.dead_code_items == 
("pkg.mod:unused",) + + baseline = MetricsBaseline.from_project_metrics( + project_metrics=_project_metrics(), + path=tmp_path / "metrics-baseline.json", + generator_version="2.0.0", + ) + assert baseline.generator_name == "codeclone" + assert baseline.generator_version == "2.0.0" + assert baseline.schema_version == mb_mod.METRICS_BASELINE_SCHEMA_VERSION + assert baseline.snapshot is not None + assert isinstance(baseline.payload_sha256, str) + + +def test_metrics_baseline_json_and_structure_validators(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + path.write_text("[]", "utf-8") + with pytest.raises(BaselineValidationError, match="must be an object"): + mb_mod._load_json_object(path) + + mb_mod._validate_top_level_structure(_valid_payload(), path=path) + with pytest.raises(BaselineValidationError, match="unexpected top-level keys"): + mb_mod._validate_top_level_structure( + {**_valid_payload(), "extra": 1}, + path=path, + ) + with pytest.raises(BaselineValidationError, match="missing required fields"): + mb_mod._validate_required_keys( + {"only": "one"}, frozenset({"required"}), path=path + ) + with pytest.raises(BaselineValidationError, match="unexpected fields"): + mb_mod._validate_exact_keys({"a": 1, "b": 2}, frozenset({"a"}), path=path) + + +def test_metrics_baseline_field_parsers_and_cycle_parser(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + + with pytest.raises(BaselineValidationError, match="'name' must be str"): + mb_mod._require_str({"name": 1}, "name", path=path) + assert ( + mb_mod._extract_metrics_payload_sha256({"payload_sha256": "x"}, path=path) + == "x" + ) + assert ( + mb_mod._extract_metrics_payload_sha256( + {"metrics_payload_sha256": "y", "payload_sha256": "x"}, + path=path, + ) + == "y" + ) + + with pytest.raises(BaselineValidationError, match="must be int"): + mb_mod._require_int({"value": True}, "value", path=path) + with pytest.raises(BaselineValidationError, match="must be int"): + 
mb_mod._require_int({"value": "1"}, "value", path=path) + + with pytest.raises(BaselineValidationError, match="must be list\\[str\\]"): + mb_mod._require_str_list({"items": "bad"}, "items", path=path) + with pytest.raises(BaselineValidationError, match="must be list\\[str\\]"): + mb_mod._require_str_list({"items": [1]}, "items", path=path) + + with pytest.raises(BaselineValidationError, match="must be list"): + mb_mod._parse_cycles( + {"dependency_cycles": "bad"}, key="dependency_cycles", path=path + ) + with pytest.raises( + BaselineValidationError, match="cycle item must be list\\[str\\]" + ): + mb_mod._parse_cycles( + {"dependency_cycles": ["bad"]}, + key="dependency_cycles", + path=path, + ) + with pytest.raises( + BaselineValidationError, match="cycle item must be list\\[str\\]" + ): + mb_mod._parse_cycles( + {"dependency_cycles": [[1]]}, + key="dependency_cycles", + path=path, + ) + assert mb_mod._parse_cycles( + {"dependency_cycles": [["b", "a"], ["a", "b"], ["b", "a"]]}, + key="dependency_cycles", + path=path, + ) == (("a", "b"), ("b", "a")) + + +def test_metrics_baseline_parse_generator_variants(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + assert mb_mod._parse_generator({"generator": "codeclone"}, path=path) == ( + "codeclone", + None, + ) + assert mb_mod._parse_generator( + {"generator": "codeclone", "codeclone_version": "1.0.0"}, + path=path, + ) == ("codeclone", "1.0.0") + with pytest.raises(BaselineValidationError, match="generator_version must be str"): + mb_mod._parse_generator( + {"generator": "codeclone", "generator_version": 1}, + path=path, + ) + + assert mb_mod._parse_generator( + {"generator": {"name": "codeclone", "version": "2.0.0"}}, + path=path, + ) == ("codeclone", "2.0.0") + with pytest.raises(BaselineValidationError, match="unexpected generator keys"): + mb_mod._parse_generator( + {"generator": {"name": "codeclone", "extra": 1}}, + path=path, + ) + with pytest.raises(BaselineValidationError, 
match=r"generator\.name must be str"): + mb_mod._parse_generator( + {"generator": {"name": 1, "version": "2.0.0"}}, + path=path, + ) + with pytest.raises( + BaselineValidationError, + match=r"generator\.version must be str", + ): + mb_mod._parse_generator( + {"generator": {"name": "codeclone", "version": 2}}, + path=path, + ) + with pytest.raises( + BaselineValidationError, match="generator must be object or str" + ): + mb_mod._parse_generator({"generator": 1}, path=path) + + +def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( + tmp_path: Path, +) -> None: + path = tmp_path / "baseline.json" + valid_embedded = { + "meta": { + "generator": {"name": "codeclone", "version": "2.0.0"}, + "schema_version": "1.0", + "python_tag": mb_mod.current_python_tag(), + "created_at": "2026-03-06T00:00:00Z", + "payload_sha256": "a" * 64, + }, + "clones": { + "functions": ["a" * 40 + "|0-19"], + "blocks": ["|".join(["a" * 40, "b" * 40, "c" * 40, "d" * 40])], + }, + } + meta_obj, clones_obj = mb_mod._require_embedded_clone_baseline_payload( + valid_embedded, path=path + ) + assert "schema_version" in meta_obj + assert "functions" in clones_obj + assert ( + mb_mod._resolve_embedded_schema_version(meta_obj, path=path) + == mb_mod.BASELINE_SCHEMA_VERSION + ) + assert ( + mb_mod._resolve_embedded_schema_version( + {**meta_obj, "schema_version": "2.1"}, + path=path, + ) + == "2.1" + ) + + with pytest.raises(BaselineValidationError, match="'meta' must be object"): + mb_mod._require_embedded_clone_baseline_payload( + {"meta": [], "clones": {}}, + path=path, + ) + with pytest.raises(BaselineValidationError, match="'clones' must be object"): + mb_mod._require_embedded_clone_baseline_payload( + {"meta": {}, "clones": []}, + path=path, + ) + with pytest.raises( + BaselineValidationError, + match=r"'clones\.functions' must be list\[str\]", + ): + mb_mod._require_embedded_clone_baseline_payload( + { + "meta": valid_embedded["meta"], + "clones": {"functions": [1], "blocks": 
[]}, + }, + path=path, + ) + with pytest.raises( + BaselineValidationError, + match=r"'clones\.blocks' must be list\[str\]", + ): + mb_mod._require_embedded_clone_baseline_payload( + { + "meta": valid_embedded["meta"], + "clones": {"functions": [], "blocks": [1]}, + }, + path=path, + ) + with pytest.raises(BaselineValidationError, match="must be semver string"): + mb_mod._resolve_embedded_schema_version( + {**meta_obj, "schema_version": "broken"}, + path=path, + ) + + +def test_metrics_baseline_parse_snapshot_grade_validation(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + payload = mb_mod._snapshot_payload(_snapshot()) + payload["health_grade"] = "Z" + with pytest.raises(BaselineValidationError, match="must be one of A/B/C/D/F"): + mb_mod._parse_snapshot(payload, path=path) + + +def test_metrics_baseline_load_json_read_oserror_status( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "metrics-baseline.json" + path.write_text("{}", "utf-8") + + def _boom_read(_self: Path, _encoding: str) -> str: + raise OSError("read failed") + + monkeypatch.setattr(Path, "read_text", _boom_read) + with pytest.raises( + BaselineValidationError, match="Cannot read metrics baseline file" + ): + mb_mod._load_json_object(path) diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py new file mode 100644 index 0000000..614f50f --- /dev/null +++ b/tests/test_metrics_modules.py @@ -0,0 +1,601 @@ +from __future__ import annotations + +import ast + +from codeclone.cfg_model import CFG +from codeclone.metrics import ( + HealthInputs, + build_dep_graph, + build_import_graph, + cohesion_risk, + compute_cbo, + compute_health, + compute_lcom4, + coupling_risk, + cyclomatic_complexity, + find_cycles, + find_suppressed_unused, + find_unused, + longest_chains, + max_depth, + nesting_depth, + risk_level, +) +from codeclone.metrics import complexity as complexity_mod +from codeclone.metrics import coupling as coupling_mod +from 
codeclone.metrics import health as health_mod +from codeclone.models import DeadCandidate, DeadItem, ModuleDep +from codeclone.paths import is_test_filepath + + +def _parse_class(source: str, name: str) -> ast.ClassDef: + module = ast.parse(source) + for node in module.body: + if isinstance(node, ast.ClassDef) and node.name == name: + return node + raise AssertionError(f"class {name!r} not found") + + +def _parse_function(source: str, name: str) -> ast.FunctionDef: + module = ast.parse(source) + for node in module.body: + if isinstance(node, ast.FunctionDef) and node.name == name: + return node + raise AssertionError(f"function {name!r} not found") + + +def _parse_async_function(source: str, name: str) -> ast.AsyncFunctionDef: + module = ast.parse(source) + for node in module.body: + if isinstance(node, ast.AsyncFunctionDef) and node.name == name: + return node + raise AssertionError(f"async function {name!r} not found") + + +def test_cyclomatic_complexity_floor_and_nontrivial_graph() -> None: + trivial_cfg = CFG("pkg.mod:f") + assert cyclomatic_complexity(trivial_cfg) == 1 + + cfg = CFG("pkg.mod:g") + mid = cfg.create_block() + cfg.entry.add_successor(mid) + cfg.entry.add_successor(cfg.exit) + mid.add_successor(cfg.exit) + assert cyclomatic_complexity(cfg) == 2 + + +def test_nesting_depth_covers_control_flow_and_generic_body_nodes() -> None: + func = _parse_function( + """ +def f(x): + if x: + for i in range(3): + if i: + pass + class Inner: + def method(self): + pass +""".strip(), + "f", + ) + assert nesting_depth(func) == 3 + + +def test_nesting_depth_handles_async_and_match_nodes() -> None: + func = _parse_async_function( + """ +async def worker(items, value): + async for item in items: + async with item: + match value: + case 1: + while False: + pass +""".strip(), + "worker", + ) + assert nesting_depth(func) == 4 + + +def test_iter_nested_statement_lists_try_and_empty_match() -> None: + module = ast.parse( + """ +def f() -> None: + try: + x = 1 + except 
Exception: + x = 2 + else: + x = 3 + finally: + x = 4 +""".strip() + ) + function = module.body[0] + assert isinstance(function, ast.FunctionDef) + try_stmt = function.body[0] + assert isinstance(try_stmt, ast.Try) + nested_lists = list(complexity_mod._iter_nested_statement_lists(try_stmt)) + assert len(nested_lists) == 4 + + match_stmt = ast.Match(subject=ast.Name(id="x"), cases=[]) + assert list(complexity_mod._iter_nested_statement_lists(match_stmt)) == [] + assert list(complexity_mod._iter_nested_statement_lists(ast.Pass())) == [] + + bare_try = ast.Try(body=[ast.Pass()], handlers=[], orelse=[], finalbody=[]) + assert list(complexity_mod._iter_nested_statement_lists(bare_try)) == [ + bare_try.body + ] + + +def test_risk_level_boundaries() -> None: + assert risk_level(10) == "low" + assert risk_level(11) == "medium" + assert risk_level(21) == "high" + + +def test_annotation_name_variants() -> None: + name_node = ast.Name(id="TypeA") + assert coupling_mod._annotation_name(name_node) == "TypeA" + + attr_node = ast.Attribute(value=ast.Name(id="pkg"), attr="TypeB") + assert coupling_mod._annotation_name(attr_node) == "TypeB" + + subscript_node = ast.Subscript(value=ast.Name(id="list"), slice=ast.Name(id="int")) + assert coupling_mod._annotation_name(subscript_node) == "list" + + tuple_node = ast.Tuple( + elts=[ast.Constant(value=1), ast.Name(id="TypeC")], ctx=ast.Load() + ) + assert coupling_mod._annotation_name(tuple_node) == "TypeC" + assert ( + coupling_mod._annotation_name( + ast.Tuple(elts=[ast.Constant(value=1)], ctx=ast.Load()) + ) + is None + ) + + assert coupling_mod._annotation_name(ast.Constant(value=1)) is None + + +def test_compute_cbo_filters_builtins_and_self_references() -> None: + class_node = _parse_class( + """ +from ext import External, Helper + +class Local: + pass + +class Sample(External): + field: list[Helper] + + def __init__(self, dep: Helper) -> None: + self.dep = dep + self.local = Local() + dep.api() + self.run() + len([]) +""".strip(), 
+ "Sample", + ) + cbo, resolved = compute_cbo( + class_node, + module_import_names={"External", "Helper"}, + module_class_names={"Sample", "Local"}, + ) + assert cbo == 3 + assert resolved == ("External", "Helper", "Local") + + +def test_compute_cbo_handles_non_symbolic_variants() -> None: + synthetic = ast.ClassDef( + name="Sample", + bases=[ast.Constant(value=1)], + keywords=[], + body=[ast.Pass()], + decorator_list=[], + ) + cbo, resolved = compute_cbo( + synthetic, + module_import_names=set(), + module_class_names={"Sample"}, + ) + assert cbo == 0 + assert resolved == () + + class_node = _parse_class( + """ +class DynamicCalls: + def run(self, value: "str") -> None: + (lambda fn: fn)(value) +""".strip(), + "DynamicCalls", + ) + cbo_dynamic, resolved_dynamic = compute_cbo( + class_node, + module_import_names={"External"}, + module_class_names={"DynamicCalls"}, + ) + assert cbo_dynamic == 0 + assert resolved_dynamic == () + + +def test_coupling_risk_boundaries() -> None: + assert coupling_risk(5) == "low" + assert coupling_risk(10) == "medium" + assert coupling_risk(11) == "high" + + +def test_compute_lcom4_for_empty_and_partially_connected_class() -> None: + empty_class = _parse_class( + """ +class Empty: + value = 1 +""".strip(), + "Empty", + ) + assert compute_lcom4(empty_class) == (1, 0, 0) + + class_node = _parse_class( + """ +class Service: + def first(self) -> None: + self.counter = 1 + self.second() + + def second(self) -> int: + return self.counter + + def third(self) -> int: + return 1 +""".strip(), + "Service", + ) + assert compute_lcom4(class_node) == (2, 3, 2) + + recursive = _parse_class( + """ +class Recursive: + def left(self) -> None: + self.right() + + def right(self) -> None: + self.left() +""".strip(), + "Recursive", + ) + assert compute_lcom4(recursive) == (1, 2, 2) + + triangle = _parse_class( + """ +class Triangle: + def a(self) -> None: + self.shared = 1 + self.b() + + def b(self) -> None: + self.shared = 2 + self.c() + + def c(self) -> 
None: + self.shared = 3 + self.a() +""".strip(), + "Triangle", + ) + assert compute_lcom4(triangle) == (1, 3, 4) + + +def test_compute_lcom4_ignores_unknown_self_calls() -> None: + class_node = _parse_class( + """ +class UnknownCall: + def first(self) -> None: + self.external() + + def second(self) -> None: + pass +""".strip(), + "UnknownCall", + ) + assert compute_lcom4(class_node) == (2, 2, 1) + + +def test_cohesion_risk_boundaries() -> None: + assert cohesion_risk(1) == "low" + assert cohesion_risk(3) == "medium" + assert cohesion_risk(4) == "high" + + +def test_find_unused_filters_non_actionable_and_preserves_ordering() -> None: + definitions = ( + DeadCandidate( + qualname="pkg.mod:used", + local_name="used", + filepath="pkg/mod.py", + start_line=1, + end_line=1, + kind="function", + ), + DeadCandidate( + qualname="pkg.mod:dead", + local_name="dead", + filepath="pkg/mod.py", + start_line=3, + end_line=4, + kind="function", + ), + DeadCandidate( + qualname="pkg.mod:MaybeUsed", + local_name="unreferenced_name", + filepath="pkg/mod.py", + start_line=2, + end_line=2, + kind="class", + ), + DeadCandidate( + qualname="pkg.tests:test_func", + local_name="test_func", + filepath="pkg/tests/test_mod.py", + start_line=5, + end_line=5, + kind="function", + ), + DeadCandidate( + qualname="pkg.mod:Visitor.visit_Name", + local_name="visit_Name", + filepath="pkg/mod.py", + start_line=6, + end_line=6, + kind="method", + ), + DeadCandidate( + qualname="pkg.mod:Model.__repr__", + local_name="__repr__", + filepath="pkg/mod.py", + start_line=7, + end_line=7, + kind="method", + ), + DeadCandidate( + qualname="pkg.mod:Hooks.setup_method", + local_name="setup_method", + filepath="pkg/mod.py", + start_line=8, + end_line=8, + kind="method", + ), + DeadCandidate( + qualname="pkg.mod:__getattr__", + local_name="__getattr__", + filepath="pkg/mod.py", + start_line=9, + end_line=9, + kind="function", + ), + DeadCandidate( + qualname="pkg.mod:__dir__", + local_name="__dir__", + 
filepath="pkg/mod.py", + start_line=10, + end_line=10, + kind="function", + ), + DeadCandidate( + qualname="pkg.mod:suppressed", + local_name="suppressed", + filepath="pkg/mod.py", + start_line=11, + end_line=12, + kind="function", + suppressed_rules=("dead-code",), + ), + ) + found = find_unused( + definitions=definitions, + referenced_names=frozenset({"used", "MaybeUsed"}), + ) + assert found == ( + DeadItem( + qualname="pkg.mod:MaybeUsed", + filepath="pkg/mod.py", + start_line=2, + end_line=2, + kind="class", + confidence="medium", + ), + DeadItem( + qualname="pkg.mod:dead", + filepath="pkg/mod.py", + start_line=3, + end_line=4, + kind="function", + confidence="high", + ), + ) + + +def test_dead_code_test_filepath_helpers() -> None: + candidate = DeadCandidate( + qualname="pkg.mod:fixture", + local_name="fixture", + filepath="pkg/tests/helpers.py", + start_line=1, + end_line=1, + kind="function", + ) + assert find_unused(definitions=(candidate,), referenced_names=frozenset()) == () + assert is_test_filepath("pkg/tests/test_mod.py") is True + + regular_method = DeadCandidate( + qualname="pkg.mod:Service.method", + local_name="method", + filepath="pkg/mod.py", + start_line=2, + end_line=3, + kind="method", + ) + found = find_unused(definitions=(regular_method,), referenced_names=frozenset()) + assert found and found[0].qualname == "pkg.mod:Service.method" + + +def test_find_unused_respects_referenced_qualnames() -> None: + candidate = DeadCandidate( + qualname="pkg.mod:wrapped", + local_name="wrapped", + filepath="pkg/mod.py", + start_line=1, + end_line=3, + kind="function", + ) + found = find_unused( + definitions=(candidate,), + referenced_names=frozenset(), + referenced_qualnames=frozenset({"pkg.mod:wrapped"}), + ) + assert found == () + + +def test_find_unused_applies_inline_dead_code_suppression() -> None: + candidate = DeadCandidate( + qualname="pkg.mod:runtime_callback", + local_name="runtime_callback", + filepath="pkg/mod.py", + start_line=1, + end_line=2, 
+ kind="function", + suppressed_rules=("dead-code",), + ) + found = find_unused(definitions=(candidate,), referenced_names=frozenset()) + assert found == () + + +def test_find_suppressed_unused_returns_actionable_suppressed_candidates() -> None: + candidate = DeadCandidate( + qualname="pkg.mod:runtime_callback", + local_name="runtime_callback", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + suppressed_rules=("dead-code",), + ) + found = find_suppressed_unused( + definitions=(candidate,), + referenced_names=frozenset(), + ) + assert found == ( + DeadItem( + qualname="pkg.mod:runtime_callback", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + confidence="high", + ), + ) + + +def test_find_unused_keeps_non_pep562_module_dunders_actionable() -> None: + candidate = DeadCandidate( + qualname="pkg.mod:__custom__", + local_name="__custom__", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + ) + found = find_unused(definitions=(candidate,), referenced_names=frozenset()) + assert found == ( + DeadItem( + qualname="pkg.mod:__custom__", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + confidence="high", + ), + ) + + +def test_build_import_graph_cycle_depth_and_chain_helpers() -> None: + deps = ( + ModuleDep(source="a", target="b", import_type="import", line=1), + ModuleDep(source="b", target="a", import_type="from_import", line=2), + ModuleDep(source="c", target="c", import_type="import", line=3), + ModuleDep(source="d", target="e", import_type="import", line=4), + ) + graph = build_import_graph(modules={"d", "f"}, deps=deps) + assert set(graph) == {"a", "b", "c", "d", "e", "f"} + assert graph["a"] == {"b"} + assert graph["f"] == set() + + cycles = find_cycles(graph) + assert cycles == (("a", "b"), ("c",)) + assert max_depth(graph) >= 2 + assert longest_chains(graph, limit=0) == () + assert longest_chains(graph, limit=2) + + +def 
test_build_dep_graph_deduplicates_edges() -> None: + repeated = ModuleDep(source="pkg.a", target="pkg.b", import_type="import", line=1) + dep_graph = build_dep_graph(modules={"pkg.a"}, deps=(repeated, repeated)) + assert dep_graph.modules == frozenset({"pkg.a", "pkg.b"}) + assert dep_graph.edges == (repeated,) + + +def test_clone_piecewise_score_breakpoints() -> None: + pw = health_mod._clone_piecewise_score + assert pw(0.0) == 100 + assert pw(-0.1) == 100 + # First segment: 0 → 0.05 maps 100 → 90 + assert pw(0.025) == 95 + assert pw(0.05) == 90 + # Second segment: 0.05 → 0.20 maps 90 → 50 + assert pw(0.10) == 77 # 90 + (0.05/0.15)*(-40) ≈ 76.7 → 77 + assert pw(0.20) == 50 + # Third segment: 0.20 → 0.50 maps 50 → 0 + assert pw(0.35) == 25 + assert pw(0.50) == 0 + # Beyond last breakpoint + assert pw(1.0) == 0 + + +def test_health_helpers_and_compute_health_boundaries() -> None: + assert health_mod._safe_div(10, 0) == 0.0 + assert health_mod._grade(95) == "A" + assert health_mod._grade(80) == "B" + assert health_mod._grade(65) == "C" + assert health_mod._grade(45) == "D" + assert health_mod._grade(10) == "F" + + health = compute_health( + HealthInputs( + files_found=0, + files_analyzed_or_cached=0, + function_clone_groups=50, + block_clone_groups=50, + complexity_avg=50.0, + complexity_max=200, + high_risk_functions=20, + coupling_avg=20.0, + coupling_max=50, + high_risk_classes=10, + cohesion_avg=10.0, + low_cohesion_classes=10, + dependency_cycles=10, + dependency_max_depth=20, + dead_code_items=30, + ) + ) + assert 0 <= health.total <= 100 + assert health.grade in {"A", "B", "C", "D", "F"} + assert set(health.dimensions) == { + "clones", + "complexity", + "coupling", + "cohesion", + "dead_code", + "dependencies", + "coverage", + } diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 1310571..613a320 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -7,9 +7,14 @@ from codeclone.meta_markers import CFG_META_PREFIX from 
codeclone.normalize import ( NormalizationConfig, - normalized_ast_dump, normalized_ast_dump_from_list, ) +from tests._assertions import assert_contains_all +from tests._ast_helpers import fix_missing_single_function + + +def normalized_ast_dump(node: ast.AST, cfg: NormalizationConfig) -> str: + return normalized_ast_dump_from_list([node], cfg) def _normalized_dump(source: str, cfg: NormalizationConfig) -> str: @@ -67,49 +72,48 @@ def test_normalization_equivalent_sources(src1: str, src2: str) -> None: assert normalized_ast_dump(a1, cfg) == normalized_ast_dump(a2, cfg) -def test_normalization_type_annotations_removed() -> None: - src1 = """ +@pytest.mark.parametrize( + ("src1", "src2"), + [ + ( + """ def f(x: int) -> int: return x -""" - src2 = """ +""", + """ def f(x): return x -""" - cfg = NormalizationConfig() - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) == normalized_ast_dump(a2, cfg) - - -def test_normalization_attributes_and_constants() -> None: - src1 = """ +""", + ), + ( + """ def f(): obj.attr = 123 -""" - src2 = """ +""", + """ def f(): x.y = 999 -""" - cfg = NormalizationConfig() - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) == normalized_ast_dump(a2, cfg) - - -def test_normalization_augassign_equivalence() -> None: - src1 = """ +""", + ), + ( + """ def f(): x += 1 -""" - src2 = """ +""", + """ def f(): x = x + 1 -""" - cfg = NormalizationConfig() - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) == normalized_ast_dump(a2, cfg) +""", + ), + ], + ids=[ + "type_annotations_removed", + "attributes_and_constants", + "augassign_equivalence", + ], +) +def test_normalization_equivalent_shapes(src1: str, src2: str) -> None: + _assert_normalized_equal(src1, src2, NormalizationConfig()) def test_normalization_augassign_target_without_ctx() -> None: @@ -125,26 +129,31 @@ def 
test_normalization_augassign_target_without_ctx() -> None: assert "Assign" in dump -def test_normalization_unary_non_not_preserved() -> None: - src = """ +@pytest.mark.parametrize( + ("src", "needle"), + [ + ( + """ def f(x): return -x -""" - cfg = NormalizationConfig(normalize_names=False) - node = ast.parse(src).body[0] - dump = normalized_ast_dump(node, cfg) - assert "UnaryOp" in dump - - -def test_normalization_not_non_compare_preserved() -> None: - src = """ +""", + "UnaryOp", + ), + ( + """ def f(x): return not x -""" +""", + "Not", + ), + ], + ids=["unary_non_not_preserved", "not_non_compare_preserved"], +) +def test_normalization_unary_shapes_preserved(src: str, needle: str) -> None: cfg = NormalizationConfig(normalize_names=False) node = ast.parse(src).body[0] dump = normalized_ast_dump(node, cfg) - assert "Not" in dump + assert needle in dump def test_normalization_commutative_binop_reorders() -> None: @@ -197,34 +206,48 @@ def test_normalization_commutative_binop_not_reordered(src1: str, src2: str) -> _assert_normalized_not_equal(src1, src2, cfg) -def test_normalization_preserves_call_target_names() -> None: - src1 = """ +@pytest.mark.parametrize( + ("src1", "src2"), + [ + ( + """ def f(x): return load_user(x) -""" - src2 = """ +""", + """ def f(x): return delete_user(x) -""" - cfg = NormalizationConfig() - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) != normalized_ast_dump(a2, cfg) - - -def test_normalization_preserves_call_target_attributes() -> None: - src1 = """ +""", + ), + ( + """ def f(): return svc.load_user() -""" - src2 = """ +""", + """ def f(): return svc.delete_user() -""" - cfg = NormalizationConfig() - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) != normalized_ast_dump(a2, cfg) +""", + ), + ( + """ +def f(): + return factory_a().run() +""", + """ +def f(): + return factory_b().run() +""", + ), + ], + ids=[ + "call_target_names", + 
"call_target_attributes", + "attribute_call_target_with_call_value", + ], +) +def test_normalization_preserves_call_targets(src1: str, src2: str) -> None: + _assert_normalized_not_equal(src1, src2, NormalizationConfig()) @pytest.mark.parametrize( @@ -262,21 +285,6 @@ def test_normalization_call_values_normalize(src1: str, src2: str) -> None: _assert_normalized_equal(src1, src2, cfg) -def test_normalization_preserves_attribute_call_target_with_call_value() -> None: - src1 = """ -def f(): - return factory_a().run() -""" - src2 = """ -def f(): - return factory_b().run() -""" - cfg = NormalizationConfig() - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) != normalized_ast_dump(a2, cfg) - - def test_commutative_operand_recursive_and_constant_guards() -> None: nested = ast.parse("(1 + 2) + 3", mode="eval").body assert isinstance(nested, ast.BinOp) @@ -306,28 +314,41 @@ def test_normalization_preserves_semantic_marker_names() -> None: ], decorator_list=[], ) - module = ast.Module(body=[fn], type_ignores=[]) - module = ast.fix_missing_locations(module) - node = module.body[0] - assert isinstance(node, ast.FunctionDef) + node = fix_missing_single_function(fn) cfg = NormalizationConfig() dump = normalized_ast_dump(node, cfg) assert f"{CFG_META_PREFIX}MATCH_PATTERN:MatchValue(Constant(value=1))" in dump -def test_normalization_non_commutative_binop_not_reordered() -> None: - src1 = """ +@pytest.mark.parametrize( + ("src1", "src2"), + [ + ( + """ def f(): return a - b -""" - src2 = """ +""", + """ def f(): return b - a -""" +""", + ), + ( + """ +def f(x, y): + return not (x == y) +""", + """ +def f(x, y): + return x != y +""", + ), + ], + ids=["non_commutative_binop_not_reordered", "no_demorgan"], +) +def test_normalization_intentional_non_equivalences(src1: str, src2: str) -> None: cfg = NormalizationConfig(normalize_names=False) - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, 
cfg) != normalized_ast_dump(a2, cfg) + _assert_normalized_not_equal(src1, src2, cfg) def test_normalization_not_in_and_is_not_equivalence() -> None: @@ -356,21 +377,6 @@ def f(x, y): assert normalized_ast_dump(a3, cfg) == normalized_ast_dump(a4, cfg) -def test_normalization_no_demorgan() -> None: - src1 = """ -def f(x, y): - return not (x == y) -""" - src2 = """ -def f(x, y): - return x != y -""" - cfg = NormalizationConfig(normalize_names=False) - a1 = ast.parse(src1).body[0] - a2 = ast.parse(src2).body[0] - assert normalized_ast_dump(a1, cfg) != normalized_ast_dump(a2, cfg) - - def test_normalization_flags_false_preserve_details() -> None: src = """ def f(x: int, /, y: int, *, z: int, **k: int) -> int: @@ -387,18 +393,27 @@ def f(x: int, /, y: int, *, z: int, **k: int) -> int: ) node = ast.parse(src).body[0] dump = normalized_ast_dump(node, cfg) - assert "my_attr" in dump - assert "123" in dump - assert "doc" in dump - assert "id='x'" in dump - assert "id='int'" in dump + assert_contains_all(dump, "my_attr", "123", "doc", "id='x'", "id='int'") -def test_normalization_type_annotations_posonly_kwonly_vararg() -> None: - src = """ +@pytest.mark.parametrize( + "src", + [ + """ def f(a: int, /, b: int, *args: int, c: int, **kwargs: int) -> int: return a -""" +""", + """ +async def af(x): + return x +""", + ], + ids=[ + "type_annotations_posonly_kwonly_vararg", + "async_function", + ], +) +def test_normalization_dump_is_string_for_supported_function_shapes(src: str) -> None: cfg = NormalizationConfig() node = ast.parse(src).body[0] dump = normalized_ast_dump(node, cfg) @@ -420,14 +435,3 @@ def f(): dump = normalized_ast_dump(node, cfg) assert "attr" in dump assert "7" in dump - - -def test_normalization_async_function() -> None: - src = """ -async def af(x): - return x -""" - cfg = NormalizationConfig() - node = ast.parse(src).body[0] - dump = normalized_ast_dump(node, cfg) - assert isinstance(dump, str) diff --git a/tests/test_pipeline_metrics.py 
b/tests/test_pipeline_metrics.py new file mode 100644 index 0000000..59935bd --- /dev/null +++ b/tests/test_pipeline_metrics.py @@ -0,0 +1,347 @@ +from __future__ import annotations + +from codeclone.cache import CacheEntry +from codeclone.models import ( + ClassMetrics, + DeadCandidate, + DeadItem, + HealthScore, + MetricsDiff, + ModuleDep, + ProjectMetrics, +) +from codeclone.pipeline import ( + MetricGateConfig, + _as_int, + _as_sorted_str_tuple, + _as_str, + _class_metric_sort_key, + _load_cached_metrics, + _module_dep_sort_key, + _module_names_from_units, + _should_use_parallel, + build_metrics_report_payload, + compute_project_metrics, + metric_gate_reasons, +) + + +def _project_metrics(*, dead_confidence: str = "high") -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=10.0, + complexity_max=30, + high_risk_functions=("pkg.mod:hot",), + coupling_avg=5.0, + coupling_max=12, + high_risk_classes=("pkg.mod:Service",), + cohesion_avg=2.5, + cohesion_max=4, + low_cohesion_classes=("pkg.mod:Service",), + dependency_modules=2, + dependency_edges=1, + dependency_edge_list=( + ModuleDep(source="pkg.mod", target="pkg.dep", import_type="import", line=1), + ), + dependency_cycles=(("pkg.mod", "pkg.dep"),), + dependency_max_depth=9, + dependency_longest_chains=(("pkg.mod", "pkg.dep"),), + dead_code=( + DeadItem( + qualname="pkg.mod:dead", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + confidence="high" if dead_confidence == "high" else "medium", + ), + ), + health=HealthScore(total=50, grade="D", dimensions={"health": 50}), + ) + + +def test_pipeline_basic_helpers_and_sort_keys() -> None: + assert _as_int(True) == 1 + assert _as_int("15") == 15 + assert _as_int("bad", default=7) == 7 + assert _as_int(1.5, default=3) == 3 + assert _as_str("value", default="x") == "value" + assert _as_str(1, default="x") == "x" + assert _as_sorted_str_tuple(("a", "b")) == () + assert _as_sorted_str_tuple(["b", "a", "b", ""]) == ("a", "b") + assert 
_should_use_parallel(files_count=100, processes=1) is False + + dep = ModuleDep(source="a", target="b", import_type="import", line=2) + cls = ClassMetrics( + qualname="pkg.mod:Service", + filepath="pkg/mod.py", + start_line=10, + end_line=30, + cbo=3, + lcom4=2, + method_count=4, + instance_var_count=2, + risk_coupling="low", + risk_cohesion="low", + ) + assert _module_dep_sort_key(dep) == ("a", "b", "import", 2) + assert _class_metric_sort_key(cls) == ("pkg/mod.py", 10, 30, "pkg.mod:Service") + + +def test_module_names_from_units_extracts_module_prefixes() -> None: + units = ( + {"qualname": "pkg.core:build"}, + {"qualname": "pkg.utils.helper"}, + {"qualname": ""}, + ) + assert _module_names_from_units(units) == frozenset( + {"pkg.core", "pkg.utils.helper"} + ) + + +def test_compute_project_metrics_respects_skip_flags() -> None: + project_metrics, dep_graph, dead_items = compute_project_metrics( + units=( + { + "qualname": "pkg.mod:run", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 5, + "cyclomatic_complexity": 3, + "nesting_depth": 1, + "risk": "high", + }, + ), + class_metrics=(), + module_deps=(), + dead_candidates=( + DeadCandidate( + qualname="pkg.mod:unused", + local_name="unused", + filepath="pkg/mod.py", + start_line=7, + end_line=9, + kind="function", + ), + ), + referenced_names=frozenset(), + referenced_qualnames=frozenset(), + files_found=1, + files_analyzed_or_cached=1, + function_clone_groups=0, + block_clone_groups=0, + skip_dependencies=True, + skip_dead_code=True, + ) + assert dep_graph.modules == frozenset() + assert dead_items == () + assert project_metrics.dependency_modules == 0 + assert project_metrics.dead_code == () + + +def test_build_metrics_report_payload_includes_suppressed_dead_code_items() -> None: + payload = build_metrics_report_payload( + project_metrics=_project_metrics(dead_confidence="high"), + units=(), + class_metrics=(), + suppressed_dead_code=( + DeadItem( + qualname="pkg.mod:suppressed_dead", + 
filepath="pkg/mod.py", + start_line=10, + end_line=12, + kind="function", + confidence="high", + ), + ), + ) + dead_code = payload["dead_code"] + assert isinstance(dead_code, dict) + summary = dead_code["summary"] + assert summary == {"total": 1, "critical": 1, "high_confidence": 1, "suppressed": 1} + suppressed_items = dead_code["suppressed_items"] + assert suppressed_items == [ + { + "qualname": "pkg.mod:suppressed_dead", + "filepath": "pkg/mod.py", + "start_line": 10, + "end_line": 12, + "kind": "function", + "confidence": "high", + "suppressed_by": [{"rule": "dead-code", "source": "inline_codeclone"}], + } + ] + + +def test_load_cached_metrics_ignores_referenced_names_from_test_files() -> None: + entry: CacheEntry = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "referenced_names": ["orphan", "helper"], + } + _, _, _, test_names, test_qualnames = _load_cached_metrics( + entry, + filepath="pkg/tests/test_mod.py", + ) + _, _, _, regular_names, regular_qualnames = _load_cached_metrics( + entry, + filepath="pkg/mod.py", + ) + assert test_names == frozenset() + assert test_qualnames == frozenset() + assert regular_names == frozenset({"helper", "orphan"}) + assert regular_qualnames == frozenset() + + +def test_load_cached_metrics_preserves_coupled_classes() -> None: + entry: CacheEntry = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [ + { + "qualname": "pkg.mod:Service", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 10, + "cbo": 2, + "lcom4": 1, + "method_count": 3, + "instance_var_count": 1, + "risk_coupling": "low", + "risk_cohesion": "low", + "coupled_classes": ["TypeB", "TypeA", "TypeA"], + } + ], + } + class_metrics, _, _, _, _ = _load_cached_metrics(entry, filepath="pkg/mod.py") + assert len(class_metrics) == 1 + assert class_metrics[0].coupled_classes == ("TypeA", "TypeB") + + +def 
test_load_cached_metrics_preserves_dead_candidate_suppressions() -> None: + entry: CacheEntry = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "dead_candidates": [ + { + "qualname": "pkg.mod:runtime_hook", + "local_name": "runtime_hook", + "filepath": "pkg/mod.py", + "start_line": 10, + "end_line": 11, + "kind": "function", + "suppressed_rules": ["dead-code", "dead-code"], + } + ], + } + _, _, dead_candidates, _, _ = _load_cached_metrics(entry, filepath="pkg/mod.py") + assert len(dead_candidates) == 1 + assert dead_candidates[0].suppressed_rules == ("dead-code",) + + +def test_metric_gate_reasons_collects_all_enabled_reasons() -> None: + reasons = metric_gate_reasons( + project_metrics=_project_metrics(dead_confidence="high"), + metrics_diff=MetricsDiff( + new_high_risk_functions=("pkg.mod:new_hot",), + new_high_coupling_classes=("pkg.mod:new_class",), + new_cycles=(("pkg.x", "pkg.y"),), + new_dead_code=("pkg.mod:new_dead",), + health_delta=-1, + ), + config=MetricGateConfig( + fail_complexity=20, + fail_coupling=10, + fail_cohesion=3, + fail_cycles=True, + fail_dead_code=True, + fail_health=70, + fail_on_new_metrics=True, + ), + ) + assert len(reasons) == 11 + assert any(reason.startswith("Complexity threshold exceeded") for reason in reasons) + assert any(reason.startswith("Coupling threshold exceeded") for reason in reasons) + assert any(reason.startswith("Cohesion threshold exceeded") for reason in reasons) + assert any(reason.startswith("Dependency cycles detected") for reason in reasons) + assert any(reason.startswith("Dead code detected") for reason in reasons) + assert any(reason.startswith("Health score below threshold") for reason in reasons) + assert any(reason.startswith("New high-risk functions") for reason in reasons) + assert any(reason.startswith("New high-coupling classes") for reason in reasons) + assert any(reason.startswith("New dependency cycles") for reason in reasons) + assert 
any(reason.startswith("New dead code items") for reason in reasons) + assert any(reason.startswith("Health score regressed") for reason in reasons) + + +def test_metric_gate_reasons_skip_disabled_and_non_critical_paths() -> None: + reasons = metric_gate_reasons( + project_metrics=_project_metrics(dead_confidence="medium"), + metrics_diff=None, + config=MetricGateConfig( + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=True, + fail_health=-1, + fail_on_new_metrics=True, + ), + ) + assert reasons == () + + +def test_metric_gate_reasons_partial_new_metrics_paths() -> None: + reasons = metric_gate_reasons( + project_metrics=_project_metrics(dead_confidence="medium"), + metrics_diff=MetricsDiff( + new_high_risk_functions=(), + new_high_coupling_classes=("pkg.mod:new_class",), + new_cycles=(), + new_dead_code=("pkg.mod:new_dead",), + health_delta=0, + ), + config=MetricGateConfig( + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=True, + ), + ) + assert reasons == ( + "New high-coupling classes vs metrics baseline: 1.", + "New dead code items vs metrics baseline: 1.", + ) + + +def test_metric_gate_reasons_new_metrics_optional_buckets_empty() -> None: + reasons = metric_gate_reasons( + project_metrics=_project_metrics(dead_confidence="medium"), + metrics_diff=MetricsDiff( + new_high_risk_functions=(), + new_high_coupling_classes=(), + new_cycles=(("pkg.a", "pkg.b"),), + new_dead_code=(), + health_delta=-2, + ), + config=MetricGateConfig( + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=True, + ), + ) + assert reasons == ( + "New dependency cycles vs metrics baseline: 1.", + "Health score regressed vs metrics baseline: delta=-2.", + ) diff --git a/tests/test_pipeline_process.py b/tests/test_pipeline_process.py new file mode 100644 
index 0000000..f3474bc --- /dev/null +++ b/tests/test_pipeline_process.py @@ -0,0 +1,456 @@ +from __future__ import annotations + +import builtins +from argparse import Namespace +from collections.abc import Callable +from pathlib import Path +from typing import Literal + +import pytest + +import codeclone.pipeline as pipeline +from codeclone.cache import Cache, CacheEntry, SourceStatsDict, file_stat_signature +from codeclone.normalize import NormalizationConfig + + +class _FailExec: + def __init__(self, *args: object, **kwargs: object) -> None: + return None + + def __enter__(self) -> _FailExec: + raise RuntimeError("executor unavailable") + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: object | None, + ) -> Literal[False]: + return False + + +class _UnexpectedExec: + def __init__(self, *args: object, **kwargs: object) -> None: + raise AssertionError("ProcessPoolExecutor should not be used for small batches") + + +def _build_boot(tmp_path: Path, *, processes: int) -> pipeline.BootstrapResult: + return pipeline.BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=Namespace( + processes=processes, + min_loc=1, + min_stmt=1, + block_min_loc=20, + block_min_stmt=8, + segment_min_loc=20, + segment_min_stmt=10, + skip_metrics=True, + ), + output_paths=pipeline.OutputPaths(html=None, json=None, text=None), + cache_path=tmp_path / "cache.json", + ) + + +def _build_discovery(filepaths: tuple[str, ...]) -> pipeline.DiscoveryResult: + return pipeline.DiscoveryResult( + files_found=len(filepaths), + cache_hits=0, + files_skipped=0, + all_file_paths=filepaths, + cached_units=(), + cached_blocks=(), + cached_segments=(), + cached_class_metrics=(), + cached_module_deps=(), + cached_dead_candidates=(), + cached_referenced_names=frozenset(), + files_to_process=filepaths, + skipped_warnings=(), + ) + + +def _ok_result(filepath: str) -> pipeline.FileProcessResult: + return pipeline.FileProcessResult( + 
filepath=filepath, + success=True, + units=[], + blocks=[], + segments=[], + lines=2, + functions=1, + methods=0, + classes=0, + stat=file_stat_signature(filepath), + ) + + +def _stub_process_file( + *, + expected_root: str | None = None, + expected_filepath: str | None = None, +) -> object: + def _process_file( + filepath: str, + root: str, + cfg: NormalizationConfig, + min_loc: int, + min_stmt: int, + collect_structural_findings: bool = True, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, + ) -> pipeline.FileProcessResult: + if expected_root is not None: + assert root == expected_root + if expected_filepath is not None: + assert filepath == expected_filepath + assert min_loc == 1 + assert min_stmt == 1 + assert collect_structural_findings is False + return _ok_result(filepath) + + return _process_file + + +def _build_large_batch_case( + tmp_path: Path, +) -> tuple[pipeline.BootstrapResult, pipeline.DiscoveryResult, Cache, list[str]]: + filepaths: list[str] = [] + for idx in range(pipeline._parallel_min_files(2) + 1): + src = tmp_path / f"a{idx}.py" + src.write_text("def f():\n return 1\n", "utf-8") + filepaths.append(str(src)) + + boot = _build_boot(tmp_path, processes=2) + discovery = _build_discovery(tuple(filepaths)) + cache = Cache(tmp_path / "cache.json", root=tmp_path) + return boot, discovery, cache, filepaths + + +def _build_single_file_process_case( + tmp_path: Path, +) -> tuple[str, pipeline.BootstrapResult, pipeline.DiscoveryResult]: + src = tmp_path / "a.py" + src.write_text("def f():\n return 1\n", "utf-8") + filepath = str(src) + return filepath, _build_boot(tmp_path, processes=1), _build_discovery((filepath,)) + + +def _build_report_case( + tmp_path: Path, + *, + json_out: bool = True, + md_out: bool = False, + sarif_out: bool = False, +) -> tuple[ + pipeline.BootstrapResult, + pipeline.DiscoveryResult, + pipeline.ProcessingResult, + pipeline.AnalysisResult, +]: + boot = 
pipeline.BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=Namespace(), + output_paths=pipeline.OutputPaths( + json=tmp_path / "report.json" if json_out else None, + md=tmp_path / "report.md" if md_out else None, + sarif=tmp_path / "report.sarif" if sarif_out else None, + ), + cache_path=tmp_path / "cache.json", + ) + discovery = _build_discovery(()) + processing = pipeline.ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + files_analyzed=0, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + ) + analysis = pipeline.AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=0, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=0, + project_metrics=None, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + ) + return boot, discovery, processing, analysis + + +def test_process_parallel_fallback_without_callback_uses_sequential( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + boot, discovery, cache, filepaths = _build_large_batch_case(tmp_path) + + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailExec) + monkeypatch.setattr( + pipeline, + "process_file", + _stub_process_file( + expected_root=str(tmp_path), + ), + ) + + result = pipeline.process( + boot=boot, + discovery=discovery, + cache=cache, + on_parallel_fallback=None, + ) + + assert result.files_analyzed == len(filepaths) + assert result.files_skipped == 0 + assert result.analyzed_functions == len(filepaths) + + +def test_process_small_batch_skips_parallel_executor( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + src = tmp_path / "a.py" + src.write_text("def f():\n return 1\n", "utf-8") 
+ + boot = _build_boot(tmp_path, processes=4) + discovery = _build_discovery((str(src),)) + cache = Cache(tmp_path / "cache.json", root=tmp_path) + callbacks: list[str] = [] + + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _UnexpectedExec) + monkeypatch.setattr( + pipeline, + "process_file", + _stub_process_file(expected_root=str(tmp_path)), + ) + result = pipeline.process( + boot=boot, + discovery=discovery, + cache=cache, + on_parallel_fallback=lambda exc: callbacks.append(str(exc)), + ) + + assert callbacks == [] + assert result.files_analyzed == 1 + assert result.files_skipped == 0 + + +def test_process_parallel_failure_large_batch_invokes_fallback_callback( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + boot, discovery, cache, filepaths = _build_large_batch_case(tmp_path) + callbacks: list[str] = [] + + monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailExec) + monkeypatch.setattr( + pipeline, + "process_file", + _stub_process_file(expected_root=str(tmp_path)), + ) + result = pipeline.process( + boot=boot, + discovery=discovery, + cache=cache, + on_parallel_fallback=lambda exc: callbacks.append(type(exc).__name__), + ) + + assert callbacks == ["RuntimeError"] + assert result.files_analyzed == len(filepaths) + assert result.files_skipped == 0 + + +def test_process_cache_put_file_entry_fallback_without_source_stats_support( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + filepath, boot, discovery = _build_single_file_process_case(tmp_path) + + class _LegacyCache: + def __init__(self) -> None: + self.calls = 0 + + def put_file_entry( + self, + _filepath: str, + _stat_sig: object, + _units: object, + _blocks: object, + _segments: object, + *, + file_metrics: object | None = None, + structural_findings: object | None = None, + ) -> None: + self.calls += 1 + + def save(self) -> None: + return None + + cache = _LegacyCache() + monkeypatch.setattr( + pipeline, + "process_file", + _stub_process_file( + 
expected_root=str(tmp_path), + expected_filepath=filepath, + ), + ) + + result = pipeline.process( + boot=boot, + discovery=discovery, + cache=cache, # type: ignore[arg-type] + ) + + assert result.files_analyzed == 1 + assert result.files_skipped == 0 + assert cache.calls == 1 + + +def test_process_cache_put_file_entry_type_error_is_raised( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + filepath, boot, discovery = _build_single_file_process_case(tmp_path) + + class _BrokenCache: + def put_file_entry( + self, + _filepath: str, + _stat_sig: object, + _units: object, + _blocks: object, + _segments: object, + *, + source_stats: object | None = None, + file_metrics: object | None = None, + structural_findings: object | None = None, + ) -> None: + raise TypeError("broken cache write") + + monkeypatch.setattr( + pipeline, + "process_file", + _stub_process_file( + expected_root=str(tmp_path), + expected_filepath=filepath, + ), + ) + + with pytest.raises(TypeError, match="broken cache write"): + pipeline.process( + boot=boot, + discovery=discovery, + cache=_BrokenCache(), # type: ignore[arg-type] + ) + + +def test_usable_cached_source_stats_respects_required_sections() -> None: + source_stats: SourceStatsDict = { + "lines": 5, + "functions": 2, + "methods": 1, + "classes": 1, + } + base_entry: CacheEntry = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "source_stats": source_stats, + } + complete_entry: CacheEntry = { + **base_entry, + "source_stats": source_stats, + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + "structural_findings": [], + } + assert pipeline._usable_cached_source_stats( + complete_entry, + skip_metrics=False, + collect_structural_findings=True, + ) == (5, 2, 1, 1) + assert ( + pipeline._usable_cached_source_stats( + base_entry, + skip_metrics=False, + 
collect_structural_findings=False, + ) + is None + ) + assert ( + pipeline._usable_cached_source_stats( + { + **base_entry, + "class_metrics": [], + "module_deps": [], + "dead_candidates": [], + "referenced_names": [], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + }, + skip_metrics=False, + collect_structural_findings=True, + ) + is None + ) + + +def test_report_json_only_does_not_import_markdown_or_sarif( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + boot, discovery, processing, analysis = _build_report_case(tmp_path, json_out=True) + original_import: Callable[..., object] = builtins.__import__ + + def _guard_import( + name: str, + globals: dict[str, object] | None = None, + locals: dict[str, object] | None = None, + fromlist: tuple[str, ...] = (), + level: int = 0, + ) -> object: + if name in {"codeclone.report.markdown", "codeclone.report.sarif"}: + raise AssertionError(f"unexpected import: {name}") + return original_import(name, globals, locals, fromlist, level) + + monkeypatch.setattr(builtins, "__import__", _guard_import) + + artifacts = pipeline.report( + boot=boot, + discovery=discovery, + processing=processing, + analysis=analysis, + report_meta={}, + new_func=(), + new_block=(), + html_builder=None, + metrics_diff=None, + ) + + assert artifacts.json is not None + assert artifacts.md is None + assert artifacts.sarif is None diff --git a/tests/test_python_syntax_compat.py b/tests/test_python_syntax_compat.py new file mode 100644 index 0000000..6d52266 --- /dev/null +++ b/tests/test_python_syntax_compat.py @@ -0,0 +1,19 @@ +import ast +from pathlib import Path + + +def _python_sources() -> list[Path]: + roots = (Path("codeclone"), Path("tests")) + paths: list[Path] = [] + for root in roots: + paths.extend( + path for path in root.rglob("*.py") if "__pycache__" not in path.parts + ) + return sorted(paths) + + +def test_repo_syntax_is_compatible_with_python_310_and_311() -> None: + for minor in (10, 11): + for 
path in _python_sources(): + source = path.read_text(encoding="utf-8") + ast.parse(source, filename=str(path), feature_version=minor) diff --git a/tests/test_report.py b/tests/test_report.py index a072fe5..79b1370 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -1,13 +1,22 @@ import ast import json -from collections.abc import Callable +from collections.abc import Callable, Collection, Mapping, Sequence +from hashlib import sha256 from pathlib import Path from typing import cast import pytest import codeclone.report as report_mod +import codeclone.report.merge as merge_mod +import codeclone.report.overview as overview_mod +import codeclone.report.serialize as serialize_mod from codeclone.contracts import CACHE_VERSION, REPORT_SCHEMA_VERSION +from codeclone.models import ( + StructuralFindingGroup, + StructuralFindingOccurrence, + Suggestion, +) from codeclone.report import ( GroupMap, build_block_group_facts, @@ -16,9 +25,22 @@ build_segment_groups, prepare_block_report_groups, prepare_segment_report_groups, - to_json, - to_json_report, - to_text_report, + to_markdown_report, + to_sarif_report, +) +from codeclone.report.findings import build_structural_findings_html_panel +from codeclone.report.json_contract import build_report_document +from codeclone.report.overview import materialize_report_overview +from codeclone.report.serialize import ( + render_json_report_document, + render_text_report_document, +) +from tests._assertions import assert_contains_all, assert_mapping_entries +from tests._report_access import ( + report_clone_groups as _clone_groups, +) +from tests._report_access import ( + report_structural_groups as _structural_groups, ) from tests._report_fixtures import ( REPEATED_STMT_HASH, @@ -27,6 +49,82 @@ ) +def to_json_report( + func_groups: GroupMap, + block_groups: GroupMap, + segment_groups: GroupMap, + meta: Mapping[str, object] | None = None, + inventory: Mapping[str, object] | None = None, + block_facts: Mapping[str, 
Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Sequence[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> str: + payload = build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + metrics=metrics, + suggestions=suggestions or (), + structural_findings=structural_findings or (), + ) + return render_json_report_document(payload) + + +def to_text_report( + *, + meta: Mapping[str, object], + inventory: Mapping[str, object] | None = None, + func_groups: GroupMap, + block_groups: GroupMap, + segment_groups: GroupMap, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Sequence[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> str: + payload = build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + metrics=metrics, + suggestions=suggestions or (), + structural_findings=structural_findings or (), + ) + return render_text_report_document(payload) + + +def _clone_group_map( + payload: 
dict[str, object], + kind: str, +) -> dict[str, dict[str, object]]: + rows = _clone_groups(payload, kind) + mapping: dict[str, dict[str, object]] = {} + for row in rows: + facts = row["facts"] + assert isinstance(facts, dict) + mapping[str(facts["group_key"])] = row + return mapping + + def test_build_function_groups() -> None: units = [ {"fingerprint": "abc", "loc_bucket": "20-49", "qualname": "a"}, @@ -193,10 +291,13 @@ def test_build_block_group_facts_assert_only(tmp_path: Path) -> None: assert group["pattern_display"] == f"{REPEATED_STMT_HASH[:12]} x4" assert group["hint"] == "assert_only" assert group["hint_label"] == "Assert-only block" - assert group["hint_confidence"] == "deterministic" - assert group["assert_ratio"] == "100%" - assert group["consecutive_asserts"] == "4" - assert group["group_display_name"] == "Assert pattern block" + assert_mapping_entries( + group, + hint_confidence="deterministic", + assert_ratio="100%", + consecutive_asserts="4", + group_display_name="Assert pattern block", + ) assert group["group_arity"] == "1" assert group["instance_peer_count"] == "0" @@ -256,9 +357,21 @@ def test_report_output_formats( baseline_path="/tmp/codeclone.baseline.json", baseline_schema_version=1, cache_path="/tmp/cache.json", + scan_root="/repo", ) - json_out = to_json(groups) report_out = to_json_report(groups, groups, {}, meta) + markdown_out = to_markdown_report( + meta=meta, + func_groups=groups, + block_groups=groups, + segment_groups={}, + ) + sarif_out = to_sarif_report( + meta=meta, + func_groups=groups, + block_groups=groups, + segment_groups={}, + ) text_out = to_text_report( meta=meta, func_groups=groups, @@ -266,42 +379,150 @@ def test_report_output_formats( segment_groups={}, ) - expected_json = ["group_count"] expected_report = [ '"meta"', - '"groups"', - '"groups_split"', - '"group_item_layout"', + '"inventory"', + '"findings"', + '"integrity"', f'"report_schema_version": "{REPORT_SCHEMA_VERSION}"', - '"baseline_schema_version": 1', - 
f'"baseline_payload_sha256": "{"a" * 64}"', - '"baseline_payload_sha256_verified": true', - f'"cache_schema_version": "{CACHE_VERSION}"', - '"cache_status": "ok"', - '"files_skipped_source_io": 0', + '"report_generated_at_utc": "2026-03-10T12:00:00Z"', + '"schema_version": "1"', + f'"payload_sha256": "{"a" * 64}"', + '"payload_sha256_verified": true', + f'"schema_version": "{CACHE_VERSION}"', + '"status": "ok"', + '"source_io_skipped": 0', ] expected_text = [ "REPORT METADATA", - "Report schema version: 1.1", + f"Report schema version: {REPORT_SCHEMA_VERSION}", "Python tag: cp313", + "Report generated (UTC): 2026-03-10T12:00:00Z", + "Baseline path: codeclone.baseline.json", "Baseline schema version: 1", "Baseline generator name: codeclone", f"Baseline payload sha256: {'a' * 64}", "Baseline payload verified: true", + "Cache path: cache.json", f"Cache schema version: {CACHE_VERSION}", "Cache status: ok", - "Source IO skipped: 0", + "INVENTORY", + "source_io_skipped=0", + "INTEGRITY", "FUNCTION CLONES (NEW) (groups=2)", "FUNCTION CLONES (KNOWN) (groups=0)", "Clone group #1", ] + expected_markdown = [ + "# CodeClone Report", + "- Markdown schema: 1.0", + f"- Source report schema: {REPORT_SCHEMA_VERSION}", + "- Report generated (UTC): 2026-03-10T12:00:00Z", + '', + "## Overview", + '', + "### Clone Findings", + '', + "## Integrity", + ] + sarif_payload = json.loads(sarif_out) + run = sarif_payload["runs"][0] - for token in expected_json: - assert token in json_out for token in expected_report: assert token in report_out for token in expected_text: assert token in text_out + for token in expected_markdown: + assert token in markdown_out + assert sarif_payload["$schema"].endswith("sarif-2.1.0.json") + assert sarif_payload["version"] == "2.1.0" + assert run["tool"]["driver"]["name"] == "codeclone" + assert run["automationDetails"]["id"] == "codeclone/full" + assert run["properties"]["reportSchemaVersion"] == REPORT_SCHEMA_VERSION + assert 
run["properties"]["reportGeneratedAtUtc"] == "2026-03-10T12:00:00Z" + assert run["columnKind"] == "utf16CodeUnits" + assert run["originalUriBaseIds"]["%SRCROOT%"]["uri"] == "file:///repo/" + assert run["artifacts"] + assert run["invocations"][0]["workingDirectory"]["uri"] == "file:///repo/" + assert any(rule["id"] == "CCLONE001" for rule in run["tool"]["driver"]["rules"]) + first_rule = run["tool"]["driver"]["rules"][0] + assert first_rule["name"].startswith("codeclone.") + assert "help" in first_rule + assert "markdown" in first_rule["help"] + assert first_rule["properties"]["tags"] + assert any( + result["fingerprints"]["codecloneFindingId"].startswith("clone:") + for result in run["results"] + ) + + +def test_report_sarif_uses_representative_and_related_locations() -> None: + groups = { + "k1": [ + { + "qualname": "pkg.alpha:transform_alpha", + "filepath": "tests/fixtures/golden_project/alpha.py", + "start_line": 1, + "end_line": 10, + "loc": 10, + "stmt_count": 6, + "fingerprint": "fp1", + "loc_bucket": "1-19", + "cyclomatic_complexity": 2, + "nesting_depth": 1, + "risk": "low", + "raw_hash": "raw1", + }, + { + "qualname": "pkg.beta:transform_beta", + "filepath": "tests/fixtures/golden_project/beta.py", + "start_line": 2, + "end_line": 11, + "loc": 10, + "stmt_count": 6, + "fingerprint": "fp1", + "loc_bucket": "1-19", + "cyclomatic_complexity": 2, + "nesting_depth": 1, + "risk": "low", + "raw_hash": "raw2", + }, + ] + } + sarif_payload = json.loads( + to_sarif_report( + meta={"codeclone_version": "2.0.0b1", "scan_root": "/repo"}, + func_groups=groups, + block_groups={}, + segment_groups={}, + ) + ) + run = sarif_payload["runs"][0] + result = run["results"][0] + assert result["ruleId"] == "CCLONE001" + assert result["level"] == "warning" + assert result["baselineState"] == "new" + assert result["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == ( + "tests/fixtures/golden_project/alpha.py" + ) + assert ( + 
result["locations"][0]["physicalLocation"]["artifactLocation"]["uriBaseId"] + == "%SRCROOT%" + ) + assert result["locations"][0]["physicalLocation"]["artifactLocation"]["index"] == 0 + assert result["locations"][0]["logicalLocations"][0]["fullyQualifiedName"] == ( + "pkg.alpha:transform_alpha" + ) + assert result["locations"][0]["message"]["text"] == "Representative occurrence" + assert ( + result["relatedLocations"][0]["physicalLocation"]["artifactLocation"]["uri"] + == "tests/fixtures/golden_project/beta.py" + ) + assert result["relatedLocations"][0]["id"] == 1 + assert result["relatedLocations"][0]["message"]["text"] == "Related occurrence #1" + assert result["properties"]["cloneType"] == "Type-2" + assert result["properties"]["groupArity"] == 2 + assert "primaryLocationLineHash" in result["partialFingerprints"] def test_report_json_deterministic_group_order() -> None: @@ -332,7 +553,7 @@ def test_report_json_deterministic_group_order() -> None: assert out_a == out_b -def test_report_json_group_order_is_lexicographic() -> None: +def test_report_json_group_order_is_deterministic_by_count_then_id() -> None: groups = { "b": [ { @@ -371,7 +592,11 @@ def test_report_json_group_order_is_lexicographic() -> None: } payload = to_json_report(groups, {}, {}, {"codeclone_version": "1.3.0"}) report_obj = json.loads(payload) - assert list(report_obj["groups"]["functions"].keys()) == ["a", "b", "c"] + assert [row["id"] for row in _clone_groups(report_obj, "functions")] == [ + "clone:function:c", + "clone:function:a", + "clone:function:b", + ] def test_report_json_deterministic_with_shuffled_units() -> None: @@ -404,7 +629,7 @@ def test_report_json_deterministic_with_shuffled_units() -> None: assert out_a == out_b -def test_report_json_compact_v11_contract() -> None: +def test_report_json_compact_v21_contract() -> None: groups = { "g1": [ { @@ -431,50 +656,74 @@ def test_report_json_compact_v11_contract() -> None: } payload = json.loads(to_json_report(groups, {}, {}, 
{"codeclone_version": "1.4.0"})) - assert payload["meta"]["report_schema_version"] == REPORT_SCHEMA_VERSION - assert payload["files"] == ["a.py", "z.py"] - assert set(payload["groups"]) == {"functions", "blocks", "segments"} - assert payload["groups_split"] == { - "functions": {"new": ["g1"], "known": []}, - "blocks": {"new": [], "known": []}, - "segments": {"new": [], "known": []}, - } - assert payload["meta"]["groups_counts"] == { - "functions": {"total": 1, "new": 1, "known": 0}, - "blocks": {"total": 0, "new": 0, "known": 0}, - "segments": {"total": 0, "new": 0, "known": 0}, + assert "report_schema_version" not in payload["meta"] + assert payload["inventory"]["file_registry"] == { + "encoding": "relative_path", + "items": ["a.py", "z.py"], } - assert payload["group_item_layout"] == { - "functions": [ - "file_i", - "qualname", - "start", - "end", - "loc", - "stmt_count", - "fingerprint", - "loc_bucket", - ], - "blocks": ["file_i", "qualname", "start", "end", "size"], - "segments": [ - "file_i", - "qualname", - "start", - "end", - "size", - "segment_hash", - "segment_sig", - ], + clones = payload["findings"]["groups"]["clones"] + assert set(clones) == {"functions", "blocks", "segments"} + assert payload["findings"]["summary"]["clones"] == { + "functions": 1, + "blocks": 0, + "segments": 0, + "new": 1, + "known": 0, } - assert "function_clones" not in payload - assert "block_clones" not in payload - assert "segment_clones" not in payload - - function_rows = payload["groups"]["functions"]["g1"] - assert function_rows == [ - [0, "m:b", 1, 2, 2, 1, "fp-a", "0-19"], - [1, "m:a", 3, 4, 2, 1, "fp-z", "0-19"], + + function_group = _clone_group_map(payload, "functions")["g1"] + assert function_group["clone_type"] == "Type-3" + assert function_group["novelty"] == "new" + assert function_group["items"] == [ + { + "relative_path": "a.py", + "qualname": "m:b", + "start_line": 1, + "end_line": 2, + "loc": 2, + "stmt_count": 1, + "fingerprint": "fp-a", + "loc_bucket": "0-19", + 
"cyclomatic_complexity": 1, + "nesting_depth": 0, + "risk": "low", + "raw_hash": "", + }, + { + "relative_path": "z.py", + "qualname": "m:a", + "start_line": 3, + "end_line": 4, + "loc": 2, + "stmt_count": 1, + "fingerprint": "fp-z", + "loc_bucket": "0-19", + "cyclomatic_complexity": 1, + "nesting_depth": 0, + "risk": "low", + "raw_hash": "", + }, ] + assert set(payload) == { + "report_schema_version", + "meta", + "inventory", + "findings", + "metrics", + "derived", + "integrity", + } + for legacy_key in ( + "files", + "clones", + "groups", + "groups_split", + "clone_types", + "suggestions", + "overview", + "structural_findings", + ): + assert legacy_key not in payload def test_report_json_block_records_do_not_repeat_group_hash() -> None: @@ -497,15 +746,298 @@ def test_report_json_block_records_do_not_repeat_group_hash() -> None: {"codeclone_version": "1.4.0"}, ) ) - rows = payload["groups"]["blocks"][block_group_key] - assert rows == [[0, "m:f", 10, 13, 4]] + block_group = _clone_group_map(payload, "blocks")[block_group_key] + assert block_group["items"] == [ + { + "relative_path": "a.py", + "qualname": "m:f", + "start_line": 10, + "end_line": 13, + "size": 4, + } + ] -def test_report_json_includes_sorted_block_facts() -> None: +def test_report_json_serializes_rich_suggestions_and_overview() -> None: + payload = json.loads( + to_json_report( + {}, + {}, + {}, + {"codeclone_version": "1.4.0"}, + suggestions=( + Suggestion( + severity="warning", + category="clone", + title="Function clone group (Type-2)", + location="2 occurrences across 2 files / 2 functions", + steps=("Extract shared function",), + effort="easy", + priority=2.0, + finding_family="clones", + subject_key="clone:g1", + fact_kind="Function clone group", + fact_summary="same parameterized function body", + fact_count=2, + spread_files=2, + spread_functions=2, + clone_type="Type-2", + confidence="high", + source_kind="production", + source_breakdown=(("production", 2),), + location_label="2 occurrences 
across 2 files / 2 functions", + ), + ), + ) + ) + suggestion = payload["derived"]["suggestions"][0] + assert set(suggestion) == { + "id", + "finding_id", + "title", + "summary", + "location_label", + "representative_locations", + "action", + } + assert suggestion["finding_id"] == "clone:function:clone:g1" + assert suggestion["summary"] == "same parameterized function body" + assert suggestion["representative_locations"] == [] + assert suggestion["action"] == { + "effort": "easy", + "steps": ["Extract shared function"], + } + overview = payload["derived"]["overview"] + assert overview["families"]["clones"] == 0 + assert overview["source_scope_breakdown"] == {} + assert payload["derived"]["hotlists"]["most_actionable_ids"] == [] + + +def test_report_json_integrity_matches_canonical_sections() -> None: payload = json.loads( to_json_report( + { + "g1": [ + { + "qualname": "m:a", + "filepath": "a.py", + "start_line": 1, + "end_line": 3, + "loc": 3, + "stmt_count": 2, + "fingerprint": "fp-a", + "loc_bucket": "0-19", + }, + { + "qualname": "m:b", + "filepath": "b.py", + "start_line": 2, + "end_line": 4, + "loc": 3, + "stmt_count": 2, + "fingerprint": "fp-a", + "loc_bucket": "0-19", + }, + ] + }, + {}, {}, + {"codeclone_version": "1.4.0"}, + ) + ) + canonical_payload = { + "report_schema_version": payload["report_schema_version"], + "meta": { + key: value for key, value in payload["meta"].items() if key != "runtime" + }, + "inventory": payload["inventory"], + "findings": payload["findings"], + "metrics": payload["metrics"], + } + canonical_json = json.dumps( + canonical_payload, + ensure_ascii=False, + separators=(",", ":"), + sort_keys=True, + ).encode("utf-8") + assert payload["integrity"]["canonicalization"] == { + "version": "1", + "scope": "canonical_only", + "sections": [ + "report_schema_version", + "meta", + "inventory", + "findings", + "metrics", + ], + } + assert payload["integrity"]["digest"] == { + "verified": True, + "algorithm": "sha256", + "value": 
sha256(canonical_json).hexdigest(), + } + + +def test_report_json_integrity_ignores_derived_changes() -> None: + base_args: tuple[ + dict[str, list[dict[str, object]]], + dict[str, list[dict[str, object]]], + dict[str, list[dict[str, object]]], + dict[str, object], + ] = ( + { + "g1": [ + { + "qualname": "m:a", + "filepath": "a.py", + "start_line": 1, + "end_line": 3, + "loc": 3, + "stmt_count": 2, + "fingerprint": "fp-a", + "loc_bucket": "0-19", + }, + { + "qualname": "m:b", + "filepath": "b.py", + "start_line": 2, + "end_line": 4, + "loc": 3, + "stmt_count": 2, + "fingerprint": "fp-a", + "loc_bucket": "0-19", + }, + ] + }, + {}, + {}, + {"codeclone_version": "1.4.0"}, + ) + suggestion_a = Suggestion( + severity="warning", + category="clone", + title="Function clone group (Type-2)", + location="2 occurrences across 2 files / 2 functions", + steps=("Extract shared function",), + effort="easy", + priority=2.0, + finding_family="clones", + subject_key="clone:g1", + fact_kind="Function clone group", + fact_summary="same parameterized function body", + fact_count=2, + spread_files=2, + spread_functions=2, + clone_type="Type-2", + confidence="high", + source_kind="production", + source_breakdown=(("production", 2),), + location_label="2 occurrences across 2 files / 2 functions", + ) + suggestion_b = Suggestion( + severity="warning", + category="clone", + title="Refactor duplicated function body", + location="example location", + steps=("Extract helper", "Pass parameters"), + effort="moderate", + priority=1.5, + finding_family="clones", + subject_key="clone:g1", + fact_kind="Function clone group", + fact_summary="same parameterized function body", + fact_count=2, + spread_files=2, + spread_functions=2, + clone_type="Type-2", + confidence="high", + source_kind="production", + source_breakdown=(("production", 2),), + location_label="example location", + ) + payload_a = json.loads(to_json_report(*base_args, suggestions=(suggestion_a,))) + payload_b = 
json.loads(to_json_report(*base_args, suggestions=(suggestion_b,))) + assert payload_a["derived"]["suggestions"] != payload_b["derived"]["suggestions"] + assert payload_a["integrity"]["digest"] == payload_b["integrity"]["digest"] + + +def test_report_json_integrity_ignores_display_facts_changes() -> None: + base_args: tuple[ + dict[str, list[dict[str, object]]], + dict[str, list[dict[str, object]]], + dict[str, list[dict[str, object]]], + dict[str, object], + ] = ( + {}, + { + "group-a": [ + { + "qualname": "pkg:fa", + "filepath": "/root/a.py", + "start_line": 20, + "end_line": 23, + "size": 4, + } + ] + }, + {}, + {"codeclone_version": "1.4.0", "scan_root": "/root"}, + ) + payload_a = json.loads( + to_json_report( + *base_args, + block_facts={ + "group-a": { + "block_size": "4", + "merged_regions": "true", + "pattern_display": "abcd1234 x4", + } + }, + ) + ) + payload_b = json.loads( + to_json_report( + *base_args, + block_facts={ + "group-a": { + "block_size": "4", + "merged_regions": "true", + "pattern_display": "different display string", + } + }, + ) + ) + assert ( + payload_a["findings"]["groups"]["clones"]["blocks"][0]["display_facts"] + != payload_b["findings"]["groups"]["clones"]["blocks"][0]["display_facts"] + ) + assert payload_a["integrity"]["digest"] == payload_b["integrity"]["digest"] + + +def test_report_json_includes_sorted_block_facts() -> None: + payload = json.loads( + to_json_report( {}, + { + "group-b": [ + { + "qualname": "pkg:fb", + "filepath": "b.py", + "start_line": 10, + "end_line": 13, + "size": 4, + } + ], + "group-a": [ + { + "qualname": "pkg:fa", + "filepath": "a.py", + "start_line": 20, + "end_line": 23, + "size": 4, + } + ], + }, {}, {"codeclone_version": "1.4.0"}, block_facts={ @@ -514,12 +1046,605 @@ def test_report_json_includes_sorted_block_facts() -> None: }, ) ) - assert payload["facts"] == { - "blocks": { - "group-a": {"k": "v"}, - "group-b": {"a": "x", "z": "3"}, + block_groups = _clone_group_map(payload, "blocks") + assert 
block_groups["group-a"]["facts"] == { + "group_key": "group-a", + "group_arity": 1, + } + assert block_groups["group-a"]["display_facts"] == {"k": "v"} + assert block_groups["group-b"]["facts"] == { + "group_key": "group-b", + "group_arity": 1, + } + assert block_groups["group-b"]["display_facts"] == {"a": "x", "z": "3"} + + +def test_report_json_block_group_splits_machine_and_display_facts() -> None: + payload = json.loads( + to_json_report( + {}, + { + "group-a": [ + { + "qualname": "pkg:fa", + "filepath": "/root/a.py", + "start_line": 20, + "end_line": 23, + "size": 4, + } + ], + }, + {}, + {"codeclone_version": "1.4.0", "scan_root": "/root"}, + block_facts={ + "group-a": { + "group_arity": "1", + "block_size": "4", + "merged_regions": "true", + "assert_ratio": "25%", + "consecutive_asserts": "2", + "pattern_display": "abcd1234 x4", + "group_compare_note": "display note", + } + }, + ) + ) + group = _clone_group_map(payload, "blocks")["group-a"] + assert group["facts"] == { + "group_key": "group-a", + "group_arity": 1, + "block_size": 4, + "merged_regions": True, + "assert_ratio": 0.25, + "consecutive_asserts": 2, + } + assert group["display_facts"] == { + "assert_ratio": "25%", + "group_compare_note": "display note", + "pattern_display": "abcd1234 x4", + } + + +def test_report_json_uses_relative_paths_in_canonical_layers() -> None: + payload = json.loads( + to_json_report( + { + "g1": [ + { + "qualname": "m:a", + "filepath": "/root/src/a.py", + "start_line": 1, + "end_line": 2, + "loc": 2, + "stmt_count": 1, + "fingerprint": "fp-a", + "loc_bucket": "0-19", + } + ] + }, + {}, + {}, + { + "codeclone_version": "1.4.0", + "scan_root": "/root", + "baseline_path": "/root/codeclone.baseline.json", + }, + ) + ) + assert payload["meta"]["scan_root"] == "." 
+ assert payload["meta"]["runtime"]["report_generated_at_utc"] is None + assert payload["meta"]["runtime"]["scan_root_absolute"] == "/root" + assert payload["meta"]["baseline"]["path"] == "codeclone.baseline.json" + assert payload["inventory"]["file_registry"]["items"] == ["src/a.py"] + items = _clone_group_map(payload, "functions")["g1"]["items"] + assert isinstance(items, list) + item = items[0] + assert isinstance(item, dict) + assert item["relative_path"] == "src/a.py" + + +def test_report_json_dead_code_summary_uses_high_confidence_key() -> None: + payload = json.loads( + to_json_report( + {}, + {}, + {}, + {"codeclone_version": "1.4.0"}, + metrics={ + "dead_code": { + "items": [ + { + "qualname": "pkg.mod:unused", + "filepath": "pkg/mod.py", + "start_line": 10, + "end_line": 12, + "kind": "function", + "confidence": "high", + } + ], + "summary": {"critical": 1}, + } + }, + ) + ) + summary = payload["metrics"]["families"]["dead_code"]["summary"] + assert summary == {"total": 1, "high_confidence": 1, "suppressed": 0} + + +def test_report_json_dead_code_suppressed_items_are_reported_separately() -> None: + payload = json.loads( + to_json_report( + {}, + {}, + {}, + {"codeclone_version": "1.4.0", "scan_root": "/root"}, + metrics={ + "dead_code": { + "items": [], + "suppressed_items": [ + { + "qualname": "pkg.mod:runtime_hook", + "filepath": "/root/pkg/mod.py", + "start_line": 40, + "end_line": 41, + "kind": "function", + "confidence": "high", + "suppressed_by": [ + {"rule": "dead-code", "source": "inline_codeclone"}, + {"rule": "dead-code", "source": "inline_codeclone"}, + ], + } + ], + "summary": {"suppressed": 1}, + } + }, + ) + ) + dead_code = payload["metrics"]["families"]["dead_code"] + assert dead_code["summary"] == {"total": 0, "high_confidence": 0, "suppressed": 1} + suppressed_items = dead_code["suppressed_items"] + assert suppressed_items == [ + { + "qualname": "pkg.mod:runtime_hook", + "relative_path": "pkg/mod.py", + "start_line": 40, + "end_line": 
41, + "kind": "function", + "confidence": "high", + "suppressed_by": [{"rule": "dead-code", "source": "inline_codeclone"}], + "suppression_rule": "dead-code", + "suppression_source": "inline_codeclone", } + ] + assert payload["findings"]["groups"]["dead_code"]["groups"] == [] + assert payload["findings"]["summary"]["suppressed"] == {"dead_code": 1} + + +def test_report_json_integrity_ignores_runtime_report_timestamp() -> None: + payload_a = json.loads( + to_json_report( + {}, + {}, + {}, + { + "codeclone_version": "1.4.0", + "report_generated_at_utc": "2026-03-10T12:00:00Z", + }, + ) + ) + payload_b = json.loads( + to_json_report( + {}, + {}, + {}, + { + "codeclone_version": "1.4.0", + "report_generated_at_utc": "2030-01-01T00:00:00Z", + }, + ) + ) + assert ( + payload_a["meta"]["runtime"]["report_generated_at_utc"] + != payload_b["meta"]["runtime"]["report_generated_at_utc"] + ) + assert payload_a["integrity"]["digest"] == payload_b["integrity"]["digest"] + + +def test_report_json_hotlists_reference_existing_finding_ids() -> None: + payload = json.loads( + to_json_report( + { + "g1": [ + { + "qualname": "pkg.mod:a", + "filepath": "/root/a.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + { + "qualname": "pkg.mod:b", + "filepath": "/root/b.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + ] + }, + {}, + {}, + {"codeclone_version": "1.4.0", "scan_root": "/root"}, + metrics={ + "dead_code": { + "items": [ + { + "qualname": "pkg.mod:unused", + "filepath": "/root/pkg/mod.py", + "start_line": 10, + "end_line": 12, + "kind": "function", + "confidence": "high", + } + ], + "summary": {"critical": 1}, + }, + "health": {"score": 80, "grade": "B", "dimensions": {"clones": 80}}, + }, + ) + ) + groups = payload["findings"]["groups"] + canonical_ids = { + *(group["id"] for group in groups["clones"]["functions"]), + 
*(group["id"] for group in groups["clones"]["blocks"]), + *(group["id"] for group in groups["clones"]["segments"]), + *(group["id"] for group in groups["structural"]["groups"]), + *(group["id"] for group in groups["dead_code"]["groups"]), + *(group["id"] for group in groups["design"]["groups"]), } + hotlists = payload["derived"]["hotlists"] + for ids in hotlists.values(): + assert set(ids).issubset(canonical_ids) + + +def test_report_overview_materializes_source_breakdown_and_hotlist_cards() -> None: + structural = ( + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="k" * 40, + signature={ + "stmt_seq": "Expr,Return", + "terminal": "return", + "raises": "0", + "has_loop": "0", + }, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="k" * 40, + file_path="/repo/pkg/mod.py", + qualname="pkg.mod:fn", + start=10, + end=12, + signature={}, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="k" * 40, + file_path="/repo/pkg/mod.py", + qualname="pkg.mod:fn", + start=20, + end=22, + signature={}, + ), + ), + ), + ) + payload = build_report_document( + func_groups={ + "g1": [ + { + "qualname": "tests.fixtures.sample:a", + "filepath": "/repo/tests/fixtures/sample/a.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + { + "qualname": "tests.fixtures.sample:b", + "filepath": "/repo/tests/fixtures/sample/b.py", + "start_line": 1, + "end_line": 20, + "loc": 20, + "stmt_count": 8, + "fingerprint": "fp-a", + "loc_bucket": "20-49", + }, + ] + }, + block_groups={}, + segment_groups={}, + meta={"scan_root": "/repo"}, + structural_findings=structural, + ) + + derived = cast(Mapping[str, object], payload["derived"]) + materialized = materialize_report_overview( + overview=cast(Mapping[str, object], derived["overview"]), + hotlists=cast(Mapping[str, object], derived["hotlists"]), + 
findings=cast(Mapping[str, object], payload["findings"]), + ) + + assert materialized["source_breakdown"] == {"production": 1, "fixtures": 1} + assert materialized["highest_spread"] + assert materialized["production_hotspots"] + assert materialized["test_fixture_hotspots"] + production_hotspots = cast( + Sequence[Mapping[str, object]], + materialized["production_hotspots"], + ) + test_fixture_hotspots = cast( + Sequence[Mapping[str, object]], + materialized["test_fixture_hotspots"], + ) + assert production_hotspots[0]["title"] == "Repeated branch family" + assert test_fixture_hotspots[0]["title"] == "Function clone group (Type-2)" + + +def test_report_overview_clone_summary_variants() -> None: + assert ( + overview_mod._clone_summary_from_group( + {"category": "function", "clone_type": "Type-1", "facts": {}} + ) + == "same exact function body" + ) + assert ( + overview_mod._clone_summary_from_group( + {"category": "function", "clone_type": "Type-3", "facts": {}} + ) + == "same structural function body with small identifier changes" + ) + assert ( + overview_mod._clone_summary_from_group( + {"category": "function", "clone_type": "Type-4", "facts": {}} + ) + == "same structural function body" + ) + assert ( + overview_mod._clone_summary_from_group( + { + "category": "block", + "clone_type": "Type-4", + "facts": {"hint": "assert_only"}, + } + ) + == "same assertion template" + ) + assert ( + overview_mod._clone_summary_from_group( + { + "category": "block", + "clone_type": "Type-4", + "facts": {"pattern": "repeated_stmt_hash"}, + } + ) + == "same repeated setup/assert pattern" + ) + assert ( + overview_mod._clone_summary_from_group( + {"category": "block", "clone_type": "Type-4", "facts": {}} + ) + == "same structural sequence with small value changes" + ) + assert ( + overview_mod._clone_summary_from_group( + {"category": "segment", "clone_type": "Type-4", "facts": {}} + ) + == "same structural segment sequence" + ) + + +def 
test_report_overview_structural_summary_variants() -> None: + assert overview_mod._structural_summary_from_group( + {"category": "clone_guard_exit_divergence"} + ) == ( + "Clone guard/exit divergence", + "clone cohort members differ in entry guards or early-exit behavior", + ) + assert overview_mod._structural_summary_from_group( + {"category": "clone_cohort_drift"} + ) == ( + "Clone cohort drift", + "clone cohort members drift from majority terminal/guard/try profile", + ) + assert overview_mod._structural_summary_from_group( + { + "category": "duplicated_branches", + "signature": {"stable": {"terminal_kind": "raise"}, "debug": {}}, + } + ) == ("Repeated branch family", "same repeated guard/validation branch") + assert overview_mod._structural_summary_from_group( + { + "category": "duplicated_branches", + "signature": {"stable": {"terminal_kind": "return"}, "debug": {}}, + } + ) == ("Repeated branch family", "same repeated return branch") + assert overview_mod._structural_summary_from_group( + { + "category": "duplicated_branches", + "signature": {"debug": {"has_loop": "1"}}, + } + ) == ("Repeated branch family", "same repeated loop branch") + assert overview_mod._structural_summary_from_group( + { + "category": "duplicated_branches", + "signature": {"debug": {"stmt_seq": "Expr,If"}}, + } + ) == ("Repeated branch family", "same repeated branch shape (Expr,If)") + assert overview_mod._structural_summary_from_group( + {"category": "duplicated_branches", "signature": {}} + ) == ("Repeated branch family", "same repeated branch shape") + + +def test_report_overview_location_helpers_cover_edge_cases() -> None: + assert overview_mod._single_item_location({"module": "pkg.alpha"}) == "pkg.alpha" + assert overview_mod._single_item_location({}) == "(unknown)" + assert ( + overview_mod._single_item_location({"relative_path": "pkg/mod.py"}) + == "pkg/mod.py" + ) + assert ( + overview_mod._single_item_location( + {"relative_path": "pkg/mod.py", "start_line": 10, "end_line": 12} 
+ ) + == "pkg/mod.py:10-12" + ) + assert ( + overview_mod._group_location_label( + { + "category": "dependency", + "items": [{"module": "pkg.a"}, {"module": "pkg.b"}], + "count": 2, + "spread": {"files": 2, "functions": 0}, + } + ) + == "pkg.a -> pkg.b" + ) + assert ( + overview_mod._group_location_label( + { + "category": "function", + "items": [ + {"relative_path": "pkg/mod.py", "start_line": 5, "end_line": 5} + ], + "count": 1, + "spread": {"files": 1, "functions": 1}, + } + ) + == "pkg/mod.py:5" + ) + assert ( + overview_mod._group_location_label( + { + "category": "function", + "items": [{"relative_path": "pkg/mod.py"}], + "count": 3, + "spread": {"files": 2, "functions": 3}, + } + ) + == "3 occurrences across 2 files / 3 functions" + ) + + +def test_report_overview_serialize_finding_group_card_covers_families() -> None: + dead_card = overview_mod.serialize_finding_group_card( + { + "family": "dead_code", + "category": "method", + "severity": "warning", + "confidence": "high", + "count": 1, + "source_scope": {"dominant_kind": "production"}, + "spread": {"files": 1, "functions": 1}, + "items": [ + { + "relative_path": "pkg/mod.py", + "qualname": "pkg.mod:C.m", + "start_line": 7, + "end_line": 8, + } + ], + "facts": {}, + } + ) + assert dead_card["title"] == "Remove or explicitly keep unused code" + assert dead_card["summary"] == "method with high confidence" + + complexity_card = overview_mod.serialize_finding_group_card( + { + "family": "design", + "category": "complexity", + "severity": "warning", + "confidence": "high", + "count": 1, + "source_scope": {"dominant_kind": "production"}, + "spread": {"files": 1, "functions": 1}, + "items": [{"relative_path": "pkg/mod.py", "start_line": 3, "end_line": 9}], + "facts": {"cyclomatic_complexity": 21, "nesting_depth": 4}, + } + ) + assert complexity_card["title"] == "Reduce high-complexity function" + assert complexity_card["summary"] == "cyclomatic_complexity=21, nesting_depth=4" + + coupling_card = 
overview_mod.serialize_finding_group_card( + { + "family": "design", + "category": "coupling", + "severity": "warning", + "confidence": "high", + "count": 1, + "source_scope": {"dominant_kind": "production"}, + "spread": {"files": 1, "functions": 1}, + "items": [{"relative_path": "pkg/mod.py", "start_line": 3, "end_line": 9}], + "facts": {"cbo": 11}, + } + ) + assert coupling_card["title"] == "Split high-coupling class" + assert coupling_card["summary"] == "cbo=11" + + cohesion_card = overview_mod.serialize_finding_group_card( + { + "family": "design", + "category": "cohesion", + "severity": "warning", + "confidence": "high", + "count": 1, + "source_scope": {"dominant_kind": "production"}, + "spread": {"files": 1, "functions": 1}, + "items": [{"relative_path": "pkg/mod.py", "start_line": 3, "end_line": 9}], + "facts": {"lcom4": 5}, + } + ) + assert cohesion_card["title"] == "Split low-cohesion class" + assert cohesion_card["summary"] == "lcom4=5" + + dependency_card = overview_mod.serialize_finding_group_card( + { + "family": "design", + "category": "dependency", + "severity": "critical", + "confidence": "high", + "count": 3, + "source_scope": {"dominant_kind": "other"}, + "spread": {"files": 3, "functions": 0}, + "items": [{"module": "pkg.a"}, {"module": "pkg.b"}, {"module": "pkg.c"}], + "facts": {"cycle_length": 3}, + } + ) + assert dependency_card["title"] == "Break circular dependency" + assert dependency_card["summary"] == "3 modules participate in this cycle" + assert dependency_card["location"] == "pkg.a -> pkg.b -> pkg.c" + + +def test_report_overview_materialize_preserves_existing_cards_and_breakdown() -> None: + materialized = materialize_report_overview( + overview={ + "source_breakdown": {"tests": 9}, + "highest_spread": [{"title": "preset"}], + }, + hotlists={"highest_spread_ids": ["clone:function:abc"]}, + findings={"groups": {}}, + ) + assert materialized["source_breakdown"] == {"tests": 9} + assert materialized["highest_spread"] == [{"title": 
"preset"}] def test_report_json_groups_split_trusted_baseline() -> None: @@ -593,20 +1718,22 @@ def test_report_json_groups_split_trusted_baseline() -> None: new_segment_group_keys={"segment-new"}, ) ) - split = payload["groups_split"] - assert split["functions"] == {"new": ["func-new"], "known": ["func-known"]} - assert split["blocks"] == {"new": ["block-new"], "known": ["block-known"]} - assert split["segments"] == {"new": ["segment-new"], "known": []} - for section_name in ("functions", "blocks", "segments"): - new_keys = set(split[section_name]["new"]) - known_keys = set(split[section_name]["known"]) - group_keys = set(payload["groups"][section_name].keys()) - assert new_keys.isdisjoint(known_keys) - assert new_keys | known_keys == group_keys - counts = payload["meta"]["groups_counts"][section_name] - assert counts["total"] == len(group_keys) - assert counts["new"] == len(new_keys) - assert counts["known"] == len(known_keys) + clones = payload["findings"]["groups"]["clones"] + function_map = _clone_group_map(payload, "functions") + block_map = _clone_group_map(payload, "blocks") + segment_map = _clone_group_map(payload, "segments") + assert function_map["func-new"]["novelty"] == "new" + assert function_map["func-known"]["novelty"] == "known" + assert block_map["block-new"]["novelty"] == "new" + assert block_map["block-known"]["novelty"] == "known" + assert segment_map["segment-new"]["novelty"] == "new" + assert payload["findings"]["summary"]["clones"] == { + "functions": len(clones["functions"]), + "blocks": len(clones["blocks"]), + "segments": len(clones["segments"]), + "new": 3, + "known": 2, + } def test_report_json_groups_split_untrusted_baseline() -> None: @@ -633,10 +1760,15 @@ def test_report_json_groups_split_untrusted_baseline() -> None: new_function_group_keys=set(), ) ) - split = payload["groups_split"] - assert split["functions"] == {"new": ["func-a"], "known": []} - assert split["blocks"] == {"new": [], "known": []} - assert split["segments"] == 
{"new": [], "known": []} + function_map = _clone_group_map(payload, "functions") + assert function_map["func-a"]["novelty"] == "new" + assert payload["findings"]["summary"]["clones"] == { + "functions": 1, + "blocks": 0, + "segments": 0, + "new": 1, + "known": 0, + } def test_text_report_deterministic_group_order() -> None: @@ -660,10 +1792,15 @@ def test_text_report_deterministic_group_order() -> None: } ], } - text = report_mod.to_text(groups) - first_idx = text.find("Clone group #1") - a_idx = text.find("a.py") - b_idx = text.find("b.py") + text = to_text_report( + meta={}, + func_groups=groups, + block_groups={}, + segment_groups={}, + ) + first_idx = text.find("=== Clone group #1 ===") + a_idx = text.find("a.py:1-2") + b_idx = text.find("b.py:2-3") assert first_idx != -1 assert a_idx != -1 assert b_idx != -1 @@ -677,18 +1814,24 @@ def test_to_text_report_handles_missing_meta_fields() -> None: block_groups={}, segment_groups={}, ) - assert "Report schema version: (none)" in text_out - assert "CodeClone version: (none)" in text_out - assert "Baseline status: (none)" in text_out - assert "Cache path: (none)" in text_out - assert "Cache used: (none)" in text_out - assert "Note: baseline is untrusted; all groups are treated as NEW." 
in text_out - assert "FUNCTION CLONES (NEW) (groups=0)\n(none)" in text_out - assert "FUNCTION CLONES (KNOWN) (groups=0)\n(none)" in text_out - assert "BLOCK CLONES (NEW) (groups=0)\n(none)" in text_out - assert "BLOCK CLONES (KNOWN) (groups=0)\n(none)" in text_out - assert "SEGMENT CLONES (NEW) (groups=0)\n(none)" in text_out - assert "SEGMENT CLONES (KNOWN) (groups=0)\n(none)" in text_out + assert_contains_all( + text_out, + f"Report schema version: {REPORT_SCHEMA_VERSION}", + "CodeClone version: (none)", + "Report generated (UTC): (none)", + "Baseline status: (none)", + "Cache path: (none)", + "Cache used: false", + "INVENTORY", + "INTEGRITY", + "Note: baseline is untrusted; all groups are treated as NEW.", + "FUNCTION CLONES (NEW) (groups=0)\n(none)", + "FUNCTION CLONES (KNOWN) (groups=0)\n(none)", + "BLOCK CLONES (NEW) (groups=0)\n(none)", + "BLOCK CLONES (KNOWN) (groups=0)\n(none)", + "SEGMENT CLONES (NEW) (groups=0)\n(none)", + "SEGMENT CLONES (KNOWN) (groups=0)\n(none)", + ) def test_to_text_report_uses_section_specific_metric_labels() -> None: @@ -1300,3 +2443,498 @@ def test_collect_file_functions_class_and_async(tmp_path: Path) -> None: ] groups = build_segment_groups(segments) assert groups == {} + + +def test_report_serialize_helpers_and_text_metrics_section() -> None: + assert merge_mod.coerce_positive_int(True) == 1 + assert serialize_mod._as_int(True) == 1 + assert serialize_mod._as_int("42") == 42 + assert serialize_mod._as_int("bad") == 0 + assert serialize_mod._as_int(1.2) == 0 + + text_report = to_text_report( + meta={}, + func_groups={}, + block_groups={}, + segment_groups={}, + metrics={"health": {"score": 90}}, + ) + assert "METRICS SUMMARY" in text_report + assert "health: score=90" in text_report + + +def test_text_and_markdown_report_include_suppressed_dead_code_sections() -> None: + payload = build_report_document( + func_groups={}, + block_groups={}, + segment_groups={}, + meta={"scan_root": "/root"}, + metrics={ + "dead_code": { + 
"items": [], + "suppressed_items": [ + { + "qualname": "pkg.mod:runtime_hook", + "filepath": "/root/pkg/mod.py", + "start_line": 5, + "end_line": 6, + "kind": "function", + "confidence": "high", + "suppressed_by": [ + {"rule": "dead-code", "source": "inline_codeclone"} + ], + } + ], + "summary": {"suppressed": 1}, + } + }, + ) + text = render_text_report_document(payload) + assert_contains_all( + text, + "dead_code: total=0 high_confidence=0 suppressed=1", + "SUPPRESSED DEAD CODE (items=1)", + "suppressed_by=dead-code@inline_codeclone", + ) + + markdown = to_markdown_report( + report_document=payload, + meta={}, + func_groups={}, + block_groups={}, + segment_groups={}, + ) + assert '' in markdown + assert "suppression_rule=dead-code" in markdown + + +# --------------------------------------------------------------------------- +# Structural findings serialization +# --------------------------------------------------------------------------- + + +def _make_sf_group() -> StructuralFindingGroup: + """Build a StructuralFindingGroup for serialization tests.""" + sig = { + "calls": "1", + "has_loop": "1", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr,For", + "terminal": "fallthrough", + } + occ1 = StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="abc" * 13 + "a", + file_path="/proj/a.py", + qualname="mod:fn", + start=5, + end=6, + signature=sig, + ) + occ2 = StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="abc" * 13 + "a", + file_path="/proj/a.py", + qualname="mod:fn", + start=8, + end=9, + signature=sig, + ) + return StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="abc" * 13 + "a", + signature=sig, + items=(occ1, occ2), + ) + + +def _make_guard_divergence_group() -> StructuralFindingGroup: + sig = { + "cohort_id": "fp-a|20-49", + "cohort_arity": "4", + "divergent_members": "1", + "majority_guard_count": "2", + "majority_guard_terminal_profile": 
"return_const,raise", + "majority_terminal_kind": "return_const", + "majority_side_effect_before_guard": "0", + "guard_count_values": "1,2", + "guard_terminal_values": "raise,return_const,raise", + "terminal_values": "raise,return_const", + "side_effect_before_guard_values": "0,1", + } + occ = StructuralFindingOccurrence( + finding_kind="clone_guard_exit_divergence", + finding_key="guard-div", + file_path="/proj/b.py", + qualname="mod:drift_fn", + start=40, + end=60, + signature=sig, + ) + return StructuralFindingGroup( + finding_kind="clone_guard_exit_divergence", + finding_key="guard-div", + signature=sig, + items=(occ,), + ) + + +def _make_cohort_drift_group() -> StructuralFindingGroup: + sig = { + "cohort_id": "fp-a|20-49", + "cohort_arity": "4", + "divergent_members": "1", + "drift_fields": "terminal_kind,guard_exit_profile", + "majority_terminal_kind": "return_const", + "majority_guard_exit_profile": "2x:return_const,raise", + "majority_try_finally_profile": "none", + "majority_side_effect_order_profile": "guard_then_effect", + } + occ = StructuralFindingOccurrence( + finding_kind="clone_cohort_drift", + finding_key="cohort-drift", + file_path="/proj/c.py", + qualname="mod:drift_fn", + start=70, + end=90, + signature=sig, + ) + return StructuralFindingGroup( + finding_kind="clone_cohort_drift", + finding_key="cohort-drift", + signature=sig, + items=(occ,), + ) + + +def test_json_includes_structural_findings_when_non_empty() -> None: + group = _make_sf_group() + report_str = to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + payload = json.loads(report_str) + sf = payload["findings"]["groups"]["structural"] + assert len(sf["groups"]) == 1 + g = sf["groups"][0] + assert g["kind"] == "duplicated_branches" + assert g["count"] == 2 + assert g["spread"]["files"] == 1 + assert g["items"][0] == { + "relative_path": "a.py", + "qualname": "mod:fn", + "start_line": 5, + "end_line": 6, + } + + +def 
test_json_includes_clone_guard_exit_divergence_structural_group() -> None: + group = _make_guard_divergence_group() + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + ) + finding = _structural_groups(payload)[0] + assert finding["kind"] == "clone_guard_exit_divergence" + assert finding["count"] == 1 + assert finding["confidence"] == "high" + signature = cast(dict[str, object], finding["signature"]) + stable = cast(dict[str, object], signature["stable"]) + assert stable["family"] == "clone_guard_exit_divergence" + facts = cast(dict[str, object], finding["facts"]) + assert facts["cohort_id"] == "fp-a|20-49" + assert facts["divergent_members"] == 1 + + +def test_json_includes_clone_cohort_drift_structural_group() -> None: + group = _make_cohort_drift_group() + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + ) + finding = _structural_groups(payload)[0] + assert finding["kind"] == "clone_cohort_drift" + signature = cast(dict[str, object], finding["signature"]) + stable = cast(dict[str, object], signature["stable"]) + assert stable["family"] == "clone_cohort_drift" + assert stable["drift_fields"] == ["guard_exit_profile", "terminal_kind"] + + +def test_text_and_sarif_renderers_cover_new_structural_kinds() -> None: + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[ + _make_guard_divergence_group(), + _make_cohort_drift_group(), + ], + ) + ) + text = render_text_report_document(payload) + assert_contains_all( + text, + "Clone guard/exit divergence", + "Clone cohort drift", + "majority_guard_count", + "drift_fields", + ) + + sarif = json.loads( + to_sarif_report( + report_document=payload, + meta={}, + func_groups={}, + block_groups={}, + segment_groups={}, + ) + ) + run = sarif["runs"][0] + rule_ids = {rule["id"] for rule in 
run["tool"]["driver"]["rules"]} + assert "CSTRUCT002" in rule_ids + assert "CSTRUCT003" in rule_ids + messages = [result["message"]["text"] for result in run["results"]] + assert any("guard/exit divergence" in message for message in messages) + assert any("cohort drift" in message for message in messages) + + +def test_json_structural_findings_deduplicates_occurrences() -> None: + group = _make_sf_group() + duplicate_group = StructuralFindingGroup( + finding_kind=group.finding_kind, + finding_key=group.finding_key, + signature=group.signature, + items=(group.items[0], group.items[0], group.items[1]), + ) + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[duplicate_group], + ) + ) + finding = _structural_groups(payload)[0] + assert finding["count"] == 2 + assert finding["items"] == [ + { + "relative_path": "a.py", + "qualname": "mod:fn", + "start_line": 5, + "end_line": 6, + }, + { + "relative_path": "a.py", + "qualname": "mod:fn", + "start_line": 8, + "end_line": 9, + }, + ] + + +def test_json_structural_findings_sorts_signature_keys() -> None: + signature = { + "stmt_seq": "Expr,Return", + "terminal": "return_const", + "calls": "1", + "raises": "0", + } + group = StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="sig-order", + signature=signature, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="sig-order", + file_path="/proj/a.py", + qualname="mod:fn", + start=5, + end=6, + signature=signature, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="sig-order", + file_path="/proj/a.py", + qualname="mod:fn", + start=8, + end=9, + signature=signature, + ), + ), + ) + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + ) + finding = _structural_groups(payload)[0] + finding_signature = finding["signature"] + 
assert isinstance(finding_signature, dict) + debug = finding_signature["debug"] + assert isinstance(debug, dict) + assert list(debug) == [ + "calls", + "raises", + "stmt_seq", + "terminal", + ] + + +def test_json_structural_findings_prunes_overlapping_occurrences() -> None: + group = _make_sf_group() + overlapping_group = StructuralFindingGroup( + finding_kind=group.finding_kind, + finding_key=group.finding_key, + signature=group.signature, + items=( + group.items[0], + StructuralFindingOccurrence( + finding_kind=group.finding_kind, + finding_key=group.finding_key, + file_path="/proj/a.py", + qualname="mod:fn", + start=6, + end=6, + signature=group.signature, + ), + group.items[1], + ), + ) + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[overlapping_group], + ) + ) + finding = _structural_groups(payload)[0] + assert finding["count"] == 2 + assert finding["items"] == [ + { + "relative_path": "a.py", + "qualname": "mod:fn", + "start_line": 5, + "end_line": 6, + }, + { + "relative_path": "a.py", + "qualname": "mod:fn", + "start_line": 8, + "end_line": 9, + }, + ] + + +def test_json_structural_findings_filters_trivial_groups() -> None: + sig = { + "calls": "2+", + "has_loop": "0", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr", + "terminal": "expr", + } + trivial_group = StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="def" * 13 + "d", + signature=sig, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="def" * 13 + "d", + file_path="/proj/a.py", + qualname="mod:fn", + start=5, + end=5, + signature=sig, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="def" * 13 + "d", + file_path="/proj/a.py", + qualname="mod:fn", + start=8, + end=8, + signature=sig, + ), + ), + ) + payload = json.loads( + to_json_report( + func_groups={}, + block_groups={}, + 
segment_groups={}, + structural_findings=[trivial_group], + ) + ) + assert _structural_groups(payload) == [] + + +def test_json_no_structural_findings_key_when_empty() -> None: + report_str = to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[], + ) + payload = json.loads(report_str) + assert _structural_groups(payload) == [] + + +def test_structural_findings_json_deterministic() -> None: + group = _make_sf_group() + r1 = to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + r2 = to_json_report( + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + assert r1 == r2 + + +def test_txt_includes_structural_findings_block() -> None: + group = _make_sf_group() + report_str = to_text_report( + meta={}, + func_groups={}, + block_groups={}, + segment_groups={}, + structural_findings=[group], + ) + assert "STRUCTURAL FINDINGS" in report_str + assert "Duplicated branches" in report_str + + +def test_html_panel_explains_local_non_overlapping_structural_findings() -> None: + group = _make_sf_group() + html = build_structural_findings_html_panel([group], ["/proj/a.py"]) + assert "Repeated non-overlapping branch-body shapes" in html + assert "local, report-only refactoring hints" in html + assert "Occurrences (2)" in html + assert "All occurrences belong to 1 function in 1 file." 
in html diff --git a/tests/test_report_branch_invariants.py b/tests/test_report_branch_invariants.py new file mode 100644 index 0000000..098abf2 --- /dev/null +++ b/tests/test_report_branch_invariants.py @@ -0,0 +1,365 @@ +from __future__ import annotations + +from codeclone._html_snippets import _FileCache +from codeclone.models import StructuralFindingGroup, StructuralFindingOccurrence +from codeclone.report.explain_contract import ( + BLOCK_HINT_ASSERT_ONLY, + BLOCK_PATTERN_REPEATED_STMT_HASH, +) +from codeclone.report.findings import ( + _dedupe_items, + _finding_matters_html, + _finding_scope_text, + _finding_why_template_html, + _occurrences_table_html, +) +from codeclone.report.markdown import ( + _append_findings_section, + _append_metric_items, + _location_text, +) +from codeclone.report.markdown import ( + _as_float as _markdown_as_float, +) +from codeclone.report.overview import _health_snapshot +from codeclone.report.sarif import _result_properties +from codeclone.report.suggestions import ( + _clone_steps, + _clone_summary, + _structural_steps, + _structural_summary, +) +from tests._assertions import assert_contains_all + + +def _occurrence( + *, + qualname: str, + start: int, + end: int, + file_path: str = "/repo/codeclone/codeclone/cache.py", +) -> StructuralFindingOccurrence: + return StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="k", + file_path=file_path, + qualname=qualname, + start=start, + end=end, + signature={"stmt_seq": "Expr,Return", "terminal": "return"}, + ) + + +def _group( + *, + key: str, + signature: dict[str, str], + items: tuple[StructuralFindingOccurrence, ...], +) -> StructuralFindingGroup: + return StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key=key, + signature=signature, + items=items, + ) + + +def test_clone_summary_and_steps_cover_branch_kinds() -> None: + assert _clone_summary(kind="function", clone_type="Type-4", facts={}) == ( + "same structural function body" 
+ ) + assert ( + _clone_summary( + kind="block", + clone_type="Type-4", + facts={"hint": BLOCK_HINT_ASSERT_ONLY}, + ) + == "same assertion template" + ) + assert ( + _clone_summary( + kind="block", + clone_type="Type-4", + facts={"pattern": BLOCK_PATTERN_REPEATED_STMT_HASH}, + ) + == "same repeated setup/assert pattern" + ) + assert _clone_steps( + kind="block", + clone_type="Type-4", + facts={"hint": BLOCK_HINT_ASSERT_ONLY}, + )[0].startswith("Collapse the repeated assertion template") + + +def test_structural_summary_and_steps_cover_all_terminal_paths() -> None: + raise_group = _group( + key="raise", + signature={"terminal": "raise", "stmt_seq": "Expr,Raise"}, + items=(_occurrence(qualname="pkg:a", start=1, end=2),) * 2, + ) + return_group = _group( + key="return", + signature={"terminal": "return", "stmt_seq": "Expr,Return"}, + items=(_occurrence(qualname="pkg:a", start=3, end=4),) * 2, + ) + loop_group = _group( + key="loop", + signature={"has_loop": "1", "stmt_seq": "For,Expr"}, + items=(_occurrence(qualname="pkg:a", start=5, end=7),) * 2, + ) + shape_group = _group( + key="shape", + signature={"stmt_seq": "Assign,Expr"}, + items=(_occurrence(qualname="pkg:a", start=8, end=9),) * 2, + ) + fallback_group = _group( + key="fallback", + signature={}, + items=(_occurrence(qualname="pkg:a", start=10, end=11),) * 2, + ) + guard_div_group = StructuralFindingGroup( + finding_kind="clone_guard_exit_divergence", + finding_key="guard-div", + signature={"cohort_id": "fp|20-49"}, + items=(_occurrence(qualname="pkg:a", start=12, end=13),), + ) + drift_group = StructuralFindingGroup( + finding_kind="clone_cohort_drift", + finding_key="cohort-drift", + signature={"cohort_id": "fp|20-49"}, + items=(_occurrence(qualname="pkg:a", start=14, end=15),), + ) + + assert _structural_summary(raise_group)[1] == ( + "same repeated guard/validation branch" + ) + assert _structural_summary(return_group)[1] == "same repeated return branch" + assert _structural_summary(loop_group)[1] == "same 
repeated loop branch" + assert _structural_summary(shape_group)[1] == ( + "same repeated branch shape (Assign,Expr)" + ) + assert _structural_summary(fallback_group)[1] == "same repeated branch shape" + assert _structural_summary(guard_div_group)[0] == "Clone guard/exit divergence" + assert _structural_summary(drift_group)[0] == "Clone cohort drift" + + assert _structural_steps(raise_group)[0].startswith( + "Factor the repeated validation/guard path" + ) + assert _structural_steps(return_group)[0].startswith( + "Consolidate the repeated return-path logic" + ) + assert _structural_steps(guard_div_group)[0].startswith( + "Compare divergent clone members" + ) + assert _structural_steps(drift_group)[0].startswith( + "Review whether cohort drift is intentional" + ) + + +def test_findings_occurrence_table_scope_and_dedupe_invariants() -> None: + duplicate = _occurrence(qualname="pkg.mod:f", start=10, end=12) + deduped = _dedupe_items( + ( + duplicate, + duplicate, + _occurrence(qualname="pkg.mod:g", start=20, end=22), + ) + ) + assert len(deduped) == 2 + + table_html = _occurrences_table_html( + ( + _occurrence(qualname="pkg.mod:f", start=1, end=2), + _occurrence(qualname="pkg.mod:f", start=3, end=4), + _occurrence(qualname="pkg.mod:f", start=5, end=6), + _occurrence(qualname="pkg.mod:f", start=7, end=8), + _occurrence(qualname="pkg.mod:g", start=9, end=10), + ), + scan_root="/repo/codeclone", + visible_limit=4, + ) + assert "Show 1 more occurrences" in table_html + assert ( + _finding_scope_text( + ( + _occurrence(qualname="pkg.mod:f", start=1, end=2), + _occurrence(qualname="pkg.mod:g", start=3, end=4), + ) + ) + == "across 2 functions in 1 file" + ) + + +def test_finding_matters_message_depends_on_scope_and_terminal() -> None: + cross_function_items = ( + _occurrence(qualname="pkg.mod:f", start=1, end=2), + _occurrence(qualname="pkg.mod:g", start=3, end=4), + ) + assert "repeats across 2 functions and 1 files" in _finding_matters_html( + _group( + key="cross", + 
signature={"terminal": "expr", "stmt_seq": "Expr,Expr"}, + items=cross_function_items, + ), + cross_function_items, + ) + + local_items = ( + _occurrence(qualname="pkg.mod:f", start=10, end=12), + _occurrence(qualname="pkg.mod:f", start=20, end=22), + ) + assert "repeated guard or validation exits" in _finding_matters_html( + _group( + key="raise", + signature={"terminal": "raise", "stmt_seq": "If,Raise"}, + items=local_items, + ), + local_items, + ) + assert "repeated return-path logic" in _finding_matters_html( + _group( + key="return", + signature={"terminal": "return", "stmt_seq": "Expr,Return"}, + items=local_items, + ), + local_items, + ) + + +def test_structural_why_template_covers_new_kind_reasoning_paths() -> None: + guard_group = StructuralFindingGroup( + finding_kind="clone_guard_exit_divergence", + finding_key="guard-div", + signature={ + "cohort_id": "fp-a|20-49", + "majority_guard_count": "2", + }, + items=( + _occurrence(qualname="pkg.mod:a", start=10, end=12), + _occurrence(qualname="pkg.mod:b", start=20, end=22), + ), + ) + drift_group = StructuralFindingGroup( + finding_kind="clone_cohort_drift", + finding_key="cohort-drift", + signature={ + "cohort_id": "fp-a|20-49", + "cohort_arity": "4", + "drift_fields": "terminal_kind,guard_exit_profile", + }, + items=( + _occurrence(qualname="pkg.mod:c", start=30, end=33), + _occurrence(qualname="pkg.mod:d", start=40, end=43), + ), + ) + + guard_html = _finding_why_template_html( + guard_group, + guard_group.items, + file_cache=_FileCache(), + context_lines=1, + max_snippet_lines=20, + ) + drift_html = _finding_why_template_html( + drift_group, + drift_group.items, + file_cache=_FileCache(), + context_lines=1, + max_snippet_lines=20, + ) + + assert_contains_all( + guard_html, + "clone cohort members with guard/exit divergence", + "majority guard count", + ) + assert_contains_all( + drift_html, + "cohort members that drift from majority profile", + "Drift fields", + ) + + +def 
test_markdown_helpers_cover_non_numeric_and_missing_fact_paths() -> None: + assert _markdown_as_float(object()) == 0.0 + assert ( + _location_text( + { + "relative_path": "a.py", + "start_line": 10, + "end_line": 10, + "qualname": "pkg:a", + } + ) + == "`a.py:10` :: `pkg:a`" + ) + + lines: list[str] = [] + _append_findings_section( + lines, + groups=( + { + "id": "clone:function:k", + "family": "clone", + "category": "function", + "kind": "clone_group", + "severity": "warning", + "confidence": "high", + "priority": 1.0, + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "spread": {"files": 1, "functions": 1}, + "count": 1, + "items": [ + { + "relative_path": "code/a.py", + "start_line": 1, + "end_line": 1, + "qualname": "pkg:a", + } + ], + }, + ), + ) + rendered = "\n".join(lines) + assert "Presentation facts" not in rendered + + metric_lines: list[str] = [] + _append_metric_items( + metric_lines, + items=({"qualname": "pkg:a", "cyclomatic_complexity": 21},), + key_order=("qualname", "cyclomatic_complexity"), + ) + assert "pkg:a" in "\n".join(metric_lines) + + +def test_overview_and_sarif_branch_invariants() -> None: + health = _health_snapshot( + { + "health": { + "score": 88, + "grade": "B", + "dimensions": {"coverage": 90, "complexity": "bad"}, + } + } + ) + assert health["strongest_dimension"] == "coverage" + assert health["weakest_dimension"] == "coverage" + + props = _result_properties( + { + "id": "dead_code:pkg.mod:unused", + "family": "dead_code", + "category": "function", + "kind": "unused_symbol", + "severity": "warning", + "confidence": "high", + "priority": 1.0, + "source_scope": {"impact_scope": "runtime", "dominant_kind": "production"}, + "spread": {"files": 1, "functions": 1}, + "facts": {}, + } + ) + assert props["confidence"] == "high" diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py new file mode 100644 index 0000000..36c0a12 --- /dev/null +++ 
b/tests/test_report_contract_coverage.py @@ -0,0 +1,1665 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import cast + +import pytest + +import codeclone.report.json_contract as json_contract_mod +from codeclone import _coerce +from codeclone.models import ( + ReportLocation, + StructuralFindingGroup, + StructuralFindingOccurrence, + Suggestion, +) +from codeclone.report import derived as derived_mod +from codeclone.report import overview as overview_mod +from codeclone.report.json_contract import ( + _build_design_groups, + _clone_group_assessment, + _collect_paths_from_metrics, + _collect_report_file_list, + _combined_impact_scope, + _contract_path, + _count_file_lines, + _count_file_lines_for_path, + _csv_values, + _derive_inventory_code_counts, + _findings_summary, + _is_absolute_path, + _normalize_block_machine_facts, + _normalize_nested_string_rows, + _parse_ratio_percent, + _source_scope_from_filepaths, + _source_scope_from_locations, + _structural_group_assessment, + _suggestion_finding_id, + build_report_document, +) +from codeclone.report.markdown import ( + render_markdown_report_document, + to_markdown_report, +) +from codeclone.report.sarif import ( + _baseline_state as _sarif_baseline_state, +) +from codeclone.report.sarif import ( + _location_entry as _sarif_location_entry, +) +from codeclone.report.sarif import ( + _location_message as _sarif_location_message, +) +from codeclone.report.sarif import ( + _logical_locations as _sarif_logical_locations, +) +from codeclone.report.sarif import ( + _partial_fingerprints as _sarif_partial_fingerprints, +) +from codeclone.report.sarif import ( + _result_message as _sarif_result_message, +) +from codeclone.report.sarif import ( + _result_properties as _sarif_result_properties, +) +from codeclone.report.sarif import ( + _rule_spec as _sarif_rule_spec, +) +from codeclone.report.sarif import ( + _scan_root_uri as 
_sarif_scan_root_uri, +) +from codeclone.report.sarif import ( + _severity_to_level, + render_sarif_report_document, + to_sarif_report, +) +from codeclone.report.sarif import ( + _slug as _sarif_slug, +) +from codeclone.report.sarif import ( + _text as _sarif_text, +) +from codeclone.report.serialize import ( + _append_single_item_findings, + _append_structural_findings, + _append_suggestions, + _append_suppressed_dead_code_items, + _structural_kind_label, + render_text_report_document, +) +from tests._assertions import assert_mapping_entries + + +def _rich_report_document() -> dict[str, object]: + func_groups = { + "fn-key": [ + { + "qualname": "pkg.alpha:run", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 10, + "end_line": 20, + "loc": 11, + "stmt_count": 6, + "fingerprint": "fp-a", + "loc_bucket": "1-19", + "cyclomatic_complexity": 4, + "nesting_depth": 2, + "risk": "medium", + "raw_hash": "rh-a", + }, + { + "qualname": "tests.alpha:test_run", + "filepath": "/repo/codeclone/tests/test_alpha.py", + "start_line": 12, + "end_line": 22, + "loc": 11, + "stmt_count": 6, + "fingerprint": "fp-a", + "loc_bucket": "1-19", + "cyclomatic_complexity": 2, + "nesting_depth": 1, + "risk": "low", + "raw_hash": "rh-b", + }, + ] + } + block_groups = { + "blk-key": [ + { + "block_hash": "blk-key", + "qualname": "pkg.alpha:run", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 100, + "end_line": 104, + "size": 5, + }, + { + "block_hash": "blk-key", + "qualname": "tests.fixtures.alpha:run_case", + "filepath": "/repo/codeclone/tests/fixtures/case.py", + "start_line": 40, + "end_line": 44, + "size": 5, + }, + ] + } + segment_groups = { + "seg-key": [ + { + "segment_hash": "seg-key", + "segment_sig": "sig-1", + "qualname": "pkg.alpha:seg", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 200, + "end_line": 205, + "size": 6, + }, + { + "segment_hash": "seg-key", + "segment_sig": "sig-1", + "qualname": "pkg.beta:seg", + 
"filepath": "/repo/codeclone/codeclone/beta.py", + "start_line": 210, + "end_line": 215, + "size": 6, + }, + ] + } + block_facts = { + "blk-key": { + "group_arity": "2", + "block_size": "5", + "consecutive_asserts": "1", + "instance_peer_count": "1", + "merged_regions": "true", + "assert_ratio": "75%", + "match_rule": "structural", + "pattern": "blk-pattern", + "signature_kind": "stmt-hash", + "hint": "same setup pattern", + "hint_confidence": "high", + "group_compare_note": "N-way group compare note", + } + } + structural_findings = ( + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="sf-1", + signature={"stmt_seq": "Expr,Return", "terminal": "return_const"}, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="sf-1", + file_path="/repo/codeclone/codeclone/cache.py", + qualname="codeclone.cache:Cache._load_and_validate", + start=120, + end=124, + signature={"stmt_seq": "Expr,Return", "terminal": "return_const"}, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="sf-1", + file_path="/repo/codeclone/codeclone/cache.py", + qualname="codeclone.cache:Cache._load_and_validate", + start=140, + end=144, + signature={"stmt_seq": "Expr,Return", "terminal": "return_const"}, + ), + ), + ), + ) + metrics = { + "complexity": { + "avg": 3.0, + "max": 50, + "functions": [ + { + "qualname": "pkg.alpha:hot", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 10, + "end_line": 40, + "cyclomatic_complexity": 50, + "nesting_depth": 3, + "risk": "high", + }, + { + "qualname": "pkg.alpha:warm", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 50, + "end_line": 70, + "cyclomatic_complexity": 25, + "nesting_depth": 2, + "risk": "medium", + }, + { + "qualname": "pkg.alpha:ok", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 80, + "end_line": 90, + "cyclomatic_complexity": 10, + "nesting_depth": 1, + "risk": "low", + }, + ], + }, 
+ "coupling": { + "avg": 2.0, + "max": 11, + "classes": [ + { + "qualname": "pkg.alpha:HotClass", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 1, + "end_line": 40, + "cbo": 11, + "risk": "high", + "coupled_classes": ["X", "X", "Y"], + }, + { + "qualname": "pkg.alpha:ColdClass", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 41, + "end_line": 60, + "cbo": 2, + "risk": "low", + "coupled_classes": [], + }, + ], + }, + "cohesion": { + "avg": 2.0, + "max": 4, + "classes": [ + { + "qualname": "pkg.alpha:LowCohesion", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 1, + "end_line": 40, + "lcom4": 4, + "risk": "high", + "method_count": 4, + "instance_var_count": 1, + }, + { + "qualname": "pkg.alpha:FineCohesion", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 41, + "end_line": 60, + "lcom4": 2, + "risk": "low", + "method_count": 2, + "instance_var_count": 1, + }, + ], + }, + "dependencies": { + "module_count": 2, + "edge_count": 1, + "cycles": [[], ["pkg.alpha", "pkg.beta"]], + "max_depth": 5, + "edges": [ + { + "source": "pkg.alpha", + "target": "pkg.beta", + "import_type": "import", + "line": 3, + } + ], + "longest_chains": [["pkg.alpha", "pkg.beta", "pkg.gamma"]], + }, + "dead_code": { + "summary": {"count": 2, "critical": 2}, + "items": [ + { + "qualname": "pkg.alpha:unused_fn", + "filepath": "/repo/codeclone/codeclone/alpha.py", + "start_line": 300, + "end_line": 305, + "kind": "function", + "confidence": "high", + }, + { + "qualname": "tests.alpha:unused_test", + "filepath": "/repo/codeclone/tests/test_alpha.py", + "start_line": 30, + "end_line": 33, + "kind": "function", + "confidence": "medium", + }, + ], + }, + "health": { + "summary": { + "score": 77, + "grade": "C", + "dimensions": { + "coverage": 90, + "complexity": 40, + }, + } + }, + } + suggestions = ( + Suggestion( + severity="critical", + category="clone", + title="Refactor function clones", + 
location="codeclone/alpha.py:10-20", + steps=("Extract helper", "Parametrize values"), + effort="moderate", + priority=3.0, + finding_family="clones", + finding_kind="clone_group", + subject_key="fn-key", + fact_kind="Function clone group", + fact_summary="same parameterized body", + fact_count=2, + spread_files=2, + spread_functions=2, + clone_type="Type-2", + confidence="high", + source_kind="production", + source_breakdown=(("production", 2),), + representative_locations=( + ReportLocation( + filepath="/repo/codeclone/codeclone/alpha.py", + relative_path="codeclone/alpha.py", + start_line=10, + end_line=20, + qualname="pkg.alpha:run", + source_kind="production", + ), + ), + location_label="2 occurrences across 2 files / 2 functions", + ), + Suggestion( + severity="warning", + category="structural", + title="Consolidate branch family", + location="codeclone/cache.py:120-124", + steps=("Extract branch helper",), + effort="easy", + priority=2.0, + finding_family="structural", + finding_kind="duplicated_branches", + subject_key="sf-1", + fact_kind="duplicated_branches", + fact_summary="same branch sequence", + fact_count=2, + spread_files=1, + spread_functions=1, + confidence="medium", + source_kind="production", + source_breakdown=(("production", 1),), + representative_locations=( + ReportLocation( + filepath="/repo/codeclone/codeclone/cache.py", + relative_path="codeclone/cache.py", + start_line=120, + end_line=124, + qualname="codeclone.cache:Cache._load_and_validate", + source_kind="production", + ), + ), + location_label="2 occurrences across 1 file / 1 function", + ), + Suggestion( + severity="warning", + category="dependency", + title="Break dependency cycle", + location="pkg.alpha -> pkg.beta", + steps=("Split imports",), + effort="hard", + priority=1.0, + finding_family="metrics", + finding_kind="cycle", + subject_key="pkg.alpha -> pkg.beta", + fact_kind="dependency cycle", + fact_summary="cycle detected", + fact_count=2, + spread_files=2, + 
spread_functions=0, + confidence="high", + source_kind="production", + source_breakdown=(("production", 2),), + ), + ) + meta = { + "codeclone_version": "2.0.0b1", + "project_name": "codeclone", + "scan_root": "/repo/codeclone", + "python_version": "3.13.11", + "python_tag": "cp313", + "analysis_mode": "full", + "report_mode": "full", + "baseline_loaded": True, + "baseline_status": "ok", + "cache_used": True, + "cache_status": "ok", + "report_generated_at_utc": "2026-03-11T10:00:00Z", + } + inventory = { + "files": {"total_found": 4, "analyzed": 4, "cached": 0, "skipped": 0}, + "code": {"parsed_lines": 100, "functions": 4, "methods": 1, "classes": 1}, + "file_list": [ + "/repo/codeclone/codeclone/alpha.py", + "/repo/codeclone/codeclone/beta.py", + "/repo/codeclone/tests/test_alpha.py", + "/repo/codeclone/tests/fixtures/case.py", + 123, # ignored by collector + ], + } + + return build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts, + new_function_group_keys={"fn-key"}, + new_block_group_keys={"blk-key"}, + new_segment_group_keys={"seg-key"}, + metrics=metrics, + suggestions=suggestions, + structural_findings=structural_findings, + ) + + +def test_report_document_rich_invariants_and_renderers() -> None: + payload = _rich_report_document() + findings = cast(dict[str, object], payload["findings"]) + groups = cast(dict[str, object], findings["groups"]) + design = cast(dict[str, object], groups["design"])["groups"] + design_groups = cast(list[dict[str, object]], design) + categories = {str(item["category"]) for item in design_groups} + assert {"complexity", "coupling", "cohesion", "dependency"}.issubset(categories) + + clones = cast(dict[str, object], groups["clones"]) + block_groups = cast(list[dict[str, object]], clones["blocks"]) + block_group = block_groups[0] + assert cast(dict[str, object], block_group["facts"])["assert_ratio"] == 0.75 + assert 
"group_compare_note" in cast(dict[str, object], block_group["display_facts"]) + + md = render_markdown_report_document(payload) + sarif = json.loads(render_sarif_report_document(payload)) + txt = render_text_report_document(payload) + assert "## Top Risks" in md + assert "SUGGESTIONS (count=" in txt + run = sarif["runs"][0] + rule_ids = {rule["id"] for rule in run["tool"]["driver"]["rules"]} + assert {"CCLONE001", "CSTRUCT001", "CDEAD001", "CDESIGN001", "CDESIGN004"}.issubset( + rule_ids + ) + assert run["originalUriBaseIds"]["%SRCROOT%"]["uri"] == "file:///repo/codeclone/" + assert run["artifacts"] + assert run["artifacts"][0]["location"]["uriBaseId"] == "%SRCROOT%" + assert any("relatedLocations" in result for result in run["results"]) + assert any("baselineState" in result for result in run["results"]) + assert all("help" in rule for rule in run["tool"]["driver"]["rules"]) + + +def test_markdown_and_sarif_reuse_prebuilt_report_document() -> None: + payload = _rich_report_document() + md = to_markdown_report( + report_document=payload, + meta={}, + func_groups={}, + block_groups={}, + segment_groups={}, + ) + sarif = to_sarif_report( + report_document=payload, + meta={}, + func_groups={}, + block_groups={}, + segment_groups={}, + ) + assert md.startswith("# CodeClone Report") + sarif_payload = json.loads(sarif) + assert sarif_payload["version"] == "2.1.0" + + +def test_json_contract_private_helpers_cover_edge_cases(tmp_path: Path) -> None: + assert _coerce.as_int(True) == 1 + assert _coerce.as_int("x", 9) == 9 + assert _coerce.as_float(True) == 1.0 + assert _coerce.as_float("x", 1.5) == 1.5 + assert _parse_ratio_percent("") is None + assert _parse_ratio_percent("25%") == 0.25 + assert _parse_ratio_percent("2") == 0.02 + assert _parse_ratio_percent("bad%") is None + assert _parse_ratio_percent("bad") is None + + machine, display = _normalize_block_machine_facts( + group_key="k", + group_arity=2, + block_facts={"assert_ratio": "not-a-ratio", "merged_regions": 
"yes"}, + ) + assert machine["merged_regions"] is True + assert display["assert_ratio"] == "not-a-ratio" + + in_root, scope, original = _contract_path( + "/repo/codeclone/codeclone/a.py", scan_root="/repo/codeclone" + ) + assert (in_root, scope, original) == ( + "codeclone/a.py", + "in_root", + "/repo/codeclone/codeclone/a.py", + ) + external, scope_ext, original_ext = _contract_path( + "/opt/ext/x.py", scan_root="/repo/codeclone" + ) + assert (external, scope_ext, original_ext) == ("x.py", "external", "/opt/ext/x.py") + rel, scope_rel, original_rel = _contract_path( + "codeclone/a.py", scan_root="/repo/codeclone" + ) + assert (rel, scope_rel, original_rel) == ("codeclone/a.py", "relative", None) + assert _is_absolute_path("") is False + + runtime_scope = _source_scope_from_filepaths( + ["/repo/codeclone/codeclone/a.py"], + scan_root="/repo/codeclone", + ) + non_runtime_scope = _source_scope_from_filepaths( + ["/repo/codeclone/tests/test_a.py"], + scan_root="/repo/codeclone", + ) + mixed_runtime_scope = _source_scope_from_filepaths( + ["/repo/codeclone/codeclone/a.py", "/repo/codeclone/tests/test_a.py"], + scan_root="/repo/codeclone", + ) + mixed_scope = _source_scope_from_locations( + [{"source_kind": "production"}, {"source_kind": "strange"}] + ) + assert { + "runtime": runtime_scope["impact_scope"], + "non_runtime": non_runtime_scope["impact_scope"], + "mixed_runtime": mixed_runtime_scope["impact_scope"], + "mixed_other": mixed_scope["impact_scope"], + } == { + "runtime": "runtime", + "non_runtime": "non_runtime", + "mixed_runtime": "mixed", + "mixed_other": "mixed", + } + + assert _normalize_nested_string_rows([["b", "a"], [], ["b", "a"], ["c"]]) == [ + ["c"], + ["b", "a"], + ["b", "a"], + ] + assert _count_file_lines_for_path(str(tmp_path / "missing.py")) == 0 + existing = tmp_path / "ok.py" + existing.write_text("a\nb\n", "utf-8") + assert _count_file_lines_for_path(str(existing)) == 2 + assert _combined_impact_scope([]) == "non_runtime" + assert ( + 
_combined_impact_scope([{"source_scope": {"impact_scope": "runtime"}}]) + == "runtime" + ) + assert ( + _combined_impact_scope( + [ + {"source_scope": {"impact_scope": "runtime"}}, + {"source_scope": {"impact_scope": "non_runtime"}}, + ] + ) + == "mixed" + ) + assert _clone_group_assessment(count=4, clone_type="Type-4")[0] == "critical" + + design_groups = _build_design_groups( + {"families": {"dependencies": {"cycles": [5]}}}, + scan_root="/repo/codeclone", + ) + assert design_groups == [] + + +def test_coerce_helper_numeric_branches() -> None: + assert _coerce.as_int(True) == 1 + assert _coerce.as_int("bad") == 0 + assert _coerce.as_float(True) == 1.0 + assert _coerce.as_float("bad") == 0.0 + assert _coerce.as_mapping("bad") == {} + assert _coerce.as_sequence("bad") == () + + +def test_count_file_lines_aggregates_paths(tmp_path: Path) -> None: + one = tmp_path / "one.py" + two = tmp_path / "two.py" + one.write_text("a\nb\n", "utf-8") + two.write_text("x\n", "utf-8") + assert _count_file_lines([str(one), str(two), str(tmp_path / "missing.py")]) == 3 + + +def test_derive_inventory_code_counts_uses_cached_line_scan_fallback( + tmp_path: Path, +) -> None: + source = tmp_path / "a.py" + source.write_text("def f():\n return 1\n", "utf-8") + + counts = _derive_inventory_code_counts( + metrics_payload={ + "families": { + "complexity": {"items": []}, + "cohesion": {"items": []}, + } + }, + inventory_code={ + "parsed_lines": "unknown", + "functions": 9, + "methods": 4, + "classes": 2, + }, + file_list=[str(source)], + cached_files=1, + ) + + assert_mapping_entries( + counts, + parsed_lines=2, + scope="mixed", + functions=9, + methods=4, + classes=2, + ) + + +def test_markdown_render_long_list_branches() -> None: + payload = cast(dict[str, object], json.loads(json.dumps(_rich_report_document()))) + findings = cast(dict[str, object], payload["findings"]) + groups = cast(dict[str, object], findings["groups"]) + clone_groups = cast(dict[str, object], groups["clones"]) + 
function_groups = cast(list[dict[str, object]], clone_groups["functions"]) + first_group = function_groups[0] + first_group_items = cast(list[dict[str, object]], first_group["items"]) + base_item = first_group_items[0] + first_group["items"] = [ + { + **base_item, + "start_line": 10 + idx, + "end_line": 11 + idx, + } + for idx in range(7) + ] + + metrics = cast(dict[str, object], payload["metrics"]) + families = cast(dict[str, object], metrics["families"]) + complexity = cast(dict[str, object], families["complexity"]) + complexity_items = cast(list[dict[str, object]], complexity["items"]) + base_metric = complexity_items[0] + complexity["items"] = [ + { + **base_metric, + "start_line": 100 + idx, + "end_line": 101 + idx, + "qualname": f"pkg.alpha:f{idx}", + } + for idx in range(12) + ] + + derived = cast(dict[str, object], payload["derived"]) + suggestions = cast(list[dict[str, object]], derived["suggestions"]) + suggestions[0]["action"] = {"effort": "easy", "steps": []} + markdown = render_markdown_report_document(payload) + assert "... and 2 more occurrence(s)" in markdown + assert "... 
and 2 more item(s)" in markdown + + +def test_sarif_helper_level_mapping() -> None: + assert _severity_to_level("critical") == "error" + assert _severity_to_level("warning") == "warning" + assert _severity_to_level("info") == "note" + assert _severity_to_level("unexpected") == "note" + + +def test_derived_module_branches() -> None: + assert derived_mod.relative_report_path("", scan_root="/repo/proj") == "" + assert ( + derived_mod.relative_report_path("/repo/proj/a.py", scan_root="/repo/proj") + == "a.py" + ) + assert ( + derived_mod.relative_report_path("/repo/proj", scan_root="/repo/proj") == "proj" + ) + assert derived_mod.classify_source_kind(".", scan_root="/repo/proj") == "other" + assert derived_mod.classify_source_kind("tests/fixtures/x.py") == "fixtures" + assert derived_mod.classify_source_kind("tests/x.py") == "tests" + assert derived_mod.combine_source_kinds([]) == "other" + assert derived_mod.combine_source_kinds(["production", "tests"]) == "mixed" + + loc = derived_mod.report_location_from_group_item( + { + "filepath": "/repo/proj/code/a.py", + "qualname": "pkg:a", + "start_line": True, + "end_line": 2, + }, + scan_root="/repo/proj", + ) + fallback_loc = derived_mod.report_location_from_group_item( + { + "filepath": "/repo/proj/code/b.py", + "qualname": "pkg:b", + "start_line": "x", + "end_line": "y", + }, + scan_root="/repo/proj", + ) + assert fallback_loc.start_line == 0 + assert fallback_loc.end_line == 0 + reps = derived_mod.representative_locations([loc, loc], limit=3) + assert len(reps) == 1 + assert derived_mod.format_group_location_label(reps, total_count=0) == "(unknown)" + assert derived_mod.format_group_location_label(reps, total_count=1).startswith( + "code/a.py" + ) + + +def test_overview_module_branches() -> None: + suggestion = Suggestion( + severity="warning", + category="dead_code", + title="Remove dead code", + location="code/a.py:1-2", + steps=("Delete symbol",), + effort="easy", + priority=2.0, + finding_family="metrics", + 
finding_kind="dead_code", + subject_key="code.a:dead", + fact_kind="dead code", + fact_summary="unused function", + fact_count=1, + spread_files=1, + spread_functions=1, + confidence="high", + source_kind="production", + ) + overview = overview_mod.build_report_overview( + suggestions=( + suggestion, + Suggestion( + severity="warning", + category="structural", + title="Structural signal", + location="code/b.py:3-4", + steps=("Refactor",), + effort="moderate", + priority=2.0, + finding_family="structural", + finding_kind="duplicated_branches", + subject_key="sf", + fact_kind="duplicated_branches", + fact_summary="same branch family", + fact_count=2, + spread_files=1, + spread_functions=1, + confidence="medium", + source_kind="production", + ), + Suggestion( + severity="critical", + category="clone", + title="Fixture clone", + location="tests/fixtures/x.py:1-4", + steps=("Extract fixture builder",), + effort="easy", + priority=3.0, + finding_family="clones", + finding_kind="clone_group", + subject_key="g", + fact_kind="Function clone group", + fact_summary="same body", + fact_count=2, + spread_files=1, + spread_functions=1, + confidence="high", + source_kind="fixtures", + ), + ), + metrics={ + "dead_code": {"summary": {"critical": 1}}, + "cohesion": {"summary": {"low_cohesion": 1}}, + "health": { + "score": 80, + "grade": "B", + "dimensions": { + "coverage": 90, + "complexity": 60, + }, + }, + }, + ) + families = cast(dict[str, object], overview["families"]) + assert families["dead_code"] == 1 + assert overview["top_risks"] + health = cast(dict[str, object], overview["health"]) + assert health["strongest_dimension"] == "coverage" + assert health["weakest_dimension"] == "complexity" + empty_overview = overview_mod.build_report_overview(suggestions=(), metrics=None) + assert empty_overview["top_risks"] == [] + + +def test_overview_handles_non_mapping_metric_summaries() -> None: + suggestion = Suggestion( + severity="warning", + category="structural", + 
title="Structural signal", + location="code/b.py:3-4", + steps=("Refactor",), + effort="moderate", + priority=2.0, + finding_family="structural", + finding_kind="duplicated_branches", + subject_key="sf", + fact_kind="duplicated_branches", + fact_summary="same branch family", + fact_count=2, + spread_files=1, + spread_functions=1, + confidence="medium", + source_kind="production", + ) + overview = overview_mod.build_report_overview( + suggestions=(suggestion,), + metrics={ + "dead_code": {"summary": []}, + "cohesion": {"summary": []}, + "health": {"score": 75, "grade": "C", "dimensions": {"quality": "bad"}}, + }, + ) + assert overview["top_risks"] == ["1 structural finding in production code"] + health = cast(dict[str, object], overview["health"]) + assert health["strongest_dimension"] is None + assert health["weakest_dimension"] is None + + +def test_overview_health_snapshot_handles_non_mapping_dimensions() -> None: + overview = overview_mod.build_report_overview( + suggestions=(), + metrics={"health": {"score": 72, "grade": "C", "dimensions": []}}, + ) + health = cast(dict[str, object], overview["health"]) + assert health == { + "score": 72, + "grade": "C", + "strongest_dimension": None, + "weakest_dimension": None, + } + + +def test_suggestion_finding_id_fallback_branch() -> None: + @dataclass + class _FakeSuggestion: + finding_family: str + finding_kind: str + subject_key: str + category: str + title: str + + fake = cast( + Suggestion, + _FakeSuggestion( + finding_family="metrics", + finding_kind="misc", + subject_key="", + category="unmapped_category", + title="Synthetic title", + ), + ) + assert _suggestion_finding_id(fake) == "design:unmapped_category:Synthetic title" + + +def test_suggestion_finding_id_segment_clone_branch() -> None: + segment_clone = Suggestion( + severity="info", + category="clone", + title="Segment clone", + location="code/a.py:1-3", + steps=(), + effort="easy", + priority=1.0, + finding_family="clones", + finding_kind="clone_group", + 
subject_key="seg-1", + fact_kind="Segment clone group", + fact_summary="same segment", + fact_count=2, + spread_files=2, + spread_functions=2, + confidence="medium", + source_kind="production", + ) + assert _suggestion_finding_id(segment_clone) == "clone:segment:seg-1" + + +def test_suggestion_finding_id_block_clone_branch() -> None: + block_clone = Suggestion( + severity="warning", + category="clone", + title="Block clone", + location="code/a.py:10-15", + steps=(), + effort="easy", + priority=1.5, + finding_family="clones", + finding_kind="clone_group", + subject_key="blk-1", + fact_kind="Block clone group", + fact_summary="same statement sequence", + fact_count=2, + spread_files=2, + spread_functions=2, + confidence="high", + source_kind="production", + ) + assert _suggestion_finding_id(block_clone) == "clone:block:blk-1" + + +def test_sarif_private_helper_branches() -> None: + assert _coerce.as_int(True) == 1 + assert _coerce.as_int("bad") == 0 + assert _coerce.as_float(True) == 1.0 + assert _coerce.as_float("bad") == 0.0 + assert _coerce.as_float(object()) == 0.0 + assert _coerce.as_mapping("bad") == {} + assert _coerce.as_sequence("bad") == () + assert _sarif_text(None) == "" + + dead_class = _sarif_rule_spec({"family": "dead_code", "category": "class"}) + dead_method = _sarif_rule_spec({"family": "dead_code", "category": "method"}) + dead_other = _sarif_rule_spec({"family": "dead_code", "category": "other"}) + assert dead_class.rule_id == "CDEAD002" + assert dead_method.rule_id == "CDEAD003" + assert dead_other.rule_id == "CDEAD004" + + dep_message = _sarif_result_message( + { + "family": "design", + "category": "dependency", + "count": 2, + "items": [{"module": "pkg.a"}, {"module": "pkg.b"}], + "spread": {"files": 2}, + } + ) + assert "Dependency cycle" in dep_message + structural_without_qualname = _sarif_result_message( + { + "family": "structural", + "category": "duplicated_branches", + "count": 2, + "signature": {"stable": {"stmt_shape": "Expr,Return"}}, 
+ "items": [{"relative_path": "code/a.py"}], + } + ) + assert "Repeated branch family" in structural_without_qualname + + assert _sarif_logical_locations({"module": "pkg.a"}) == [ + {"fullyQualifiedName": "pkg.a"} + ] + related = _sarif_location_entry( + {"relative_path": "code/a.py", "start_line": 1, "end_line": 2}, + related_id=7, + artifact_index_map={"code/a.py": 3}, + use_uri_base_id=True, + message_text="Related occurrence #7", + ) + related_message = cast(dict[str, object], related["message"]) + related_physical = cast(dict[str, object], related["physicalLocation"]) + related_artifact = cast(dict[str, object], related_physical["artifactLocation"]) + assert ( + related["id"], + related_message["text"], + related_artifact["uriBaseId"], + related_artifact["index"], + ) == (7, "Related occurrence #7", "%SRCROOT%", 3) + no_end_line = _sarif_location_entry( + {"relative_path": "code/a.py", "start_line": 1, "end_line": 0} + ) + region = cast(dict[str, object], no_end_line["physicalLocation"])["region"] + assert region == {"startLine": 1} + logical_only = _sarif_location_entry( + {"module": "pkg.a"}, + message_text="Cycle member", + ) + logical_message = cast(dict[str, object], logical_only["message"]) + assert "physicalLocation" not in logical_only + assert logical_only["logicalLocations"] == [{"fullyQualifiedName": "pkg.a"}] + assert logical_message["text"] == "Cycle member" + + +def test_sarif_private_helper_family_dispatches() -> None: + clone_function = _sarif_rule_spec({"family": "clone", "category": "function"}) + clone_block = _sarif_rule_spec({"family": "clone", "category": "block"}) + structural_guard = _sarif_rule_spec( + { + "family": "structural", + "kind": "clone_guard_exit_divergence", + } + ) + structural_drift = _sarif_rule_spec( + { + "family": "structural", + "kind": "clone_cohort_drift", + } + ) + design_cohesion = _sarif_rule_spec({"family": "design", "category": "cohesion"}) + design_complexity = _sarif_rule_spec({"family": "design", 
"category": "complexity"}) + design_coupling = _sarif_rule_spec({"family": "design", "category": "coupling"}) + design_dependency = _sarif_rule_spec({"family": "design", "category": "dependency"}) + assert clone_function.rule_id == "CCLONE001" + assert clone_block.rule_id == "CCLONE002" + assert structural_guard.rule_id == "CSTRUCT002" + assert structural_drift.rule_id == "CSTRUCT003" + assert design_cohesion.rule_id == "CDESIGN001" + assert design_complexity.rule_id == "CDESIGN002" + assert design_coupling.rule_id == "CDESIGN003" + assert design_dependency.rule_id == "CDESIGN004" + + assert ( + _sarif_result_message( + { + "family": "clone", + "category": "function", + "clone_type": "Type-2", + "count": 3, + "spread": {"files": 2}, + "items": [{"qualname": "pkg.mod:fn"}], + } + ) + == "Function clone group (Type-2), 3 occurrences across 2 files." + ) + assert ( + _sarif_result_message( + { + "family": "dead_code", + "category": "function", + "confidence": "medium", + "items": [{"relative_path": "pkg/mod.py"}], + } + ) + == "Unused function with medium confidence: pkg/mod.py" + ) + assert "LCOM4=4" in _sarif_result_message( + { + "family": "design", + "category": "cohesion", + "facts": {"lcom4": 4}, + "items": [{"qualname": "pkg.mod:Thing"}], + } + ) + assert "CC=25" in _sarif_result_message( + { + "family": "design", + "category": "complexity", + "facts": {"cyclomatic_complexity": 25}, + "items": [{"qualname": "pkg.mod:run"}], + } + ) + assert "CBO=12" in _sarif_result_message( + { + "family": "design", + "category": "coupling", + "facts": {"cbo": 12}, + "items": [{"qualname": "pkg.mod:Thing"}], + } + ) + assert "Dependency cycle" in _sarif_result_message( + { + "family": "design", + "category": "dependency", + "items": [{"module": "pkg.a"}, {"module": "pkg.b"}], + } + ) + + clone_props = _sarif_result_properties( + { + "family": "clone", + "novelty": "new", + "clone_kind": "function", + "clone_type": "Type-2", + "count": 2, + } + ) + guard_props = 
_sarif_result_properties( + { + "family": "structural", + "count": 3, + "signature": { + "stable": { + "family": "clone_guard_exit_divergence", + "cohort_id": "cohort-1", + "majority_guard_count": 2, + "majority_terminal_kind": "return_expr", + } + }, + } + ) + drift_props = _sarif_result_properties( + { + "family": "structural", + "count": 3, + "signature": { + "stable": { + "family": "clone_cohort_drift", + "cohort_id": "cohort-2", + "drift_fields": ["guard_exit_profile", "terminal_kind"], + } + }, + } + ) + design_props = _sarif_result_properties( + { + "family": "design", + "facts": { + "lcom4": 5, + "method_count": 7, + "instance_var_count": 2, + "cbo": 12, + "cyclomatic_complexity": 25, + "nesting_depth": 4, + "cycle_length": 3, + }, + } + ) + assert clone_props["groupArity"] == 2 + assert guard_props["cohortId"] == "cohort-1" + assert drift_props["driftFields"] == [ + "guard_exit_profile", + "terminal_kind", + ] + assert design_props["cycle_length"] == 3 + + assert _sarif_location_message({"family": "clone"}) == "Representative occurrence" + assert ( + _sarif_location_message({"family": "structural"}, related_id=2) + == "Related occurrence #2" + ) + assert ( + _sarif_location_message({"family": "dead_code"}, related_id=3) + == "Related declaration #3" + ) + assert ( + _sarif_location_message({"family": "design", "category": "dependency"}) + == "Cycle member" + ) + assert ( + _sarif_location_message( + {"family": "design", "category": "coupling"}, + related_id=4, + ) + == "Related location #4" + ) + + line_hash = _sarif_partial_fingerprints( + rule_id="CDESIGN002", + group={"id": "design:complexity:pkg.mod:run"}, + primary_item={ + "relative_path": "pkg/mod.py", + "qualname": "pkg.mod:run", + "start_line": 10, + "end_line": 14, + }, + ) + no_line_hash = _sarif_partial_fingerprints( + rule_id="CDESIGN001", + group={"id": "design:cohesion:pkg.mod:Thing"}, + primary_item={"relative_path": "", "qualname": "", "start_line": 0}, + ) + assert 
"primaryLocationLineHash" in line_hash + assert "primaryLocationLineHash" not in no_line_hash + + +def test_sarif_private_helper_edge_branches( + monkeypatch: pytest.MonkeyPatch, +) -> None: + assert _sarif_slug("Function /// clone group") == "function-clone-group" + assert ( + _sarif_scan_root_uri({"meta": {"runtime": {"scan_root_absolute": "repo"}}}) + == "" + ) + + path_type = type(Path("/tmp")) + original_as_uri = path_type.as_uri + + def _broken_as_uri(self: Path) -> str: + raise ValueError("boom") + + monkeypatch.setattr(path_type, "as_uri", _broken_as_uri) + try: + assert ( + _sarif_scan_root_uri( + {"meta": {"runtime": {"scan_root_absolute": "/repo/project"}}} + ) + == "" + ) + finally: + monkeypatch.setattr(path_type, "as_uri", original_as_uri) + + dead_code_props = _sarif_result_properties( + {"family": "dead_code", "confidence": "medium"} + ) + assert dead_code_props["confidence"] == "medium" + assert _sarif_baseline_state({"novelty": "known"}) == "unchanged" + + +def test_render_sarif_report_document_without_srcroot_keeps_relative_payload() -> None: + payload = { + "report_schema_version": "2.1", + "meta": { + "codeclone_version": "2.0.0b1", + "analysis_mode": "ci", + "report_mode": "full", + "runtime": {}, + }, + "integrity": {"digest": {"value": "abc123"}}, + "findings": { + "groups": { + "clones": {"functions": [], "blocks": [], "segments": []}, + "dead_code": {"groups": []}, + "structural": {"groups": []}, + "design": { + "groups": [ + { + "id": "design:dependency:pkg.a -> pkg.b", + "family": "design", + "category": "dependency", + "kind": "cycle", + "severity": "critical", + "confidence": "high", + "priority": 3.0, + "count": 2, + "source_scope": { + "impact_scope": "runtime", + "dominant_kind": "production", + }, + "spread": {"files": 2, "functions": 0}, + "items": [ + {"module": "pkg.a", "relative_path": "pkg/a.py"}, + {"module": "pkg.b", "relative_path": "pkg/b.py"}, + ], + "facts": {"cycle_length": 2}, + } + ] + }, + } + }, + } + sarif = 
json.loads(render_sarif_report_document(payload)) + run = cast(dict[str, object], sarif["runs"][0]) + assert "originalUriBaseIds" not in run + invocation = cast(dict[str, object], cast(list[object], run["invocations"])[0]) + assert "workingDirectory" not in invocation + result = cast(dict[str, object], cast(list[object], run["results"])[0]) + assert "baselineState" not in result + primary_location = cast(list[object], result["locations"])[0] + location_map = cast(dict[str, object], primary_location) + assert cast(dict[str, object], location_map["message"])["text"] == "Cycle member" + + +def test_collect_paths_from_metrics_covers_all_metric_families_and_skips_missing() -> ( + None +): + metrics = { + "complexity": { + "functions": [ + {"filepath": "/repo/complexity.py"}, + {"filepath": ""}, + {}, + ] + }, + "coupling": { + "classes": [ + {"filepath": "/repo/coupling.py"}, + {"filepath": None}, + ] + }, + "cohesion": { + "classes": [ + {"filepath": "/repo/cohesion.py"}, + {}, + ] + }, + "dead_code": { + "items": [ + {"filepath": "/repo/dead.py"}, + {"filepath": ""}, + ], + "suppressed_items": [ + {"filepath": "/repo/suppressed.py"}, + {"filepath": None}, + ], + }, + } + + assert _collect_paths_from_metrics(metrics) == { + "/repo/complexity.py", + "/repo/coupling.py", + "/repo/cohesion.py", + "/repo/dead.py", + "/repo/suppressed.py", + } + + +def test_collect_report_file_list_deterministically_merges_all_sources( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _Occurrence: + def __init__(self, file_path: str) -> None: + self.file_path = file_path + + class _Group: + def __init__(self, *paths: str) -> None: + self.items = tuple(_Occurrence(path) for path in paths) + + monkeypatch.setattr( + json_contract_mod, + "normalize_structural_findings", + lambda _findings: [_Group("/repo/struct.py", "")], + ) + structural_seed = ( + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key="seed", + signature={"stmt_seq": "Expr,Return"}, + items=( + 
StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="seed", + file_path="/repo/ignored.py", + qualname="pkg.mod:fn", + start=1, + end=2, + signature={"stmt_seq": "Expr,Return"}, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key="seed", + file_path="/repo/ignored.py", + qualname="pkg.mod:fn", + start=3, + end=4, + signature={"stmt_seq": "Expr,Return"}, + ), + ), + ), + ) + + files = _collect_report_file_list( + inventory={"file_list": ["/repo/inventory.py", "", None]}, + func_groups={"f": [{"filepath": "/repo/function.py"}, {"filepath": ""}]}, + block_groups={"b": [{"filepath": "/repo/block.py"}]}, + segment_groups={"s": [{"filepath": None}, {"filepath": "/repo/segment.py"}]}, + metrics={ + "complexity": {"functions": [{"filepath": "/repo/metric.py"}]}, + "coupling": {"classes": []}, + "cohesion": {"classes": []}, + "dead_code": {"items": [], "suppressed_items": []}, + }, + structural_findings=structural_seed, + ) + + assert files == [ + "/repo/block.py", + "/repo/function.py", + "/repo/inventory.py", + "/repo/metric.py", + "/repo/segment.py", + "/repo/struct.py", + ] + + +def test_json_contract_private_helper_edge_branches() -> None: + assert _csv_values("") == [] + assert _csv_values(" , , ") == [] + assert _csv_values("b, a, b") == ["a", "b"] + + severity, priority = _structural_group_assessment( + finding_kind="clone_guard_exit_divergence", + count=3, + spread_functions=1, + ) + assert severity == "critical" + assert priority > 0 + + severity, priority = _structural_group_assessment( + finding_kind="clone_cohort_drift", + count=1, + spread_functions=2, + ) + assert severity == "critical" + assert priority > 0 + + summary = _findings_summary( + clone_functions=( + { + "severity": "mystery", + "novelty": "new", + "source_scope": {"impact_scope": "alien"}, + }, + ), + clone_blocks=(), + clone_segments=(), + structural_groups=(), + dead_code_groups=(), + design_groups=(), + dead_code_suppressed=-4, + 
) + assert summary["severity"] == { + "critical": 0, + "warning": 0, + "info": 0, + } + assert summary["impact_scope"] == { + "runtime": 0, + "non_runtime": 0, + "mixed": 0, + } + assert cast(dict[str, int], summary["clones"])["new"] == 1 + assert cast(dict[str, int], summary["suppressed"])["dead_code"] == 0 + + +def test_build_report_document_suppressed_dead_code_accepts_empty_bindings() -> None: + payload = build_report_document( + func_groups={}, + block_groups={}, + segment_groups={}, + meta={"scan_root": "/repo"}, + metrics={ + "complexity": {"summary": {}, "functions": []}, + "coupling": {"summary": {}, "classes": []}, + "cohesion": {"summary": {}, "classes": []}, + "dependencies": {"cycles": [], "edge_list": [], "longest_chains": []}, + "dead_code": { + "summary": {"total": 0, "high_confidence": 0, "suppressed": 1}, + "items": [], + "suppressed_items": [ + { + "qualname": "pkg.mod:kept", + "filepath": "/repo/pkg/mod.py", + "start_line": 10, + "end_line": 12, + "kind": "function", + "confidence": "high", + "suppressed_by": [{"rule": "", "source": " "}, {}], + } + ], + }, + "health": {"score": 100, "grade": "A", "dimensions": {}}, + }, + ) + + dead_code = cast( + dict[str, object], + cast(dict[str, object], payload["metrics"])["families"], + )["dead_code"] + dead_code_map = cast(dict[str, object], dead_code) + suppressed_item = cast(list[dict[str, object]], dead_code_map["suppressed_items"])[ + 0 + ] + assert suppressed_item["suppressed_by"] == [] + assert suppressed_item["suppression_rule"] == "" + assert suppressed_item["suppression_source"] == "" + + +def test_serialize_private_helpers_cover_structural_and_suppression_paths() -> None: + assert _structural_kind_label("custom_kind") == "custom_kind" + assert _structural_kind_label("") == "(none)" + + structural_lines: list[str] = [] + _append_structural_findings( + structural_lines, + [ + { + "id": "structural:custom:1", + "kind": "custom_kind", + "severity": "warning", + "confidence": "medium", + "count": 4, 
+ "spread": {"files": 1, "functions": 1}, + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "signature": { + "stable": { + "family": "custom", + "stmt_shape": "Expr,Return", + "terminal_kind": "return", + "control_flow": { + "has_loop": "0", + "has_try": "0", + "nested_if": "0", + }, + } + }, + "facts": {"calls": 2}, + "items": [ + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 1, + "end_line": 1, + }, + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 2, + "end_line": 2, + }, + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 3, + "end_line": 3, + }, + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 4, + "end_line": 4, + }, + ], + } + ], + ) + assert any(line.startswith("facts: ") for line in structural_lines) + assert any("... and 1 more occurrences" in line for line in structural_lines) + assert structural_lines[-1] != "" + + finding_lines: list[str] = [] + _append_single_item_findings( + finding_lines, + title="DESIGN FINDINGS", + groups=[ + { + "id": "design:complexity:pkg.mod:fn", + "category": "complexity", + "kind": "function_hotspot", + "severity": "warning", + "confidence": "high", + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "facts": {"cyclomatic_complexity": 25}, + "items": [ + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 10, + "end_line": 14, + } + ], + } + ], + fact_keys=("cyclomatic_complexity",), + ) + assert any(line.startswith("facts: ") for line in finding_lines) + assert finding_lines[-1] != "" + + suppressed_lines: list[str] = [] + _append_suppressed_dead_code_items( + suppressed_lines, + items=[ + { + "kind": "function", + "confidence": "high", + "relative_path": "pkg/mod.py", + "qualname": "pkg.mod:kept", + "start_line": 20, + "end_line": 22, + "suppression_rule": "dead-code", + "suppression_source": 
"inline_codeclone", + } + ], + ) + assert any( + "suppressed_by=dead-code@inline_codeclone" in line for line in suppressed_lines + ) + assert suppressed_lines[-1] != "" + + suppressed_none_lines: list[str] = [] + _append_suppressed_dead_code_items( + suppressed_none_lines, + items=[ + { + "kind": "function", + "confidence": "medium", + "relative_path": "pkg/mod.py", + "qualname": "pkg.mod:unknown", + "start_line": 30, + "end_line": 31, + } + ], + ) + assert any("suppressed_by=(none)" in line for line in suppressed_none_lines) + + suggestion_lines: list[str] = [] + _append_suggestions( + suggestion_lines, + suggestions=[ + { + "title": "Investigate repeated flow", + "finding_id": "missing:finding", + "summary": "", + "location_label": "pkg/mod.py:10-12", + "representative_locations": [], + "action": {"effort": "easy", "steps": []}, + } + ], + findings={ + "groups": { + "clones": {"functions": [], "blocks": [], "segments": []}, + "structural": {"groups": []}, + "dead_code": {"groups": []}, + "design": {"groups": []}, + } + }, + ) + assert any("Investigate repeated flow" in line for line in suggestion_lines) + assert not any(line.lstrip().startswith("summary:") for line in suggestion_lines) diff --git a/tests/test_report_explain.py b/tests/test_report_explain.py index f2e7314..57689bf 100644 --- a/tests/test_report_explain.py +++ b/tests/test_report_explain.py @@ -1,12 +1,41 @@ +import ast from pathlib import Path -from codeclone._report_explain import build_block_group_facts +import codeclone.report.explain as explain_mod +from codeclone.report import build_block_group_facts from tests._report_fixtures import ( repeated_block_group_key, write_repeated_assert_source, ) +def _build_group_facts_for_source( + *, + tmp_path: Path, + filename: str, + source: str, + qualname: str = "mod:f", + start_line: int = 2, + end_line: int = 4, +) -> dict[str, str]: + group_key = repeated_block_group_key() + test_file = tmp_path / filename + test_file.write_text(source, "utf-8") + 
facts = build_block_group_facts( + { + group_key: [ + { + "qualname": qualname, + "filepath": str(test_file), + "start_line": start_line, + "end_line": end_line, + } + ] + } + ) + return facts[group_key] + + def test_build_block_group_facts_handles_missing_file() -> None: group_key = repeated_block_group_key() facts = build_block_group_facts( @@ -48,56 +77,34 @@ def test_build_block_group_facts_handles_syntax_error_file(tmp_path: Path) -> No def test_build_block_group_facts_assert_detection_with_calls(tmp_path: Path) -> None: - group_key = repeated_block_group_key() - test_file = tmp_path / "test_calls.py" - test_file.write_text( - "def f(checker):\n" - ' "doc"\n' - " assert_ok(checker)\n" - " checker.assert_ready(checker)\n", - "utf-8", - ) - facts = build_block_group_facts( - { - group_key: [ - { - "qualname": "tests.mod:f", - "filepath": str(test_file), - "start_line": 2, - "end_line": 4, - } - ] - } + group = _build_group_facts_for_source( + tmp_path=tmp_path, + filename="test_calls.py", + source=( + "def f(checker):\n" + ' "doc"\n' + " assert_ok(checker)\n" + " checker.assert_ready(checker)\n" + ), + qualname="tests.mod:f", ) - group = facts[group_key] assert group["hint"] == "assert_only" assert group["assert_ratio"] == "100%" assert group["consecutive_asserts"] == "3" def test_build_block_group_facts_non_assert_breaks_hint(tmp_path: Path) -> None: - group_key = repeated_block_group_key() - test_file = tmp_path / "test_mixed.py" - test_file.write_text( - "def f(html):\n" - " assert 'a' in html\n" - " check(html)\n" - " assert 'b' in html\n", - "utf-8", + group = _build_group_facts_for_source( + tmp_path=tmp_path, + filename="test_mixed.py", + source=( + "def f(html):\n" + " assert 'a' in html\n" + " check(html)\n" + " assert 'b' in html\n" + ), + qualname="tests.mod:f", ) - facts = build_block_group_facts( - { - group_key: [ - { - "qualname": "tests.mod:f", - "filepath": str(test_file), - "start_line": 2, - "end_line": 4, - } - ] - } - ) - group = 
facts[group_key] assert "hint" not in group assert group["assert_ratio"] == "67%" assert group["consecutive_asserts"] == "1" @@ -138,25 +145,13 @@ def test_build_block_group_facts_handles_empty_stmt_range(tmp_path: Path) -> Non def test_build_block_group_facts_non_assert_call_shapes(tmp_path: Path) -> None: - group_key = repeated_block_group_key() - test_file = tmp_path / "module.py" - test_file.write_text( - "def f(checker, x):\n checker.validate(x)\n (lambda y: y)(x)\n x\n", - "utf-8", + group = _build_group_facts_for_source( + tmp_path=tmp_path, + filename="module.py", + source=( + "def f(checker, x):\n checker.validate(x)\n (lambda y: y)(x)\n x\n" + ), ) - facts = build_block_group_facts( - { - group_key: [ - { - "qualname": "mod:f", - "filepath": str(test_file), - "start_line": 2, - "end_line": 4, - } - ] - } - ) - group = facts[group_key] assert group["assert_ratio"] == "0%" assert group["consecutive_asserts"] == "0" assert "hint" not in group @@ -227,3 +222,60 @@ def test_build_block_group_facts_n_way_group_compare_facts(tmp_path: Path) -> No assert group["group_compare_note"] == ( "N-way group: each block matches 2 peers in this group." 
) + + +def test_explain_as_int_variants() -> None: + assert explain_mod._as_int(True) == 1 + assert explain_mod._as_int("7") == 7 + assert explain_mod._as_int("bad") == 0 + assert explain_mod._as_int(1.5) == 0 + + +def test_parsed_file_tree_cache_and_empty_statement_index_paths(tmp_path: Path) -> None: + module = tmp_path / "empty_module.py" + module.write_text("", "utf-8") + + ast_cache: dict[str, ast.AST | None] = {} + first = explain_mod.parsed_file_tree(str(module), ast_cache=ast_cache) + second = explain_mod.parsed_file_tree(str(module), ast_cache=ast_cache) + assert first is second + + stmt_index_cache: dict[str, explain_mod._StatementIndex | None] = {} + range_cache: dict[tuple[str, int, int], tuple[int, int, int]] = {} + total, assert_like, consecutive = explain_mod.assert_range_stats( + filepath=str(module), + start_line=1, + end_line=10, + ast_cache=ast_cache, + stmt_index_cache=stmt_index_cache, + range_cache=range_cache, + ) + assert (total, assert_like, consecutive) == (0, 0, 0) + + +def test_assert_range_stats_skips_records_outside_requested_end_line( + tmp_path: Path, +) -> None: + module = tmp_path / "multiline_stmt.py" + module.write_text( + "def f() -> int:\n" + " value = (\n" + " 1 +\n" + " 2\n" + " )\n" + " return value\n", + "utf-8", + ) + ast_cache: dict[str, ast.AST | None] = {} + stmt_index_cache: dict[str, explain_mod._StatementIndex | None] = {} + range_cache: dict[tuple[str, int, int], tuple[int, int, int]] = {} + + total, assert_like, consecutive = explain_mod.assert_range_stats( + filepath=str(module), + start_line=2, + end_line=2, + ast_cache=ast_cache, + stmt_index_cache=stmt_index_cache, + range_cache=range_cache, + ) + assert (total, assert_like, consecutive) == (0, 0, 0) diff --git a/tests/test_report_source_kinds.py b/tests/test_report_source_kinds.py new file mode 100644 index 0000000..2f64c8c --- /dev/null +++ b/tests/test_report_source_kinds.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from 
codeclone.report._source_kinds import ( + SOURCE_KIND_FILTER_VALUES, + normalize_source_kind, + source_kind_label, +) + + +def test_normalize_source_kind_handles_whitespace_and_empty() -> None: + assert normalize_source_kind(" Production ") == "production" + assert normalize_source_kind("\n") == "other" + + +def test_source_kind_label_maps_known_and_unknown_values() -> None: + assert source_kind_label("production") == "Production" + assert source_kind_label("fixtures") == "Fixtures" + assert source_kind_label("experimental_scope") == "Experimental_Scope" + assert source_kind_label(" ") == "Other" + + +def test_source_kind_filter_values_are_stable_and_unique() -> None: + assert SOURCE_KIND_FILTER_VALUES == ( + "production", + "tests", + "fixtures", + "mixed", + ) + assert len(SOURCE_KIND_FILTER_VALUES) == len(set(SOURCE_KIND_FILTER_VALUES)) diff --git a/tests/test_report_suggestions.py b/tests/test_report_suggestions.py new file mode 100644 index 0000000..44063ea --- /dev/null +++ b/tests/test_report_suggestions.py @@ -0,0 +1,368 @@ +from __future__ import annotations + +from codeclone.models import ( + ClassMetrics, + DeadItem, + HealthScore, + ProjectMetrics, +) +from codeclone.report import suggestions as suggestions_mod +from codeclone.report.suggestions import classify_clone_type, generate_suggestions + + +def _project_metrics() -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=3.5, + complexity_max=50, + high_risk_functions=("pkg.mod:critical",), + coupling_avg=4.0, + coupling_max=15, + high_risk_classes=("pkg.mod:Service",), + cohesion_avg=2.0, + cohesion_max=5, + low_cohesion_classes=("pkg.mod:Service",), + dependency_modules=3, + dependency_edges=2, + dependency_edge_list=(), + dependency_cycles=(("pkg.a", "pkg.b"),), + dependency_max_depth=4, + dependency_longest_chains=(("pkg.a", "pkg.b"),), + dead_code=( + DeadItem( + qualname="pkg.mod:unused", + filepath="pkg/mod.py", + start_line=10, + end_line=12, + kind="function", + confidence="high", + 
), + DeadItem( + qualname="pkg.mod:maybe", + filepath="pkg/mod.py", + start_line=20, + end_line=22, + kind="function", + confidence="medium", + ), + ), + health=HealthScore(total=70, grade="C", dimensions={"clones": 70}), + ) + + +def test_suggestion_helpers_convert_types() -> None: + assert suggestions_mod._as_int(True) == 1 + assert suggestions_mod._as_int("42") == 42 + assert suggestions_mod._as_int("bad", default=7) == 7 + assert suggestions_mod._as_int(object(), default=9) == 9 + assert suggestions_mod._as_str("value", default="x") == "value" + assert suggestions_mod._as_str(10, default="x") == "x" + + +def test_classify_clone_type_all_modes() -> None: + assert classify_clone_type(items=(), kind="block") == "Type-4" + assert ( + classify_clone_type( + items=( + {"raw_hash": "abc", "fingerprint": "f1"}, + {"raw_hash": "abc", "fingerprint": "f2"}, + ), + kind="function", + ) + == "Type-1" + ) + assert ( + classify_clone_type( + items=( + {"fingerprint": "fp"}, + {"fingerprint": "fp"}, + ), + kind="function", + ) + == "Type-2" + ) + assert ( + classify_clone_type( + items=( + {"fingerprint": "fp1"}, + {"fingerprint": "fp2"}, + ), + kind="function", + ) + == "Type-3" + ) + assert ( + classify_clone_type( + items=( + {"fingerprint": ""}, + {"raw_hash": ""}, + ), + kind="function", + ) + == "Type-4" + ) + + +def test_generate_suggestions_covers_clone_metrics_and_dependency_categories() -> None: + project_metrics = _project_metrics() + units = ( + { + "qualname": "pkg.mod:critical", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 30, + "cyclomatic_complexity": 50, + "nesting_depth": 5, + "risk": "high", + }, + { + "qualname": "pkg.mod:warning", + "filepath": "pkg/mod.py", + "start_line": 35, + "end_line": 60, + "cyclomatic_complexity": 25, + "nesting_depth": 3, + "risk": "medium", + }, + { + "qualname": "pkg.mod:ok", + "filepath": "pkg/mod.py", + "start_line": 70, + "end_line": 75, + "cyclomatic_complexity": 10, + "nesting_depth": 1, + "risk": "low", + 
}, + ) + class_metrics = ( + ClassMetrics( + qualname="pkg.mod:Service", + filepath="pkg/mod.py", + start_line=1, + end_line=80, + cbo=11, + lcom4=4, + method_count=8, + instance_var_count=5, + risk_coupling="high", + risk_cohesion="high", + ), + ) + func_groups = { + "type1_group": [ + { + "qualname": "pkg.mod:a", + "filepath": "pkg/mod.py", + "start_line": 5, + "end_line": 9, + "raw_hash": "same", + "fingerprint": "f1", + } + for _ in range(4) + ], + "type2_group": [ + { + "qualname": "pkg.mod:b", + "filepath": "pkg/mod.py", + "start_line": 15, + "end_line": 19, + "raw_hash": "", + "fingerprint": "fp-shared", + }, + { + "qualname": "pkg.mod:c", + "filepath": "pkg/mod.py", + "start_line": 25, + "end_line": 29, + "raw_hash": "", + "fingerprint": "fp-shared", + }, + ], + } + block_groups = { + "block-heavy": [ + { + "qualname": "pkg.mod:block", + "filepath": "pkg/mod.py", + "start_line": 100, + "end_line": 110, + } + for _ in range(4) + ] + } + segment_groups = { + "segment-heavy": [ + { + "qualname": "pkg.mod:segment", + "filepath": "pkg/mod.py", + "start_line": 120, + "end_line": 130, + } + for _ in range(4) + ] + } + + suggestions = generate_suggestions( + project_metrics=project_metrics, + units=units, + class_metrics=class_metrics, + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + ) + assert suggestions + categories = {item.category for item in suggestions} + assert categories == { + "clone", + "complexity", + "coupling", + "cohesion", + "dead_code", + "dependency", + } + assert any(item.title.endswith("(Type-1)") for item in suggestions) + assert any(item.title.endswith("(Type-2)") for item in suggestions) + assert any( + item.category == "complexity" + and item.severity == "critical" + and item.title == "Reduce function complexity" + for item in suggestions + ) + assert any( + item.category == "complexity" + and item.severity == "warning" + and item.title == "Reduce function complexity" + for item in suggestions + ) + 
assert any( + item.category == "clone" + and item.fact_kind == "Function clone group" + and item.fact_summary == "same exact function body" + and item.source_kind == "production" + for item in suggestions + ) + assert all( + not ( + item.category == "dead_code" + and item.location == "pkg/mod.py:20-22" + and item.title == "Remove or explicitly keep unused code" + ) + for item in suggestions + ) + + ordered = list(suggestions) + assert ordered == sorted( + ordered, + key=lambda item: ( + -item.priority, + item.severity, + item.category, + item.source_kind, + item.location_label or item.location, + item.title, + item.subject_key, + ), + ) + + +def test_generate_suggestions_covers_skip_branches_for_optional_rules() -> None: + project_metrics = _project_metrics() + class_metrics = ( + ClassMetrics( + qualname="pkg.mod:OnlyCohesion", + filepath="pkg/mod.py", + start_line=10, + end_line=20, + cbo=5, + lcom4=5, + method_count=3, + instance_var_count=1, + risk_coupling="low", + risk_cohesion="high", + ), + ClassMetrics( + qualname="pkg.mod:NoWarnings", + filepath="pkg/mod.py", + start_line=30, + end_line=40, + cbo=2, + lcom4=1, + method_count=2, + instance_var_count=1, + risk_coupling="low", + risk_cohesion="low", + ), + ) + suggestions = generate_suggestions( + project_metrics=project_metrics, + units=(), + class_metrics=class_metrics, + func_groups={ + "type3": [ + {"fingerprint": "a", "raw_hash": "", "filepath": "pkg/mod.py"}, + {"fingerprint": "b", "raw_hash": "", "filepath": "pkg/mod.py"}, + ] + }, + block_groups={"small": [{"filepath": "pkg/mod.py"}]}, + segment_groups={"small": [{"filepath": "pkg/mod.py"}]}, + ) + assert any(item.category == "cohesion" for item in suggestions) + assert not any(item.title.endswith("(Type-1)") for item in suggestions) + assert not any(item.title.endswith("(Type-2)") for item in suggestions) + + +def test_generate_suggestions_uses_full_spread_for_group_location_label() -> None: + suggestions = generate_suggestions( + 
project_metrics=_project_metrics(), + units=(), + class_metrics=(), + func_groups={ + "type2": [ + { + "qualname": "pkg.alpha:transform_alpha", + "filepath": "/root/tests/fixtures/alpha.py", + "start_line": 1, + "end_line": 10, + "fingerprint": "fp-shared", + "raw_hash": "", + }, + { + "qualname": "pkg.beta:transform_beta", + "filepath": "/root/tests/fixtures/beta.py", + "start_line": 1, + "end_line": 10, + "fingerprint": "fp-shared", + "raw_hash": "", + }, + { + "qualname": "pkg.gamma:transform_gamma", + "filepath": "/root/tests/fixtures/gamma.py", + "start_line": 1, + "end_line": 10, + "fingerprint": "fp-shared", + "raw_hash": "", + }, + { + "qualname": "pkg.delta:transform_delta", + "filepath": "/root/tests/fixtures/delta.py", + "start_line": 1, + "end_line": 10, + "fingerprint": "fp-shared", + "raw_hash": "", + }, + ] + }, + block_groups={}, + segment_groups={}, + scan_root="/root", + ) + clone_suggestion = next( + suggestion + for suggestion in suggestions + if suggestion.finding_family == "clones" + ) + assert len(clone_suggestion.representative_locations) == 3 + assert clone_suggestion.spread_files == 4 + assert clone_suggestion.spread_functions == 4 + assert ( + clone_suggestion.location_label == "4 occurrences across 4 files / 4 functions" + ) diff --git a/tests/test_scanner_extra.py b/tests/test_scanner_extra.py index 4b95154..c2fa01e 100644 --- a/tests/test_scanner_extra.py +++ b/tests/test_scanner_extra.py @@ -21,6 +21,13 @@ def _symlink_or_skip( pytest.skip("symlink creation is not available in this environment") +def _configure_fake_tempdir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + fake_temp = tmp_path / "fake_tmp" + fake_temp.mkdir() + monkeypatch.setattr(scanner, "_get_tempdir", lambda: fake_temp.resolve()) + return fake_temp + + def test_iter_py_files_in_temp(tmp_path: Path) -> None: src = tmp_path / "a.py" src.write_text("def f():\n return 1\n", "utf-8") @@ -129,6 +136,98 @@ def 
test_iter_py_files_symlink_loop_does_not_traverse(tmp_path: Path) -> None: assert files.count(str(src)) == 1 +def test_scanner_internal_path_guards_and_symlink_resolve_error( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + root = tmp_path / "root" + root.mkdir() + + inside = root / "inside.py" + inside.write_text("x = 1\n", "utf-8") + assert scanner._is_under_root(inside, root) is True + + excluded = root / "__pycache__" / "skip.py" + excluded.parent.mkdir() + excluded.write_text("x = 1\n", "utf-8") + assert ( + scanner._is_included_python_file( + file_path=excluded, + excludes_set={"__pycache__"}, + rootp=root, + ) + is False + ) + + target = root / "target.py" + target.write_text("x = 1\n", "utf-8") + link = root / "link.py" + _symlink_or_skip(link, target) + + original_resolve = Path.resolve + + def _resolve_with_error(self: Path, *, strict: bool = False) -> Path: + if self == link: + raise OSError("resolve failed") + return original_resolve(self, strict=strict) + + monkeypatch.setattr(Path, "resolve", _resolve_with_error) + assert ( + scanner._is_included_python_file( + file_path=link, + excludes_set=set(), + rootp=root, + ) + is False + ) + + +def test_is_included_python_file_non_py_rejected(tmp_path: Path) -> None: + root = tmp_path / "root" + root.mkdir() + txt = root / "a.txt" + txt.write_text("x", "utf-8") + assert ( + scanner._is_included_python_file( + file_path=txt, + excludes_set=set(), + rootp=root, + ) + is False + ) + + +def test_is_included_python_file_regular_py_accepted(tmp_path: Path) -> None: + root = tmp_path / "root" + root.mkdir() + pyf = root / "a.py" + pyf.write_text("x = 1\n", "utf-8") + assert ( + scanner._is_included_python_file( + file_path=pyf, + excludes_set=set(), + rootp=root, + ) + is True + ) + + +def test_iter_py_files_excluded_root_short_circuit(tmp_path: Path) -> None: + excluded_root = tmp_path / "__pycache__" + excluded_root.mkdir() + (excluded_root / "a.py").write_text("x = 1\n", "utf-8") + assert 
list(iter_py_files(str(excluded_root))) == [] + + +def test_iter_py_files_excluded_parent_dir_does_not_short_circuit( + tmp_path: Path, +) -> None: + root = tmp_path / "build" / "project" + root.mkdir(parents=True) + src = root / "a.py" + src.write_text("x = 1\n", "utf-8") + assert list(iter_py_files(str(root))) == [str(src)] + + def test_sensitive_prefix_blocked( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ -160,9 +259,7 @@ def test_sensitive_root_blocked( def test_sensitive_directory_blocked_via_dotdot( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - fake_temp = tmp_path / "fake_tmp" - fake_temp.mkdir() - monkeypatch.setattr(scanner, "_get_tempdir", lambda: fake_temp.resolve()) + _configure_fake_tempdir(tmp_path, monkeypatch) base = tmp_path / "base" sensitive_root = tmp_path / "sensitive" @@ -184,9 +281,7 @@ def test_sensitive_directory_blocked_via_dotdot( def test_symlink_to_sensitive_directory_skipped( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - fake_temp = tmp_path / "fake_tmp" - fake_temp.mkdir() - monkeypatch.setattr(scanner, "_get_tempdir", lambda: fake_temp.resolve()) + _configure_fake_tempdir(tmp_path, monkeypatch) root = tmp_path / "root" sensitive_root = tmp_path / "sensitive_link_target" diff --git a/tests/test_structural_findings.py b/tests/test_structural_findings.py new file mode 100644 index 0000000..6f3752e --- /dev/null +++ b/tests/test_structural_findings.py @@ -0,0 +1,824 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +"""Unit tests for codeclone.structural_findings (Phase 1: duplicated_branches).""" + +from __future__ import annotations + +import ast +import sys + +import pytest + +import codeclone.structural_findings as sf +from codeclone.models import StructuralFindingGroup, StructuralFindingOccurrence +from codeclone.structural_findings import ( + build_clone_cohort_structural_findings, + scan_function_structure, +) + +# 
--------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _parse_fn(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef: + """Parse a source snippet and return the first function definition.""" + tree = ast.parse(source) + for node in ast.walk(tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + return node + raise ValueError("No function found in source") + + +def _findings(source: str, qualname: str = "mod:fn") -> list[StructuralFindingGroup]: + fn = _parse_fn(source) + return list( + scan_function_structure( + fn, + "mod.py", + qualname, + collect_findings=True, + ).structural_findings + ) + + +# --------------------------------------------------------------------------- +# Core detection +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "source", + [ + """ +def fn(x): + if x == 1: + y = 1 + return y + elif x == 2: + y = 2 + return y +""", + pytest.param( + """ +def fn(x): + match x: + case 1: + y = x + return y + case 2: + y = x + return y +""", + marks=pytest.mark.skipif( + sys.version_info < (3, 10), reason="match/case requires Python 3.10+" + ), + id="match_case", + ), + ], + ids=["if_elif_chain", "match_case_chain"], +) +def test_detects_identical_branch_families(source: str) -> None: + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].finding_kind == "duplicated_branches" + assert len(groups[0].items) == 2 + + +def test_no_finding_single_arm() -> None: + source = """ +def fn(x): + if x == 1: + return 1 +""" + groups = _findings(source) + assert groups == [] + + +def test_no_finding_pass_only_branch() -> None: + source = """ +def fn(x): + if x == 1: + pass + elif x == 2: + pass +""" + groups = _findings(source) + assert groups == [] + + +def test_no_finding_empty_body() -> None: + source = """ +def fn(): + pass +""" + groups = 
_findings(source) + assert groups == [] + + +def test_single_statement_return_branches_are_filtered() -> None: + source = """ +def fn(x): + if x == 1: + return 1 + elif x == 2: + return 2 +""" + groups = _findings(source) + assert groups == [] + + +def test_single_statement_call_branch_is_filtered_as_trivial() -> None: + source = """ +def fn(x): + if x == 1: + warn("a") + elif x == 2: + warn("b") +""" + groups = _findings(source) + assert groups == [] + + +def test_single_statement_try_branch_still_counts_as_meaningful() -> None: + source = """ +def fn(x): + if x == 1: + try: + warn("a") + except RuntimeError: + recover("a") + elif x == 2: + try: + warn("b") + except RuntimeError: + recover("b") +""" + groups = _findings(source) + assert len(groups) == 1 + assert len(groups[0].items) == 2 + + +def test_multi_statement_guard_exit_branch_still_counts_as_meaningful() -> None: + source = """ +def fn(x): + if x == 1: + note("a") + return None + elif x == 2: + note("b") + return None +""" + groups = _findings(source) + assert len(groups) == 1 + assert len(groups[0].items) == 2 + + +def test_homogeneous_trivial_multi_statement_branch_is_filtered() -> None: + source = """ +def fn(x): + if x == 1: + left = 1 + right = 2 + elif x == 2: + left = 3 + right = 4 +""" + groups = _findings(source) + assert groups == [] + + +def test_single_statement_raise_else_branch_is_filtered() -> None: + source = """ +def fn(x): + if x > 0: + raise ValueError("a") + else: + raise ValueError("b") +""" + groups = _findings(source) + assert groups == [] + + +def test_different_signatures_no_group() -> None: + """Different branch shapes should NOT form a group.""" + source = """ +def fn(x): + if x == 1: + return x + elif x == 2: + raise ValueError("nope") +""" + groups = _findings(source) + assert groups == [] + + +# --------------------------------------------------------------------------- +# Signature components +# --------------------------------------------------------------------------- + + 
+def test_terminal_return_none() -> None: + source = """ +def fn(x): + if x == 1: + y = 1 + return + elif x == 2: + y = 2 + return +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["terminal"] == "return_none" + + +def test_terminal_return_const() -> None: + source = """ +def fn(x): + if x == 1: + y = x + return 42 + elif x == 2: + y = x + 1 + return 99 +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["terminal"] == "return_const" + + +def test_terminal_return_name() -> None: + source = """ +def fn(x, y): + if x: + z = y + return y + elif not x: + z = y + return y +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["terminal"] == "return_name" + + +def test_terminal_return_expr() -> None: + source = """ +def fn(x): + if x == 1: + y = x + return x + 1 + elif x == 2: + y = x + return x - 1 +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["terminal"] == "return_expr" + + +def test_nested_if_flag() -> None: + source = """ +def fn(x): + if x == 1: + if x > 0: + pass + return x + elif x == 2: + if x > 0: + pass + return x +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["nested_if"] == "1" + + +def test_has_loop_flag() -> None: + source = """ +def fn(x): + if x == 1: + for i in range(x): + pass + return x + elif x == 2: + for i in range(x): + pass + return x +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["has_loop"] == "1" + + +def test_has_try_flag() -> None: + source = """ +def fn(x): + if x == 1: + try: + pass + except Exception: + pass + return x + elif x == 2: + try: + pass + except Exception: + pass + return x +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["has_try"] == "1" + + +def test_calls_bucketed_zero() -> None: + source = """ +def fn(x): + if x == 1: + y = x + 1 + return y + elif x 
== 2: + y = x - 1 + return y +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["calls"] == "0" + + +def test_calls_bucketed_one() -> None: + source = """ +def fn(x): + if x == 1: + foo() + return x + elif x == 2: + bar() + return x +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["calls"] == "1" + + +def test_calls_bucketed_two_plus() -> None: + source = """ +def fn(x): + if x == 1: + foo() + bar() + return x + elif x == 2: + baz() + qux() + return x +""" + groups = _findings(source) + assert len(groups) == 1 + assert groups[0].signature["calls"] == "2+" + + +# --------------------------------------------------------------------------- +# Determinism +# --------------------------------------------------------------------------- + + +def test_finding_key_stable() -> None: + source = """ +def fn(x): + if x == 1: + y = 1 + return y + elif x == 2: + y = 2 + return y +""" + groups_a = _findings(source) + groups_b = _findings(source) + assert groups_a[0].finding_key == groups_b[0].finding_key + + +def test_ordering_stable() -> None: + """Groups sorted by (-count, finding_key) — consistent across calls.""" + source = """ +def fn(x): + if x == 1: + y = 1 + return y + elif x == 2: + y = 2 + return y + elif x == 3: + y = 3 + return y +""" + groups_a = _findings(source) + groups_b = _findings(source) + assert [g.finding_key for g in groups_a] == [g.finding_key for g in groups_b] + + +def test_item_line_ranges_correct() -> None: + source = """ +def fn(x): + if x == 1: + y = 1 + return y + elif x == 2: + y = 2 + return y +""" + groups = _findings(source) + assert len(groups) == 1 + items = sorted(groups[0].items, key=lambda o: o.start) + assert items[0].start > 0 + assert items[1].start > items[0].start + + +def test_qualname_and_filepath_set() -> None: + source = """ +def fn(x): + if x == 1: + y = 1 + return y + elif x == 2: + y = 2 + return y +""" + groups = _findings(source, qualname="mymod:fn") + 
assert groups[0].items[0].qualname == "mymod:fn" + assert groups[0].items[0].file_path == "mod.py" + + +# --------------------------------------------------------------------------- +# match/case (Python 3.10+) +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif( + sys.version_info < (3, 10), reason="match/case requires Python 3.10+" +) +def test_match_case_no_finding_different_body() -> None: + source = """ +def fn(x): + match x: + case 1: + return 1 + case 2: + raise ValueError("x") +""" + groups = _findings(source) + assert groups == [] + + +def test_scan_function_structure_collects_stable_guard_facts() -> None: + source = """ +def fn(x): + if not x: + return 0 + if x < 0: + raise ValueError("x") + try: + y = x + 1 + finally: + y = x + return y +""" + facts = scan_function_structure( + _parse_fn(source), + "mod.py", + "pkg.mod:fn", + collect_findings=True, + ) + assert facts.entry_guard_count == 2 + assert facts.entry_guard_terminal_profile == "return_const,raise" + assert facts.entry_guard_has_side_effect_before is False + assert facts.terminal_kind == "return_name" + assert facts.try_finally_profile == "try_finally" + assert facts.side_effect_order_profile == "guard_then_effect" + + +def test_build_clone_cohort_structural_findings_emits_new_families() -> None: + func_groups = { + "fp-a|20-49": [ + { + "filepath": "pkg/a.py", + "qualname": "pkg.a:f1", + "start_line": 10, + "end_line": 40, + "entry_guard_count": 2, + "entry_guard_terminal_profile": "return_const,raise", + "entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + "try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + }, + { + "filepath": "pkg/b.py", + "qualname": "pkg.b:f1", + "start_line": 11, + "end_line": 41, + "entry_guard_count": 2, + "entry_guard_terminal_profile": "return_const,raise", + "entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + 
"try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + }, + { + "filepath": "pkg/c.py", + "qualname": "pkg.c:f1", + "start_line": 12, + "end_line": 42, + "entry_guard_count": 2, + "entry_guard_terminal_profile": "return_const,raise", + "entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + "try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + }, + { + "filepath": "pkg/d.py", + "qualname": "pkg.d:f1", + "start_line": 13, + "end_line": 43, + "entry_guard_count": 1, + "entry_guard_terminal_profile": "raise", + "entry_guard_has_side_effect_before": True, + "terminal_kind": "raise", + "try_finally_profile": "try_no_finally", + "side_effect_order_profile": "effect_before_guard", + }, + ] + } + groups = build_clone_cohort_structural_findings(func_groups=func_groups) + kinds = {group.finding_kind for group in groups} + assert "clone_guard_exit_divergence" in kinds + assert "clone_cohort_drift" in kinds + + +def test_build_clone_cohort_structural_findings_skips_uniform_groups() -> None: + func_groups = { + "fp-a|20-49": [ + { + "filepath": "pkg/a.py", + "qualname": "pkg.a:f1", + "start_line": 10, + "end_line": 40, + "entry_guard_count": 2, + "entry_guard_terminal_profile": "return_const,raise", + "entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + "try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + }, + { + "filepath": "pkg/b.py", + "qualname": "pkg.b:f1", + "start_line": 11, + "end_line": 41, + "entry_guard_count": 2, + "entry_guard_terminal_profile": "return_const,raise", + "entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + "try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + }, + { + "filepath": "pkg/c.py", + "qualname": "pkg.c:f1", + "start_line": 12, + "end_line": 42, + "entry_guard_count": 2, + "entry_guard_terminal_profile": "return_const,raise", + 
"entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + "try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + }, + ] + } + groups = build_clone_cohort_structural_findings(func_groups=func_groups) + assert groups == () + + +def test_private_helper_fallbacks_and_defaults_are_deterministic() -> None: + assert sf._terminal_kind([]) == "fallthrough" + assert sf._stmt_names_from_signature({"stmt_seq": ""}) == () + assert sf.is_reportable_structural_signature({}) is False + assert ( + sf.is_reportable_structural_signature( + {"stmt_seq": "Lambda,Assign", "terminal": "assign"}, + ) + is True + ) + assert sf._kind_min_occurrence_count("unknown_kind") == 2 + assert sf._summarize_branch([]) is None + assert sf._guard_profile_text(count=0, terminal_profile="raise") == "none" + + if_node = ast.parse("if x:\n value = 1\n").body[0] + is_guard, terminal = sf._is_guard_exit_if(if_node) + assert is_guard is False + assert terminal == "none" + + signature = { + "stmt_seq": "Expr", + "terminal": "expr", + "calls": "0", + "raises": "0", + "nested_if": "0", + "has_loop": "0", + "has_try": "0", + } + occurrence = StructuralFindingOccurrence( + finding_kind="unknown_kind", + finding_key="unknown-key", + file_path="a.py", + qualname="mod:fn", + start=1, + end=2, + signature=signature, + ) + group = StructuralFindingGroup( + finding_kind="unknown_kind", + finding_key="unknown-key", + signature=signature, + items=(occurrence,), + ) + assert sf.normalize_structural_finding_group(group) is None + + +def test_private_member_decoding_and_majority_defaults() -> None: + assert sf._as_item_int(True) == 1 + assert sf._as_item_int("bad-int") == 0 + assert sf._as_item_bool(1) is True + assert sf._as_item_bool("yes") is True + assert sf._as_item_bool("no") is False + assert sf._clone_member_from_item({}) is None + assert sf._majority_str([], default="fallback") == "fallback" + assert sf._majority_int([], default=7) == 7 + assert 
sf._majority_bool([], default=True) is True + + member = sf._CloneCohortMember( + file_path="pkg/a.py", + qualname="pkg.a:f", + start=1, + end=2, + entry_guard_count=0, + entry_guard_terminal_profile="none", + entry_guard_has_side_effect_before=False, + terminal_kind="return_const", + try_finally_profile="none", + side_effect_order_profile="none", + ) + assert sf._member_profile_value(member, "unknown-field") == "" + + +def test_clone_cohort_builders_cover_early_exit_paths() -> None: + base_member = sf._CloneCohortMember( + file_path="pkg/a.py", + qualname="pkg.a:f", + start=1, + end=2, + entry_guard_count=1, + entry_guard_terminal_profile="return_const", + entry_guard_has_side_effect_before=False, + terminal_kind="return_const", + try_finally_profile="none", + side_effect_order_profile="guard_then_effect", + ) + no_guard_member = sf._CloneCohortMember( + file_path="pkg/b.py", + qualname="pkg.b:f", + start=2, + end=3, + entry_guard_count=0, + entry_guard_terminal_profile="none", + entry_guard_has_side_effect_before=False, + terminal_kind="return_const", + try_finally_profile="none", + side_effect_order_profile="effect_only", + ) + + assert sf._clone_guard_exit_divergence("c1", (base_member, base_member)) is None + assert ( + sf._clone_guard_exit_divergence( + "c2", + (no_guard_member, no_guard_member, no_guard_member), + ) + is None + ) + assert ( + sf._clone_guard_exit_divergence( + "c3", + (base_member, base_member, base_member), + ) + is None + ) + + assert sf._clone_cohort_drift("c4", (base_member, base_member)) is None + assert sf._clone_cohort_drift("c5", (base_member, base_member, base_member)) is None + + +def test_scanner_private_paths_cover_collection_and_normalization_branches() -> None: + scanner = sf._FunctionStructureScanner( + filepath="pkg/mod.py", + qualname="pkg.mod:f", + collect_findings=True, + ) + reportable_signature = { + "stmt_seq": "Assign,Return", + "terminal": "return_name", + "calls": "0", + "raises": "0", + "nested_if": "0", + 
"has_loop": "0", + "has_try": "0", + } + trivial_signature = { + "stmt_seq": "Expr", + "terminal": "expr", + "calls": "0", + "raises": "0", + "nested_if": "0", + "has_loop": "0", + "has_try": "0", + } + scanner._sig_to_branches["single"] = [(reportable_signature, 10, 11)] + scanner._sig_to_branches["trivial"] = [ + (trivial_signature, 12, 12), + (trivial_signature, 13, 13), + ] + assert scanner._build_groups() == [] + + if_chain = ast.parse( + "if x:\n a = 1\nelif y:\n b = 2\nelse:\n pass\n", + ).body[0] + assert isinstance(if_chain, ast.If) + bodies = sf._collect_if_branch_bodies(if_chain) + assert len(bodies) == 2 + + match_stmt = ast.parse( + "match x:\n case 1:\n pass\n case 2:\n value = 2\n", + ).body[0] + match_bodies = sf._collect_match_branch_bodies(match_stmt) + assert len(match_bodies) == 1 + + iter_scanner = sf._FunctionStructureScanner( + filepath="pkg/mod.py", + qualname="pkg.mod:f", + collect_findings=False, + ) + for_stmt = ast.parse("for i in xs:\n pass\nelse:\n pass\n").body[0] + with_stmt = ast.parse("with cm:\n pass\n").body[0] + try_stmt = ast.parse( + "try:\n" + " pass\n" + "except Exception:\n" + " pass\n" + "else:\n" + " pass\n" + "finally:\n" + " pass\n", + ).body[0] + assign_stmt = ast.parse("value = 1\n").body[0] + assert len(iter_scanner._iter_nested_statement_lists(for_stmt)) == 2 + assert len(iter_scanner._iter_nested_statement_lists(with_stmt)) == 1 + assert len(iter_scanner._iter_nested_statement_lists(try_stmt)) == 4 + assert iter_scanner._iter_nested_statement_lists(assign_stmt) == () + + +def test_scan_function_structure_visits_nested_bodies_and_match_without_findings() -> ( + None +): + class_body_source = """ +def fn(): + class Inner: + value = 1 + return 1 +""" + class_facts = scan_function_structure( + _parse_fn(class_body_source), + "mod.py", + "pkg.mod:fn", + collect_findings=False, + ) + assert class_facts.terminal_kind == "return_const" + + match_source = """ +def fn(x): + match x: + case 1: + return 1 + case _: + return 2 
+""" + match_facts = scan_function_structure( + _parse_fn(match_source), + "mod.py", + "pkg.mod:fn", + collect_findings=False, + ) + assert match_facts.structural_findings == () diff --git a/tests/test_suppressions.py b/tests/test_suppressions.py new file mode 100644 index 0000000..2848704 --- /dev/null +++ b/tests/test_suppressions.py @@ -0,0 +1,430 @@ +from __future__ import annotations + +import pytest + +from codeclone.suppressions import ( + DeclarationTarget, + SuppressionBinding, + SuppressionDirective, + bind_suppressions_to_declarations, + build_suppression_index, + extract_suppression_directives, + suppression_target_key, +) + + +def test_extract_suppression_directives_supports_inline_and_leading_forms() -> None: + source = """ +# codeclone: ignore[dead-code] +def a() -> int: + return 1 + +def b() -> int: # codeclone: ignore[dead-code] + return 2 + +class C: # codeclone: ignore[dead-code] + pass + +# codeclone: ignore [ dead-code , clone-cohort-drift ] +async def d() -> int: + return 3 +""".strip() + directives = extract_suppression_directives(source) + assert directives == ( + SuppressionDirective(line=1, binding="leading", rules=("dead-code",)), + SuppressionDirective(line=5, binding="inline", rules=("dead-code",)), + SuppressionDirective(line=8, binding="inline", rules=("dead-code",)), + SuppressionDirective( + line=11, + binding="leading", + rules=("dead-code", "clone-cohort-drift"), + ), + ) + + +def test_extract_suppression_directives_ignores_unknown_and_malformed_safely() -> None: + source = """ +def a() -> int: # codeclone: ignore[dead-code, dead-code, unknown-rule] + return 1 + +def b() -> int: # codeclone: ignore + return 2 + +def c() -> int: # codeclone: IGNORE[dead-code] + return 3 + +def d() -> int: # codeclone ignore[dead-code] + return 4 +""".strip() + directives = extract_suppression_directives(source) + assert directives == ( + SuppressionDirective(line=1, binding="inline", rules=("dead-code",)), + ) + + +def 
test_extract_suppression_directives_ignores_invalid_rule_tokens() -> None: + source = """ +def a() -> int: # codeclone: ignore[dead-code, , invalid!, unknown-rule] + return 1 + +def b() -> int: # codeclone: ignore[unknown-rule] + return 2 +""".strip() + directives = extract_suppression_directives(source) + assert directives == ( + SuppressionDirective(line=1, binding="inline", rules=("dead-code",)), + ) + + +def test_extract_suppression_directives_returns_empty_on_tokenize_error() -> None: + # Unclosed triple quote triggers tokenize.TokenError and must be ignored safely. + source = '"""\n# codeclone: ignore[dead-code]\n' + assert extract_suppression_directives(source) == () + + +def test_bind_suppressions_applies_only_to_adjacent_declaration_line() -> None: + source = """ +# codeclone: ignore[dead-code] +def kept() -> int: + return 1 + +# codeclone: ignore[dead-code] + +def not_bound() -> int: + return 2 +""".strip() + directives = extract_suppression_directives(source) + declarations = ( + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:kept", + start_line=2, + end_line=3, + kind="function", + ), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:not_bound", + start_line=7, + end_line=8, + kind="function", + ), + ) + bindings = bind_suppressions_to_declarations( + directives=directives, + declarations=declarations, + ) + assert bindings == ( + SuppressionBinding( + filepath="pkg/mod.py", + qualname="pkg.mod:kept", + start_line=2, + end_line=3, + kind="function", + rules=("dead-code",), + ), + ) + + +def test_bind_suppressions_does_not_propagate_class_inline_to_method() -> None: + source = """ +class Demo: # codeclone: ignore[dead-code] + def method(self) -> int: + return 1 +""".strip() + directives = extract_suppression_directives(source) + declarations = ( + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo", + start_line=1, + end_line=3, + kind="class", + ), + DeclarationTarget( + filepath="pkg/mod.py", + 
qualname="pkg.mod:Demo.method", + start_line=2, + end_line=3, + kind="method", + ), + ) + bindings = bind_suppressions_to_declarations( + directives=directives, + declarations=declarations, + ) + assert bindings == ( + SuppressionBinding( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo", + start_line=1, + end_line=3, + kind="class", + rules=("dead-code",), + ), + ) + + +def test_bind_suppressions_applies_to_method_target() -> None: + source = """ +class Demo: + # codeclone: ignore[dead-code] + def method(self) -> int: + return 1 +""".strip() + directives = extract_suppression_directives(source) + declarations = ( + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo", + start_line=1, + end_line=4, + kind="class", + ), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo.method", + start_line=3, + end_line=4, + kind="method", + ), + ) + bindings = bind_suppressions_to_declarations( + directives=directives, + declarations=declarations, + ) + assert bindings == ( + SuppressionBinding( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo.method", + start_line=3, + end_line=4, + kind="method", + rules=("dead-code",), + ), + ) + + +def test_bind_suppressions_supports_inline_on_multiline_declaration_end_line() -> None: + source = """ +@decorator +def keep( + arg: int, +) -> int: # codeclone: ignore[dead-code] + return arg +""".strip() + directives = extract_suppression_directives(source) + declarations = ( + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:keep", + start_line=2, + end_line=5, + kind="function", + declaration_end_line=4, + ), + ) + bindings = bind_suppressions_to_declarations( + directives=directives, + declarations=declarations, + ) + assert bindings == ( + SuppressionBinding( + filepath="pkg/mod.py", + qualname="pkg.mod:keep", + start_line=2, + end_line=5, + kind="function", + rules=("dead-code",), + ), + ) + + +@pytest.mark.parametrize( + ("source", "declaration"), + [ + ( + """ +@decorator +def keep( # 
codeclone: ignore[dead-code] + arg: int, +) -> int: + return arg +""".strip(), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:keep", + start_line=2, + end_line=5, + kind="function", + declaration_end_line=4, + ), + ), + ( + """ +async def keep_async( # codeclone: ignore[dead-code] + arg: int, +) -> int: + return arg +""".strip(), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:keep_async", + start_line=1, + end_line=4, + kind="function", + declaration_end_line=3, + ), + ), + ( + """ +class Demo( # codeclone: ignore[dead-code] + Base, +): + pass +""".strip(), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo", + start_line=1, + end_line=4, + kind="class", + declaration_end_line=3, + ), + ), + ], +) +def test_bind_suppressions_supports_inline_on_multiline_declaration_start_line( + source: str, + declaration: DeclarationTarget, +) -> None: + directives = extract_suppression_directives(source) + bindings = bind_suppressions_to_declarations( + directives=directives, + declarations=(declaration,), + ) + assert bindings == ( + SuppressionBinding( + filepath=declaration.filepath, + qualname=declaration.qualname, + start_line=declaration.start_line, + end_line=declaration.end_line, + kind=declaration.kind, + rules=("dead-code",), + ), + ) + + +def test_bind_suppressions_ignores_inline_comment_on_middle_signature_line() -> None: + source = """ +def keep( + arg: int, # codeclone: ignore[dead-code] +) -> int: + return arg +""".strip() + directives = extract_suppression_directives(source) + declarations = ( + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:keep", + start_line=1, + end_line=4, + kind="function", + declaration_end_line=3, + ), + ) + assert ( + bind_suppressions_to_declarations( + directives=directives, + declarations=declarations, + ) + == () + ) + + +@pytest.mark.parametrize( + ("source", "declaration"), + [ + ( + """ +async def keep_async( + arg: int, +) -> int: # codeclone: 
ignore[dead-code] + return arg +""".strip(), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:keep_async", + start_line=1, + end_line=4, + kind="function", + declaration_end_line=3, + ), + ), + ( + """ +class Demo( + Base, +): # codeclone: ignore[dead-code] + pass +""".strip(), + DeclarationTarget( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo", + start_line=1, + end_line=4, + kind="class", + declaration_end_line=3, + ), + ), + ], +) +def test_bind_suppressions_supports_multiline_inline_for_supported_targets( + source: str, + declaration: DeclarationTarget, +) -> None: + directives = extract_suppression_directives(source) + bindings = bind_suppressions_to_declarations( + directives=directives, + declarations=(declaration,), + ) + assert bindings == ( + SuppressionBinding( + filepath=declaration.filepath, + qualname=declaration.qualname, + start_line=declaration.start_line, + end_line=declaration.end_line, + kind=declaration.kind, + rules=("dead-code",), + ), + ) + + +def test_build_suppression_index_deduplicates_rules_stably() -> None: + bindings = ( + SuppressionBinding( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo.method", + start_line=3, + end_line=4, + kind="method", + rules=("dead-code",), + ), + SuppressionBinding( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo.method", + start_line=3, + end_line=4, + kind="method", + rules=("dead-code", "clone-cohort-drift"), + ), + ) + index = build_suppression_index(bindings) + key = suppression_target_key( + filepath="pkg/mod.py", + qualname="pkg.mod:Demo.method", + start_line=3, + end_line=4, + kind="method", + ) + assert index[key] == ("dead-code", "clone-cohort-drift") diff --git a/uv.lock b/uv.lock index 4c49b89..6c42f6e 100644 --- a/uv.lock +++ b/uv.lock @@ -13,7 +13,7 @@ wheels = [ [[package]] name = "build" -version = "1.4.0" +version = "1.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "os_name == 'nt'" }, @@ -22,9 +22,9 @@ 
dependencies = [ { name = "pyproject-hooks" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/18/94eaffda7b329535d91f00fe605ab1f1e5cd68b2074d03f255c7d250687d/build-1.4.0.tar.gz", hash = "sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936", size = 50054, upload-time = "2026-01-08T16:41:47.696Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/12/fa7bd9f677a2dcc58a395217c221e2a5e5cebd59ddc9756bc4f5fede8719/build-1.4.1.tar.gz", hash = "sha256:30adeb28821e573a49b556030d8c84186d112f6a38b12fa5476092c4544ae55a", size = 83276, upload-time = "2026-03-24T23:09:00.209Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/0d/84a4380f930db0010168e0aa7b7a8fed9ba1835a8fbb1472bc6d0201d529/build-1.4.0-py3-none-any.whl", hash = "sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596", size = 24141, upload-time = "2026-01-08T16:41:46.453Z" }, + { url = "https://files.pythonhosted.org/packages/f9/54/8d858f562f598897a7e5e89a8da4f54de06bcd85a98add1275c84efc9ce4/build-1.4.1-py3-none-any.whl", hash = "sha256:21c81f7a0fa423f0da229335c5c2a605967fbfc9af3c4b6ecd368265ed59c6bc", size = 24633, upload-time = "2026-03-24T23:08:58.677Z" }, ] [[package]] @@ -88,100 +88,117 @@ wheels = [ [[package]] name = "charset-normalizer" -version = "3.4.5" +version = "3.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1d/35/02daf95b9cd686320bb622eb148792655c9412dbb9b67abb5694e5910a24/charset_normalizer-3.4.5.tar.gz", hash = "sha256:95adae7b6c42a6c5b5b559b1a99149f090a57128155daeea91732c8d970d8644", size = 134804, upload-time = "2026-03-06T06:03:19.46Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/60/e3bec1881450851b087e301bedc3daa9377a4d45f1c26aa90b0b235e38aa/charset_normalizer-3.4.6.tar.gz", hash = "sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6", size = 143363, 
upload-time = "2026-03-15T18:53:25.478Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/21/a2b1505639008ba2e6ef03733a81fc6cfd6a07ea6139a2b76421230b8dad/charset_normalizer-3.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4167a621a9a1a986c73777dbc15d4b5eac8ac5c10393374109a343d4013ec765", size = 283319, upload-time = "2026-03-06T06:00:26.433Z" }, - { url = "https://files.pythonhosted.org/packages/70/67/df234c29b68f4e1e095885c9db1cb4b69b8aba49cf94fac041db4aaf1267/charset_normalizer-3.4.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f64c6bf8f32f9133b668c7f7a7cbdbc453412bc95ecdbd157f3b1e377a92990", size = 189974, upload-time = "2026-03-06T06:00:28.222Z" }, - { url = "https://files.pythonhosted.org/packages/df/7f/fc66af802961c6be42e2c7b69c58f95cbd1f39b0e81b3365d8efe2a02a04/charset_normalizer-3.4.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:568e3c34b58422075a1b49575a6abc616d9751b4d61b23f712e12ebb78fe47b2", size = 207866, upload-time = "2026-03-06T06:00:29.769Z" }, - { url = "https://files.pythonhosted.org/packages/c9/23/404eb36fac4e95b833c50e305bba9a241086d427bb2167a42eac7c4f7da4/charset_normalizer-3.4.5-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:036c079aa08a6a592b82487f97c60b439428320ed1b2ea0b3912e99d30c77765", size = 203239, upload-time = "2026-03-06T06:00:31.086Z" }, - { url = "https://files.pythonhosted.org/packages/4b/2f/8a1d989bfadd120c90114ab33e0d2a0cbde05278c1fc15e83e62d570f50a/charset_normalizer-3.4.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340810d34ef83af92148e96e3e44cb2d3f910d2bf95e5618a5c467d9f102231d", size = 196529, upload-time = "2026-03-06T06:00:32.608Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/0c/c75f85ff7ca1f051958bb518cd43922d86f576c03947a050fbedfdfb4f15/charset_normalizer-3.4.5-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:cd2d0f0ec9aa977a27731a3209ebbcacebebaf41f902bd453a928bfd281cf7f8", size = 184152, upload-time = "2026-03-06T06:00:33.93Z" }, - { url = "https://files.pythonhosted.org/packages/f9/20/4ed37f6199af5dde94d4aeaf577f3813a5ec6635834cda1d957013a09c76/charset_normalizer-3.4.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0b362bcd27819f9c07cbf23db4e0e8cd4b44c5ecd900c2ff907b2b92274a7412", size = 195226, upload-time = "2026-03-06T06:00:35.469Z" }, - { url = "https://files.pythonhosted.org/packages/28/31/7ba1102178cba7c34dcc050f43d427172f389729e356038f0726253dd914/charset_normalizer-3.4.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:77be992288f720306ab4108fe5c74797de327f3248368dfc7e1a916d6ed9e5a2", size = 192933, upload-time = "2026-03-06T06:00:36.83Z" }, - { url = "https://files.pythonhosted.org/packages/4b/23/f86443ab3921e6a60b33b93f4a1161222231f6c69bc24fb18f3bee7b8518/charset_normalizer-3.4.5-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:8b78d8a609a4b82c273257ee9d631ded7fac0d875bdcdccc109f3ee8328cfcb1", size = 185647, upload-time = "2026-03-06T06:00:38.367Z" }, - { url = "https://files.pythonhosted.org/packages/82/44/08b8be891760f1f5a6d23ce11d6d50c92981603e6eb740b4f72eea9424e2/charset_normalizer-3.4.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ba20bdf69bd127f66d0174d6f2a93e69045e0b4036dc1ca78e091bcc765830c4", size = 209533, upload-time = "2026-03-06T06:00:41.931Z" }, - { url = "https://files.pythonhosted.org/packages/3b/5f/df114f23406199f8af711ddccfbf409ffbc5b7cdc18fa19644997ff0c9bb/charset_normalizer-3.4.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:76a9d0de4d0eab387822e7b35d8f89367dd237c72e82ab42b9f7bf5e15ada00f", size = 195901, upload-time = "2026-03-06T06:00:43.978Z" }, - { url = 
"https://files.pythonhosted.org/packages/07/83/71ef34a76fe8aa05ff8f840244bda2d61e043c2ef6f30d200450b9f6a1be/charset_normalizer-3.4.5-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8fff79bf5978c693c9b1a4d71e4a94fddfb5fe744eb062a318e15f4a2f63a550", size = 204950, upload-time = "2026-03-06T06:00:45.202Z" }, - { url = "https://files.pythonhosted.org/packages/58/40/0253be623995365137d7dc68e45245036207ab2227251e69a3d93ce43183/charset_normalizer-3.4.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c7e84e0c0005e3bdc1a9211cd4e62c78ba80bc37b2365ef4410cd2007a9047f2", size = 198546, upload-time = "2026-03-06T06:00:46.481Z" }, - { url = "https://files.pythonhosted.org/packages/ed/5c/5f3cb5b259a130895ef5ae16b38eaf141430fa3f7af50cd06c5d67e4f7b2/charset_normalizer-3.4.5-cp310-cp310-win32.whl", hash = "sha256:58ad8270cfa5d4bef1bc85bd387217e14ff154d6630e976c6f56f9a040757475", size = 132516, upload-time = "2026-03-06T06:00:47.924Z" }, - { url = "https://files.pythonhosted.org/packages/a5/c3/84fb174e7770f2df2e1a2115090771bfbc2227fb39a765c6d00568d1aab4/charset_normalizer-3.4.5-cp310-cp310-win_amd64.whl", hash = "sha256:02a9d1b01c1e12c27883b0c9349e0bcd9ae92e727ff1a277207e1a262b1cbf05", size = 142906, upload-time = "2026-03-06T06:00:49.389Z" }, - { url = "https://files.pythonhosted.org/packages/d7/b2/6f852f8b969f2cbd0d4092d2e60139ab1af95af9bb651337cae89ec0f684/charset_normalizer-3.4.5-cp310-cp310-win_arm64.whl", hash = "sha256:039215608ac7b358c4da0191d10fc76868567fbf276d54c14721bdedeb6de064", size = 133258, upload-time = "2026-03-06T06:00:51.051Z" }, - { url = "https://files.pythonhosted.org/packages/8f/9e/bcec3b22c64ecec47d39bf5167c2613efd41898c019dccd4183f6aa5d6a7/charset_normalizer-3.4.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:610f72c0ee565dfb8ae1241b666119582fdbfe7c0975c175be719f940e110694", size = 279531, upload-time = "2026-03-06T06:00:52.252Z" }, - { url = 
"https://files.pythonhosted.org/packages/58/12/81fd25f7e7078ab5d1eedbb0fac44be4904ae3370a3bf4533c8f2d159acd/charset_normalizer-3.4.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60d68e820af339df4ae8358c7a2e7596badeb61e544438e489035f9fbf3246a5", size = 188006, upload-time = "2026-03-06T06:00:53.8Z" }, - { url = "https://files.pythonhosted.org/packages/ae/6e/f2d30e8c27c1b0736a6520311982cf5286cfc7f6cac77d7bc1325e3a23f2/charset_normalizer-3.4.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b473fc8dca1c3ad8559985794815f06ca3fc71942c969129070f2c3cdf7281", size = 205085, upload-time = "2026-03-06T06:00:55.311Z" }, - { url = "https://files.pythonhosted.org/packages/d0/90/d12cefcb53b5931e2cf792a33718d7126efb116a320eaa0742c7059a95e4/charset_normalizer-3.4.5-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d4eb8ac7469b2a5d64b5b8c04f84d8bf3ad340f4514b98523805cbf46e3b3923", size = 200545, upload-time = "2026-03-06T06:00:56.532Z" }, - { url = "https://files.pythonhosted.org/packages/03/f4/44d3b830a20e89ff82a3134912d9a1cf6084d64f3b95dcad40f74449a654/charset_normalizer-3.4.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bcb3227c3d9aaf73eaaab1db7ccd80a8995c509ee9941e2aae060ca6e4e5d81", size = 193863, upload-time = "2026-03-06T06:00:57.823Z" }, - { url = "https://files.pythonhosted.org/packages/25/4b/f212119c18a6320a9d4a730d1b4057875cdeabf21b3614f76549042ef8a8/charset_normalizer-3.4.5-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:75ee9c1cce2911581a70a3c0919d8bccf5b1cbc9b0e5171400ec736b4b569497", size = 181827, upload-time = "2026-03-06T06:00:59.323Z" }, - { url = "https://files.pythonhosted.org/packages/74/00/b26158e48b425a202a92965f8069e8a63d9af1481dfa206825d7f74d2a3c/charset_normalizer-3.4.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:1d1401945cb77787dbd3af2446ff2d75912327c4c3a1526ab7955ecf8600687c", size = 191085, upload-time = "2026-03-06T06:01:00.546Z" }, - { url = "https://files.pythonhosted.org/packages/c4/c2/1c1737bf6fd40335fe53d28fe49afd99ee4143cc57a845e99635ce0b9b6d/charset_normalizer-3.4.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a45e504f5e1be0bd385935a8e1507c442349ca36f511a47057a71c9d1d6ea9e", size = 190688, upload-time = "2026-03-06T06:01:02.479Z" }, - { url = "https://files.pythonhosted.org/packages/5a/3d/abb5c22dc2ef493cd56522f811246a63c5427c08f3e3e50ab663de27fcf4/charset_normalizer-3.4.5-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e09f671a54ce70b79a1fc1dc6da3072b7ef7251fadb894ed92d9aa8218465a5f", size = 183077, upload-time = "2026-03-06T06:01:04.231Z" }, - { url = "https://files.pythonhosted.org/packages/44/33/5298ad4d419a58e25b3508e87f2758d1442ff00c2471f8e0403dab8edad5/charset_normalizer-3.4.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d01de5e768328646e6a3fa9e562706f8f6641708c115c62588aef2b941a4f88e", size = 206706, upload-time = "2026-03-06T06:01:05.773Z" }, - { url = "https://files.pythonhosted.org/packages/7b/17/51e7895ac0f87c3b91d276a449ef09f5532a7529818f59646d7a55089432/charset_normalizer-3.4.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:131716d6786ad5e3dc542f5cc6f397ba3339dc0fb87f87ac30e550e8987756af", size = 191665, upload-time = "2026-03-06T06:01:07.473Z" }, - { url = "https://files.pythonhosted.org/packages/90/8f/cce9adf1883e98906dbae380d769b4852bb0fa0004bc7d7a2243418d3ea8/charset_normalizer-3.4.5-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1a374cc0b88aa710e8865dc1bd6edb3743c59f27830f0293ab101e4cf3ce9f85", size = 201950, upload-time = "2026-03-06T06:01:08.973Z" }, - { url = "https://files.pythonhosted.org/packages/08/ca/bce99cd5c397a52919e2769d126723f27a4c037130374c051c00470bcd38/charset_normalizer-3.4.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:d31f0d1671e1534e395f9eb84a68e0fb670e1edb1fe819a9d7f564ae3bc4e53f", size = 195830, upload-time = "2026-03-06T06:01:10.155Z" }, - { url = "https://files.pythonhosted.org/packages/87/4f/2e3d023a06911f1281f97b8f036edc9872167036ca6f55cc874a0be6c12c/charset_normalizer-3.4.5-cp311-cp311-win32.whl", hash = "sha256:cace89841c0599d736d3d74a27bc5821288bb47c5441923277afc6059d7fbcb4", size = 132029, upload-time = "2026-03-06T06:01:11.706Z" }, - { url = "https://files.pythonhosted.org/packages/fe/1f/a853b73d386521fd44b7f67ded6b17b7b2367067d9106a5c4b44f9a34274/charset_normalizer-3.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:f8102ae93c0bc863b1d41ea0f4499c20a83229f52ed870850892df555187154a", size = 142404, upload-time = "2026-03-06T06:01:12.865Z" }, - { url = "https://files.pythonhosted.org/packages/b4/10/dba36f76b71c38e9d391abe0fd8a5b818790e053c431adecfc98c35cd2a9/charset_normalizer-3.4.5-cp311-cp311-win_arm64.whl", hash = "sha256:ed98364e1c262cf5f9363c3eca8c2df37024f52a8fa1180a3610014f26eac51c", size = 132796, upload-time = "2026-03-06T06:01:14.106Z" }, - { url = "https://files.pythonhosted.org/packages/9c/b6/9ee9c1a608916ca5feae81a344dffbaa53b26b90be58cc2159e3332d44ec/charset_normalizer-3.4.5-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ed97c282ee4f994ef814042423a529df9497e3c666dca19be1d4cd1129dc7ade", size = 280976, upload-time = "2026-03-06T06:01:15.276Z" }, - { url = "https://files.pythonhosted.org/packages/f8/d8/a54f7c0b96f1df3563e9190f04daf981e365a9b397eedfdfb5dbef7e5c6c/charset_normalizer-3.4.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0294916d6ccf2d069727d65973c3a1ca477d68708db25fd758dd28b0827cff54", size = 189356, upload-time = "2026-03-06T06:01:16.511Z" }, - { url = "https://files.pythonhosted.org/packages/42/69/2bf7f76ce1446759a5787cb87d38f6a61eb47dbbdf035cfebf6347292a65/charset_normalizer-3.4.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:dc57a0baa3eeedd99fafaef7511b5a6ef4581494e8168ee086031744e2679467", size = 206369, upload-time = "2026-03-06T06:01:17.853Z" }, - { url = "https://files.pythonhosted.org/packages/10/9c/949d1a46dab56b959d9a87272482195f1840b515a3380e39986989a893ae/charset_normalizer-3.4.5-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ed1a9a204f317ef879b32f9af507d47e49cd5e7f8e8d5d96358c98373314fc60", size = 203285, upload-time = "2026-03-06T06:01:19.473Z" }, - { url = "https://files.pythonhosted.org/packages/67/5c/ae30362a88b4da237d71ea214a8c7eb915db3eec941adda511729ac25fa2/charset_normalizer-3.4.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad83b8f9379176c841f8865884f3514d905bcd2a9a3b210eaa446e7d2223e4d", size = 196274, upload-time = "2026-03-06T06:01:20.728Z" }, - { url = "https://files.pythonhosted.org/packages/b2/07/c9f2cb0e46cb6d64fdcc4f95953747b843bb2181bda678dc4e699b8f0f9a/charset_normalizer-3.4.5-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:a118e2e0b5ae6b0120d5efa5f866e58f2bb826067a646431da4d6a2bdae7950e", size = 184715, upload-time = "2026-03-06T06:01:22.194Z" }, - { url = "https://files.pythonhosted.org/packages/36/64/6b0ca95c44fddf692cd06d642b28f63009d0ce325fad6e9b2b4d0ef86a52/charset_normalizer-3.4.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:754f96058e61a5e22e91483f823e07df16416ce76afa4ebf306f8e1d1296d43f", size = 193426, upload-time = "2026-03-06T06:01:23.795Z" }, - { url = "https://files.pythonhosted.org/packages/50/bc/a730690d726403743795ca3f5bb2baf67838c5fea78236098f324b965e40/charset_normalizer-3.4.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0c300cefd9b0970381a46394902cd18eaf2aa00163f999590ace991989dcd0fc", size = 191780, upload-time = "2026-03-06T06:01:25.053Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/4f/6c0bc9af68222b22951552d73df4532b5be6447cee32d58e7e8c74ecbb7b/charset_normalizer-3.4.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c108f8619e504140569ee7de3f97d234f0fbae338a7f9f360455071ef9855a95", size = 185805, upload-time = "2026-03-06T06:01:26.294Z" }, - { url = "https://files.pythonhosted.org/packages/dd/b9/a523fb9b0ee90814b503452b2600e4cbc118cd68714d57041564886e7325/charset_normalizer-3.4.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d1028de43596a315e2720a9849ee79007ab742c06ad8b45a50db8cdb7ed4a82a", size = 208342, upload-time = "2026-03-06T06:01:27.55Z" }, - { url = "https://files.pythonhosted.org/packages/4d/61/c59e761dee4464050713e50e27b58266cc8e209e518c0b378c1580c959ba/charset_normalizer-3.4.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:19092dde50335accf365cce21998a1c6dd8eafd42c7b226eb54b2747cdce2fac", size = 193661, upload-time = "2026-03-06T06:01:29.051Z" }, - { url = "https://files.pythonhosted.org/packages/1c/43/729fa30aad69783f755c5ad8649da17ee095311ca42024742701e202dc59/charset_normalizer-3.4.5-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4354e401eb6dab9aed3c7b4030514328a6c748d05e1c3e19175008ca7de84fb1", size = 204819, upload-time = "2026-03-06T06:01:30.298Z" }, - { url = "https://files.pythonhosted.org/packages/87/33/d9b442ce5a91b96fc0840455a9e49a611bbadae6122778d0a6a79683dd31/charset_normalizer-3.4.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a68766a3c58fde7f9aaa22b3786276f62ab2f594efb02d0a1421b6282e852e98", size = 198080, upload-time = "2026-03-06T06:01:31.478Z" }, - { url = "https://files.pythonhosted.org/packages/56/5a/b8b5a23134978ee9885cee2d6995f4c27cc41f9baded0a9685eabc5338f0/charset_normalizer-3.4.5-cp312-cp312-win32.whl", hash = "sha256:1827734a5b308b65ac54e86a618de66f935a4f63a8a462ff1e19a6788d6c2262", size = 132630, upload-time = "2026-03-06T06:01:33.056Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/53/e44a4c07e8904500aec95865dc3f6464dc3586a039ef0df606eb3ac38e35/charset_normalizer-3.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:728c6a963dfab66ef865f49286e45239384249672cd598576765acc2a640a636", size = 142856, upload-time = "2026-03-06T06:01:34.489Z" }, - { url = "https://files.pythonhosted.org/packages/ea/aa/c5628f7cad591b1cf45790b7a61483c3e36cf41349c98af7813c483fd6e8/charset_normalizer-3.4.5-cp312-cp312-win_arm64.whl", hash = "sha256:75dfd1afe0b1647449e852f4fb428195a7ed0588947218f7ba929f6538487f02", size = 132982, upload-time = "2026-03-06T06:01:35.641Z" }, - { url = "https://files.pythonhosted.org/packages/f5/48/9f34ec4bb24aa3fdba1890c1bddb97c8a4be1bd84ef5c42ac2352563ad05/charset_normalizer-3.4.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ac59c15e3f1465f722607800c68713f9fbc2f672b9eb649fe831da4019ae9b23", size = 280788, upload-time = "2026-03-06T06:01:37.126Z" }, - { url = "https://files.pythonhosted.org/packages/0e/09/6003e7ffeb90cc0560da893e3208396a44c210c5ee42efff539639def59b/charset_normalizer-3.4.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:165c7b21d19365464e8f70e5ce5e12524c58b48c78c1f5a57524603c1ab003f8", size = 188890, upload-time = "2026-03-06T06:01:38.73Z" }, - { url = "https://files.pythonhosted.org/packages/42/1e/02706edf19e390680daa694d17e2b8eab4b5f7ac285e2a51168b4b22ee6b/charset_normalizer-3.4.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:28269983f25a4da0425743d0d257a2d6921ea7d9b83599d4039486ec5b9f911d", size = 206136, upload-time = "2026-03-06T06:01:40.016Z" }, - { url = "https://files.pythonhosted.org/packages/c7/87/942c3def1b37baf3cf786bad01249190f3ca3d5e63a84f831e704977de1f/charset_normalizer-3.4.5-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d27ce22ec453564770d29d03a9506d449efbb9fa13c00842262b2f6801c48cce", size = 202551, 
upload-time = "2026-03-06T06:01:41.522Z" }, - { url = "https://files.pythonhosted.org/packages/94/0a/af49691938dfe175d71b8a929bd7e4ace2809c0c5134e28bc535660d5262/charset_normalizer-3.4.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0625665e4ebdddb553ab185de5db7054393af8879fb0c87bd5690d14379d6819", size = 195572, upload-time = "2026-03-06T06:01:43.208Z" }, - { url = "https://files.pythonhosted.org/packages/20/ea/dfb1792a8050a8e694cfbde1570ff97ff74e48afd874152d38163d1df9ae/charset_normalizer-3.4.5-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:c23eb3263356d94858655b3e63f85ac5d50970c6e8febcdde7830209139cc37d", size = 184438, upload-time = "2026-03-06T06:01:44.755Z" }, - { url = "https://files.pythonhosted.org/packages/72/12/c281e2067466e3ddd0595bfaea58a6946765ace5c72dfa3edc2f5f118026/charset_normalizer-3.4.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e6302ca4ae283deb0af68d2fbf467474b8b6aedcd3dab4db187e07f94c109763", size = 193035, upload-time = "2026-03-06T06:01:46.051Z" }, - { url = "https://files.pythonhosted.org/packages/ba/4f/3792c056e7708e10464bad0438a44708886fb8f92e3c3d29ec5e2d964d42/charset_normalizer-3.4.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e51ae7d81c825761d941962450f50d041db028b7278e7b08930b4541b3e45cb9", size = 191340, upload-time = "2026-03-06T06:01:47.547Z" }, - { url = "https://files.pythonhosted.org/packages/e7/86/80ddba897127b5c7a9bccc481b0cd36c8fefa485d113262f0fe4332f0bf4/charset_normalizer-3.4.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:597d10dec876923e5c59e48dbd366e852eacb2b806029491d307daea6b917d7c", size = 185464, upload-time = "2026-03-06T06:01:48.764Z" }, - { url = "https://files.pythonhosted.org/packages/4d/00/b5eff85ba198faacab83e0e4b6f0648155f072278e3b392a82478f8b988b/charset_normalizer-3.4.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5cffde4032a197bd3b42fd0b9509ec60fb70918d6970e4cc773f20fc9180ca67", size 
= 208014, upload-time = "2026-03-06T06:01:50.371Z" }, - { url = "https://files.pythonhosted.org/packages/c8/11/d36f70be01597fd30850dde8a1269ebc8efadd23ba5785808454f2389bde/charset_normalizer-3.4.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2da4eedcb6338e2321e831a0165759c0c620e37f8cd044a263ff67493be8ffb3", size = 193297, upload-time = "2026-03-06T06:01:51.933Z" }, - { url = "https://files.pythonhosted.org/packages/1a/1d/259eb0a53d4910536c7c2abb9cb25f4153548efb42800c6a9456764649c0/charset_normalizer-3.4.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:65a126fb4b070d05340a84fc709dd9e7c75d9b063b610ece8a60197a291d0adf", size = 204321, upload-time = "2026-03-06T06:01:53.887Z" }, - { url = "https://files.pythonhosted.org/packages/84/31/faa6c5b9d3688715e1ed1bb9d124c384fe2fc1633a409e503ffe1c6398c1/charset_normalizer-3.4.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c7a80a9242963416bd81f99349d5f3fce1843c303bd404f204918b6d75a75fd6", size = 197509, upload-time = "2026-03-06T06:01:56.439Z" }, - { url = "https://files.pythonhosted.org/packages/fd/a5/c7d9dd1503ffc08950b3260f5d39ec2366dd08254f0900ecbcf3a6197c7c/charset_normalizer-3.4.5-cp313-cp313-win32.whl", hash = "sha256:f1d725b754e967e648046f00c4facc42d414840f5ccc670c5670f59f83693e4f", size = 132284, upload-time = "2026-03-06T06:01:57.812Z" }, - { url = "https://files.pythonhosted.org/packages/b9/0f/57072b253af40c8aa6636e6de7d75985624c1eb392815b2f934199340a89/charset_normalizer-3.4.5-cp313-cp313-win_amd64.whl", hash = "sha256:e37bd100d2c5d3ba35db9c7c5ba5a9228cbcffe5c4778dc824b164e5257813d7", size = 142630, upload-time = "2026-03-06T06:01:59.062Z" }, - { url = "https://files.pythonhosted.org/packages/31/41/1c4b7cc9f13bd9d369ce3bc993e13d374ce25fa38a2663644283ecf422c1/charset_normalizer-3.4.5-cp313-cp313-win_arm64.whl", hash = "sha256:93b3b2cc5cf1b8743660ce77a4f45f3f6d1172068207c1defc779a36eea6bb36", size = 133254, upload-time = "2026-03-06T06:02:00.281Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/be/0f0fd9bb4a7fa4fb5067fb7d9ac693d4e928d306f80a0d02bde43a7c4aee/charset_normalizer-3.4.5-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8197abe5ca1ffb7d91e78360f915eef5addff270f8a71c1fc5be24a56f3e4873", size = 280232, upload-time = "2026-03-06T06:02:01.508Z" }, - { url = "https://files.pythonhosted.org/packages/28/02/983b5445e4bef49cd8c9da73a8e029f0825f39b74a06d201bfaa2e55142a/charset_normalizer-3.4.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2aecdb364b8a1802afdc7f9327d55dad5366bc97d8502d0f5854e50712dbc5f", size = 189688, upload-time = "2026-03-06T06:02:02.857Z" }, - { url = "https://files.pythonhosted.org/packages/d0/88/152745c5166437687028027dc080e2daed6fe11cfa95a22f4602591c42db/charset_normalizer-3.4.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a66aa5022bf81ab4b1bebfb009db4fd68e0c6d4307a1ce5ef6a26e5878dfc9e4", size = 206833, upload-time = "2026-03-06T06:02:05.127Z" }, - { url = "https://files.pythonhosted.org/packages/cb/0f/ebc15c8b02af2f19be9678d6eed115feeeccc45ce1f4b098d986c13e8769/charset_normalizer-3.4.5-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d77f97e515688bd615c1d1f795d540f32542d514242067adcb8ef532504cb9ee", size = 202879, upload-time = "2026-03-06T06:02:06.446Z" }, - { url = "https://files.pythonhosted.org/packages/38/9c/71336bff6934418dc8d1e8a1644176ac9088068bc571da612767619c97b3/charset_normalizer-3.4.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01a1ed54b953303ca7e310fafe0fe347aab348bd81834a0bcd602eb538f89d66", size = 195764, upload-time = "2026-03-06T06:02:08.763Z" }, - { url = "https://files.pythonhosted.org/packages/b7/95/ce92fde4f98615661871bc282a856cf9b8a15f686ba0af012984660d480b/charset_normalizer-3.4.5-cp314-cp314-manylinux_2_31_armv7l.whl", hash = 
"sha256:b2d37d78297b39a9eb9eb92c0f6df98c706467282055419df141389b23f93362", size = 183728, upload-time = "2026-03-06T06:02:10.137Z" }, - { url = "https://files.pythonhosted.org/packages/1c/e7/f5b4588d94e747ce45ae680f0f242bc2d98dbd4eccfab73e6160b6893893/charset_normalizer-3.4.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e71bbb595973622b817c042bd943c3f3667e9c9983ce3d205f973f486fec98a7", size = 192937, upload-time = "2026-03-06T06:02:11.663Z" }, - { url = "https://files.pythonhosted.org/packages/f9/29/9d94ed6b929bf9f48bf6ede6e7474576499f07c4c5e878fb186083622716/charset_normalizer-3.4.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4cd966c2559f501c6fd69294d082c2934c8dd4719deb32c22961a5ac6db0df1d", size = 192040, upload-time = "2026-03-06T06:02:13.489Z" }, - { url = "https://files.pythonhosted.org/packages/15/d2/1a093a1cf827957f9445f2fe7298bcc16f8fc5e05c1ed2ad1af0b239035e/charset_normalizer-3.4.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:d5e52d127045d6ae01a1e821acfad2f3a1866c54d0e837828538fabe8d9d1bd6", size = 184107, upload-time = "2026-03-06T06:02:14.83Z" }, - { url = "https://files.pythonhosted.org/packages/0f/7d/82068ce16bd36135df7b97f6333c5d808b94e01d4599a682e2337ed5fd14/charset_normalizer-3.4.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:30a2b1a48478c3428d047ed9690d57c23038dac838a87ad624c85c0a78ebeb39", size = 208310, upload-time = "2026-03-06T06:02:16.165Z" }, - { url = "https://files.pythonhosted.org/packages/84/4e/4dfb52307bb6af4a5c9e73e482d171b81d36f522b21ccd28a49656baa680/charset_normalizer-3.4.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d8ed79b8f6372ca4254955005830fd61c1ccdd8c0fac6603e2c145c61dd95db6", size = 192918, upload-time = "2026-03-06T06:02:18.144Z" }, - { url = "https://files.pythonhosted.org/packages/08/a4/159ff7da662cf7201502ca89980b8f06acf3e887b278956646a8aeb178ab/charset_normalizer-3.4.5-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:c5af897b45fa606b12464ccbe0014bbf8c09191e0a66aab6aa9d5cf6e77e0c94", size = 204615, upload-time = "2026-03-06T06:02:19.821Z" }, - { url = "https://files.pythonhosted.org/packages/d6/62/0dd6172203cb6b429ffffc9935001fde42e5250d57f07b0c28c6046deb6b/charset_normalizer-3.4.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1088345bcc93c58d8d8f3d783eca4a6e7a7752bbff26c3eee7e73c597c191c2e", size = 197784, upload-time = "2026-03-06T06:02:21.86Z" }, - { url = "https://files.pythonhosted.org/packages/c7/5e/1aab5cb737039b9c59e63627dc8bbc0d02562a14f831cc450e5f91d84ce1/charset_normalizer-3.4.5-cp314-cp314-win32.whl", hash = "sha256:ee57b926940ba00bca7ba7041e665cc956e55ef482f851b9b65acb20d867e7a2", size = 133009, upload-time = "2026-03-06T06:02:23.289Z" }, - { url = "https://files.pythonhosted.org/packages/40/65/e7c6c77d7aaa4c0d7974f2e403e17f0ed2cb0fc135f77d686b916bf1eead/charset_normalizer-3.4.5-cp314-cp314-win_amd64.whl", hash = "sha256:4481e6da1830c8a1cc0b746b47f603b653dadb690bcd851d039ffaefe70533aa", size = 143511, upload-time = "2026-03-06T06:02:26.195Z" }, - { url = "https://files.pythonhosted.org/packages/ba/91/52b0841c71f152f563b8e072896c14e3d83b195c188b338d3cc2e582d1d4/charset_normalizer-3.4.5-cp314-cp314-win_arm64.whl", hash = "sha256:97ab7787092eb9b50fb47fa04f24c75b768a606af1bcba1957f07f128a7219e4", size = 133775, upload-time = "2026-03-06T06:02:27.473Z" }, - { url = "https://files.pythonhosted.org/packages/c5/60/3a621758945513adfd4db86827a5bafcc615f913dbd0b4c2ed64a65731be/charset_normalizer-3.4.5-py3-none-any.whl", hash = "sha256:9db5e3fcdcee89a78c04dffb3fe33c79f77bd741a624946db2591c81b2fc85b0", size = 55455, upload-time = "2026-03-06T06:03:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/2c56124c6dc53a774d435f985b5973bc592f42d437be58c0c92d65ae7296/charset_normalizer-3.4.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e1d8ca8611099001949d1cdfaefc510cf0f212484fe7c565f735b68c78c3c95", size = 298751, upload-time = 
"2026-03-15T18:50:00.003Z" }, + { url = "https://files.pythonhosted.org/packages/86/2a/2a7db6b314b966a3bcad8c731c0719c60b931b931de7ae9f34b2839289ee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e25369dc110d58ddf29b949377a93e0716d72a24f62bad72b2b39f155949c1fd", size = 200027, upload-time = "2026-03-15T18:50:01.702Z" }, + { url = "https://files.pythonhosted.org/packages/68/f2/0fe775c74ae25e2a3b07b01538fc162737b3e3f795bada3bc26f4d4d495c/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:259695e2ccc253feb2a016303543d691825e920917e31f894ca1a687982b1de4", size = 220741, upload-time = "2026-03-15T18:50:03.194Z" }, + { url = "https://files.pythonhosted.org/packages/10/98/8085596e41f00b27dd6aa1e68413d1ddda7e605f34dd546833c61fddd709/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dda86aba335c902b6149a02a55b38e96287157e609200811837678214ba2b1db", size = 215802, upload-time = "2026-03-15T18:50:05.859Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ce/865e4e09b041bad659d682bbd98b47fb490b8e124f9398c9448065f64fee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51fb3c322c81d20567019778cb5a4a6f2dc1c200b886bc0d636238e364848c89", size = 207908, upload-time = "2026-03-15T18:50:07.676Z" }, + { url = "https://files.pythonhosted.org/packages/a8/54/8c757f1f7349262898c2f169e0d562b39dcb977503f18fdf0814e923db78/charset_normalizer-3.4.6-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:4482481cb0572180b6fd976a4d5c72a30263e98564da68b86ec91f0fe35e8565", size = 194357, upload-time = "2026-03-15T18:50:09.327Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/29/e88f2fac9218907fc7a70722b393d1bbe8334c61fe9c46640dba349b6e66/charset_normalizer-3.4.6-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:39f5068d35621da2881271e5c3205125cc456f54e9030d3f723288c873a71bf9", size = 205610, upload-time = "2026-03-15T18:50:10.732Z" }, + { url = "https://files.pythonhosted.org/packages/4c/c5/21d7bb0cb415287178450171d130bed9d664211fdd59731ed2c34267b07d/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8bea55c4eef25b0b19a0337dc4e3f9a15b00d569c77211fa8cde38684f234fb7", size = 203512, upload-time = "2026-03-15T18:50:12.535Z" }, + { url = "https://files.pythonhosted.org/packages/a4/be/ce52f3c7fdb35cc987ad38a53ebcef52eec498f4fb6c66ecfe62cfe57ba2/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:f0cdaecd4c953bfae0b6bb64910aaaca5a424ad9c72d85cb88417bb9814f7550", size = 195398, upload-time = "2026-03-15T18:50:14.236Z" }, + { url = "https://files.pythonhosted.org/packages/81/a0/3ab5dd39d4859a3555e5dadfc8a9fa7f8352f8c183d1a65c90264517da0e/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:150b8ce8e830eb7ccb029ec9ca36022f756986aaaa7956aad6d9ec90089338c0", size = 221772, upload-time = "2026-03-15T18:50:15.581Z" }, + { url = "https://files.pythonhosted.org/packages/04/6e/6a4e41a97ba6b2fa87f849c41e4d229449a586be85053c4d90135fe82d26/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:e68c14b04827dd76dcbd1aeea9e604e3e4b78322d8faf2f8132c7138efa340a8", size = 205759, upload-time = "2026-03-15T18:50:17.047Z" }, + { url = "https://files.pythonhosted.org/packages/db/3b/34a712a5ee64a6957bf355b01dc17b12de457638d436fdb05d01e463cd1c/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3778fd7d7cd04ae8f54651f4a7a0bd6e39a0cf20f801720a4c21d80e9b7ad6b0", size = 216938, upload-time = "2026-03-15T18:50:18.44Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/05/5bd1e12da9ab18790af05c61aafd01a60f489778179b621ac2a305243c62/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dad6e0f2e481fffdcf776d10ebee25e0ef89f16d691f1e5dee4b586375fdc64b", size = 210138, upload-time = "2026-03-15T18:50:19.852Z" }, + { url = "https://files.pythonhosted.org/packages/bd/8e/3cb9e2d998ff6b21c0a1860343cb7b83eba9cdb66b91410e18fc4969d6ab/charset_normalizer-3.4.6-cp310-cp310-win32.whl", hash = "sha256:74a2e659c7ecbc73562e2a15e05039f1e22c75b7c7618b4b574a3ea9118d1557", size = 144137, upload-time = "2026-03-15T18:50:21.505Z" }, + { url = "https://files.pythonhosted.org/packages/d8/8f/78f5489ffadb0db3eb7aff53d31c24531d33eb545f0c6f6567c25f49a5ff/charset_normalizer-3.4.6-cp310-cp310-win_amd64.whl", hash = "sha256:aa9cccf4a44b9b62d8ba8b4dd06c649ba683e4bf04eea606d2e94cfc2d6ff4d6", size = 154244, upload-time = "2026-03-15T18:50:22.81Z" }, + { url = "https://files.pythonhosted.org/packages/e4/74/e472659dffb0cadb2f411282d2d76c60da1fc94076d7fffed4ae8a93ec01/charset_normalizer-3.4.6-cp310-cp310-win_arm64.whl", hash = "sha256:e985a16ff513596f217cee86c21371b8cd011c0f6f056d0920aa2d926c544058", size = 143312, upload-time = "2026-03-15T18:50:24.074Z" }, + { url = "https://files.pythonhosted.org/packages/62/28/ff6f234e628a2de61c458be2779cb182bc03f6eec12200d4a525bbfc9741/charset_normalizer-3.4.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:82060f995ab5003a2d6e0f4ad29065b7672b6593c8c63559beefe5b443242c3e", size = 293582, upload-time = "2026-03-15T18:50:25.454Z" }, + { url = "https://files.pythonhosted.org/packages/1c/b7/b1a117e5385cbdb3205f6055403c2a2a220c5ea80b8716c324eaf75c5c95/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60c74963d8350241a79cb8feea80e54d518f72c26db618862a8f53e5023deaf9", size = 197240, upload-time = "2026-03-15T18:50:27.196Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/5f/2574f0f09f3c3bc1b2f992e20bce6546cb1f17e111c5be07308dc5427956/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e4333fb15c83f7d1482a76d45a0818897b3d33f00efd215528ff7c51b8e35d", size = 217363, upload-time = "2026-03-15T18:50:28.601Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d1/0ae20ad77bc949ddd39b51bf383b6ca932f2916074c95cad34ae465ab71f/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bc72863f4d9aba2e8fd9085e63548a324ba706d2ea2c83b260da08a59b9482de", size = 212994, upload-time = "2026-03-15T18:50:30.102Z" }, + { url = "https://files.pythonhosted.org/packages/60/ac/3233d262a310c1b12633536a07cde5ddd16985e6e7e238e9f3f9423d8eb9/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cc4fc6c196d6a8b76629a70ddfcd4635a6898756e2d9cac5565cf0654605d73", size = 204697, upload-time = "2026-03-15T18:50:31.654Z" }, + { url = "https://files.pythonhosted.org/packages/25/3c/8a18fc411f085b82303cfb7154eed5bd49c77035eb7608d049468b53f87c/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:0c173ce3a681f309f31b87125fecec7a5d1347261ea11ebbb856fa6006b23c8c", size = 191673, upload-time = "2026-03-15T18:50:33.433Z" }, + { url = "https://files.pythonhosted.org/packages/ff/a7/11cfe61d6c5c5c7438d6ba40919d0306ed83c9ab957f3d4da2277ff67836/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c907cdc8109f6c619e6254212e794d6548373cc40e1ec75e6e3823d9135d29cc", size = 201120, upload-time = "2026-03-15T18:50:35.105Z" }, + { url = "https://files.pythonhosted.org/packages/b5/10/cf491fa1abd47c02f69687046b896c950b92b6cd7337a27e6548adbec8e4/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:404a1e552cf5b675a87f0651f8b79f5f1e6fd100ee88dc612f89aa16abd4486f", size = 200911, upload-time = "2026-03-15T18:50:36.819Z" }, + { url = "https://files.pythonhosted.org/packages/28/70/039796160b48b18ed466fde0af84c1b090c4e288fae26cd674ad04a2d703/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e3c701e954abf6fc03a49f7c579cc80c2c6cc52525340ca3186c41d3f33482ef", size = 192516, upload-time = "2026-03-15T18:50:38.228Z" }, + { url = "https://files.pythonhosted.org/packages/ff/34/c56f3223393d6ff3124b9e78f7de738047c2d6bc40a4f16ac0c9d7a1cb3c/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7a6967aaf043bceabab5412ed6bd6bd26603dae84d5cb75bf8d9a74a4959d398", size = 218795, upload-time = "2026-03-15T18:50:39.664Z" }, + { url = "https://files.pythonhosted.org/packages/e8/3b/ce2d4f86c5282191a041fdc5a4ce18f1c6bd40a5bd1f74cf8625f08d51c1/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5feb91325bbceade6afab43eb3b508c63ee53579fe896c77137ded51c6b6958e", size = 201833, upload-time = "2026-03-15T18:50:41.552Z" }, + { url = "https://files.pythonhosted.org/packages/3b/9b/b6a9f76b0fd7c5b5ec58b228ff7e85095370282150f0bd50b3126f5506d6/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f820f24b09e3e779fe84c3c456cb4108a7aa639b0d1f02c28046e11bfcd088ed", size = 213920, upload-time = "2026-03-15T18:50:43.33Z" }, + { url = "https://files.pythonhosted.org/packages/ae/98/7bc23513a33d8172365ed30ee3a3b3fe1ece14a395e5fc94129541fc6003/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b35b200d6a71b9839a46b9b7fff66b6638bb52fc9658aa58796b0326595d3021", size = 206951, upload-time = "2026-03-15T18:50:44.789Z" }, + { url = "https://files.pythonhosted.org/packages/32/73/c0b86f3d1458468e11aec870e6b3feac931facbe105a894b552b0e518e79/charset_normalizer-3.4.6-cp311-cp311-win32.whl", hash = "sha256:9ca4c0b502ab399ef89248a2c84c54954f77a070f28e546a85e91da627d1301e", 
size = 143703, upload-time = "2026-03-15T18:50:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/c6/e3/76f2facfe8eddee0bbd38d2594e709033338eae44ebf1738bcefe0a06185/charset_normalizer-3.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:a9e68c9d88823b274cf1e72f28cb5dc89c990edf430b0bfd3e2fb0785bfeabf4", size = 153857, upload-time = "2026-03-15T18:50:47.563Z" }, + { url = "https://files.pythonhosted.org/packages/e2/dc/9abe19c9b27e6cd3636036b9d1b387b78c40dedbf0b47f9366737684b4b0/charset_normalizer-3.4.6-cp311-cp311-win_arm64.whl", hash = "sha256:97d0235baafca5f2b09cf332cc275f021e694e8362c6bb9c96fc9a0eb74fc316", size = 142751, upload-time = "2026-03-15T18:50:49.234Z" }, + { url = "https://files.pythonhosted.org/packages/e5/62/c0815c992c9545347aeea7859b50dc9044d147e2e7278329c6e02ac9a616/charset_normalizer-3.4.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ef7fedc7a6ecbe99969cd09632516738a97eeb8bd7258bf8a0f23114c057dab", size = 295154, upload-time = "2026-03-15T18:50:50.88Z" }, + { url = "https://files.pythonhosted.org/packages/a8/37/bdca6613c2e3c58c7421891d80cc3efa1d32e882f7c4a7ee6039c3fc951a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4ea868bc28109052790eb2b52a9ab33f3aa7adc02f96673526ff47419490e21", size = 199191, upload-time = "2026-03-15T18:50:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/6c/92/9934d1bbd69f7f398b38c5dae1cbf9cc672e7c34a4adf7b17c0a9c17d15d/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:836ab36280f21fc1a03c99cd05c6b7af70d2697e374c7af0b61ed271401a72a2", size = 218674, upload-time = "2026-03-15T18:50:54.102Z" }, + { url = "https://files.pythonhosted.org/packages/af/90/25f6ab406659286be929fd89ab0e78e38aa183fc374e03aa3c12d730af8a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:f1ce721c8a7dfec21fcbdfe04e8f68174183cf4e8188e0645e92aa23985c57ff", size = 215259, upload-time = "2026-03-15T18:50:55.616Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ef/79a463eb0fff7f96afa04c1d4c51f8fc85426f918db467854bfb6a569ce3/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e28d62a8fc7a1fa411c43bd65e346f3bce9716dc51b897fbe930c5987b402d5", size = 207276, upload-time = "2026-03-15T18:50:57.054Z" }, + { url = "https://files.pythonhosted.org/packages/f7/72/d0426afec4b71dc159fa6b4e68f868cd5a3ecd918fec5813a15d292a7d10/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:530d548084c4a9f7a16ed4a294d459b4f229db50df689bfe92027452452943a0", size = 195161, upload-time = "2026-03-15T18:50:58.686Z" }, + { url = "https://files.pythonhosted.org/packages/bf/18/c82b06a68bfcb6ce55e508225d210c7e6a4ea122bfc0748892f3dc4e8e11/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30f445ae60aad5e1f8bdbb3108e39f6fbc09f4ea16c815c66578878325f8f15a", size = 203452, upload-time = "2026-03-15T18:51:00.196Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/0c25979b92f8adafdbb946160348d8d44aa60ce99afdc27df524379875cb/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ac2393c73378fea4e52aa56285a3d64be50f1a12395afef9cce47772f60334c2", size = 202272, upload-time = "2026-03-15T18:51:01.703Z" }, + { url = "https://files.pythonhosted.org/packages/2e/3d/7fea3e8fe84136bebbac715dd1221cc25c173c57a699c030ab9b8900cbb7/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:90ca27cd8da8118b18a52d5f547859cc1f8354a00cd1e8e5120df3e30d6279e5", size = 195622, upload-time = "2026-03-15T18:51:03.526Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/8a/d6f7fd5cb96c58ef2f681424fbca01264461336d2a7fc875e4446b1f1346/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e5a94886bedca0f9b78fecd6afb6629142fd2605aa70a125d49f4edc6037ee6", size = 220056, upload-time = "2026-03-15T18:51:05.269Z" }, + { url = "https://files.pythonhosted.org/packages/16/50/478cdda782c8c9c3fb5da3cc72dd7f331f031e7f1363a893cdd6ca0f8de0/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:695f5c2823691a25f17bc5d5ffe79fa90972cc34b002ac6c843bb8a1720e950d", size = 203751, upload-time = "2026-03-15T18:51:06.858Z" }, + { url = "https://files.pythonhosted.org/packages/75/fc/cc2fcac943939c8e4d8791abfa139f685e5150cae9f94b60f12520feaa9b/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:231d4da14bcd9301310faf492051bee27df11f2bc7549bc0bb41fef11b82daa2", size = 216563, upload-time = "2026-03-15T18:51:08.564Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b7/a4add1d9a5f68f3d037261aecca83abdb0ab15960a3591d340e829b37298/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a056d1ad2633548ca18ffa2f85c202cfb48b68615129143915b8dc72a806a923", size = 209265, upload-time = "2026-03-15T18:51:10.312Z" }, + { url = "https://files.pythonhosted.org/packages/6c/18/c094561b5d64a24277707698e54b7f67bd17a4f857bbfbb1072bba07c8bf/charset_normalizer-3.4.6-cp312-cp312-win32.whl", hash = "sha256:c2274ca724536f173122f36c98ce188fd24ce3dad886ec2b7af859518ce008a4", size = 144229, upload-time = "2026-03-15T18:51:11.694Z" }, + { url = "https://files.pythonhosted.org/packages/ab/20/0567efb3a8fd481b8f34f739ebddc098ed062a59fed41a8d193a61939e8f/charset_normalizer-3.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:c8ae56368f8cc97c7e40a7ee18e1cedaf8e780cd8bc5ed5ac8b81f238614facb", size = 154277, upload-time = "2026-03-15T18:51:13.004Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/57/28d79b44b51933119e21f65479d0864a8d5893e494cf5daab15df0247c17/charset_normalizer-3.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:899d28f422116b08be5118ef350c292b36fc15ec2daeb9ea987c89281c7bb5c4", size = 142817, upload-time = "2026-03-15T18:51:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/1e/1d/4fdabeef4e231153b6ed7567602f3b68265ec4e5b76d6024cf647d43d981/charset_normalizer-3.4.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f", size = 294823, upload-time = "2026-03-15T18:51:15.755Z" }, + { url = "https://files.pythonhosted.org/packages/47/7b/20e809b89c69d37be748d98e84dce6820bf663cf19cf6b942c951a3e8f41/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843", size = 198527, upload-time = "2026-03-15T18:51:17.177Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/4f8d27527d59c039dce6f7622593cdcd3d70a8504d87d09eb11e9fdc6062/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf", size = 218388, upload-time = "2026-03-15T18:51:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/f6/9b/4770ccb3e491a9bacf1c46cc8b812214fe367c86a96353ccc6daf87b01ec/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8", size = 214563, upload-time = "2026-03-15T18:51:20.374Z" }, + { url = "https://files.pythonhosted.org/packages/2b/58/a199d245894b12db0b957d627516c78e055adc3a0d978bc7f65ddaf7c399/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9", size = 206587, upload-time = "2026-03-15T18:51:21.807Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/3def227f1ec56f5c69dfc8392b8bd63b11a18ca8178d9211d7cc5e5e4f27/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88", size = 194724, upload-time = "2026-03-15T18:51:23.508Z" }, + { url = "https://files.pythonhosted.org/packages/58/ab/9318352e220c05efd31c2779a23b50969dc94b985a2efa643ed9077bfca5/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84", size = 202956, upload-time = "2026-03-15T18:51:25.239Z" }, + { url = "https://files.pythonhosted.org/packages/75/13/f3550a3ac25b70f87ac98c40d3199a8503676c2f1620efbf8d42095cfc40/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd", size = 201923, upload-time = "2026-03-15T18:51:26.682Z" }, + { url = "https://files.pythonhosted.org/packages/1b/db/c5c643b912740b45e8eec21de1bbab8e7fc085944d37e1e709d3dcd9d72f/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c", size = 195366, upload-time = "2026-03-15T18:51:28.129Z" }, + { url = "https://files.pythonhosted.org/packages/5a/67/3b1c62744f9b2448443e0eb160d8b001c849ec3fef591e012eda6484787c/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194", size = 219752, upload-time = "2026-03-15T18:51:29.556Z" }, + { url = "https://files.pythonhosted.org/packages/f6/98/32ffbaf7f0366ffb0445930b87d103f6b406bc2c271563644bde8a2b1093/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc", size = 203296, upload-time = "2026-03-15T18:51:30.921Z" }, + { url = "https://files.pythonhosted.org/packages/41/12/5d308c1bbe60cabb0c5ef511574a647067e2a1f631bc8634fcafaccd8293/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f", size = 215956, upload-time = "2026-03-15T18:51:32.399Z" }, + { url = "https://files.pythonhosted.org/packages/53/e9/5f85f6c5e20669dbe56b165c67b0260547dea97dba7e187938833d791687/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2", size = 208652, upload-time = "2026-03-15T18:51:34.214Z" }, + { url = "https://files.pythonhosted.org/packages/f1/11/897052ea6af56df3eef3ca94edafee410ca699ca0c7b87960ad19932c55e/charset_normalizer-3.4.6-cp313-cp313-win32.whl", hash = "sha256:d7de2637729c67d67cf87614b566626057e95c303bc0a55ffe391f5205e7003d", size = 143940, upload-time = "2026-03-15T18:51:36.15Z" }, + { url = "https://files.pythonhosted.org/packages/a1/5c/724b6b363603e419829f561c854b87ed7c7e31231a7908708ac086cdf3e2/charset_normalizer-3.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:572d7c822caf521f0525ba1bce1a622a0b85cf47ffbdae6c9c19e3b5ac3c4389", size = 154101, upload-time = "2026-03-15T18:51:37.876Z" }, + { url = "https://files.pythonhosted.org/packages/01/a5/7abf15b4c0968e47020f9ca0935fb3274deb87cb288cd187cad92e8cdffd/charset_normalizer-3.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a4474d924a47185a06411e0064b803c68be044be2d60e50e8bddcc2649957c1f", size = 143109, upload-time = "2026-03-15T18:51:39.565Z" }, + { url = "https://files.pythonhosted.org/packages/25/6f/ffe1e1259f384594063ea1869bfb6be5cdb8bc81020fc36c3636bc8302a1/charset_normalizer-3.4.6-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9cc6e6d9e571d2f863fa77700701dae73ed5f78881efc8b3f9a4398772ff53e8", size = 294458, 
upload-time = "2026-03-15T18:51:41.134Z" }, + { url = "https://files.pythonhosted.org/packages/56/60/09bb6c13a8c1016c2ed5c6a6488e4ffef506461aa5161662bd7636936fb1/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5960d965e67165d75b7c7ffc60a83ec5abfc5c11b764ec13ea54fbef8b4421", size = 199277, upload-time = "2026-03-15T18:51:42.953Z" }, + { url = "https://files.pythonhosted.org/packages/00/50/dcfbb72a5138bbefdc3332e8d81a23494bf67998b4b100703fd15fa52d81/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b3694e3f87f8ac7ce279d4355645b3c878d24d1424581b46282f24b92f5a4ae2", size = 218758, upload-time = "2026-03-15T18:51:44.339Z" }, + { url = "https://files.pythonhosted.org/packages/03/b3/d79a9a191bb75f5aa81f3aaaa387ef29ce7cb7a9e5074ba8ea095cc073c2/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d11595abf8dd942a77883a39d81433739b287b6aa71620f15164f8096221b30", size = 215299, upload-time = "2026-03-15T18:51:45.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/7e/bc8911719f7084f72fd545f647601ea3532363927f807d296a8c88a62c0d/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7bda6eebafd42133efdca535b04ccb338ab29467b3f7bf79569883676fc628db", size = 206811, upload-time = "2026-03-15T18:51:47.308Z" }, + { url = "https://files.pythonhosted.org/packages/e2/40/c430b969d41dda0c465aa36cc7c2c068afb67177bef50905ac371b28ccc7/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:bbc8c8650c6e51041ad1be191742b8b421d05bbd3410f43fa2a00c8db87678e8", size = 193706, upload-time = "2026-03-15T18:51:48.849Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/15/e35e0590af254f7df984de1323640ef375df5761f615b6225ba8deb9799a/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22c6f0c2fbc31e76c3b8a86fba1a56eda6166e238c29cdd3d14befdb4a4e4815", size = 202706, upload-time = "2026-03-15T18:51:50.257Z" }, + { url = "https://files.pythonhosted.org/packages/5e/bd/f736f7b9cc5e93a18b794a50346bb16fbfd6b37f99e8f306f7951d27c17c/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7edbed096e4a4798710ed6bc75dcaa2a21b68b6c356553ac4823c3658d53743a", size = 202497, upload-time = "2026-03-15T18:51:52.012Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ba/2cc9e3e7dfdf7760a6ed8da7446d22536f3d0ce114ac63dee2a5a3599e62/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7f9019c9cb613f084481bd6a100b12e1547cf2efe362d873c2e31e4035a6fa43", size = 193511, upload-time = "2026-03-15T18:51:53.723Z" }, + { url = "https://files.pythonhosted.org/packages/9e/cb/5be49b5f776e5613be07298c80e1b02a2d900f7a7de807230595c85a8b2e/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:58c948d0d086229efc484fe2f30c2d382c86720f55cd9bc33591774348ad44e0", size = 220133, upload-time = "2026-03-15T18:51:55.333Z" }, + { url = "https://files.pythonhosted.org/packages/83/43/99f1b5dad345accb322c80c7821071554f791a95ee50c1c90041c157ae99/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:419a9d91bd238052642a51938af8ac05da5b3343becde08d5cdeab9046df9ee1", size = 203035, upload-time = "2026-03-15T18:51:56.736Z" }, + { url = "https://files.pythonhosted.org/packages/87/9a/62c2cb6a531483b55dddff1a68b3d891a8b498f3ca555fbcf2978e804d9d/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5273b9f0b5835ff0350c0828faea623c68bfa65b792720c453e22b25cc72930f", size = 216321, upload-time = "2026-03-15T18:51:58.17Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/79/94a010ff81e3aec7c293eb82c28f930918e517bc144c9906a060844462eb/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0e901eb1049fdb80f5bd11ed5ea1e498ec423102f7a9b9e4645d5b8204ff2815", size = 208973, upload-time = "2026-03-15T18:51:59.998Z" }, + { url = "https://files.pythonhosted.org/packages/2a/57/4ecff6d4ec8585342f0c71bc03efaa99cb7468f7c91a57b105bcd561cea8/charset_normalizer-3.4.6-cp314-cp314-win32.whl", hash = "sha256:b4ff1d35e8c5bd078be89349b6f3a845128e685e751b6ea1169cf2160b344c4d", size = 144610, upload-time = "2026-03-15T18:52:02.213Z" }, + { url = "https://files.pythonhosted.org/packages/80/94/8434a02d9d7f168c25767c64671fead8d599744a05d6a6c877144c754246/charset_normalizer-3.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:74119174722c4349af9708993118581686f343adc1c8c9c007d59be90d077f3f", size = 154962, upload-time = "2026-03-15T18:52:03.658Z" }, + { url = "https://files.pythonhosted.org/packages/46/4c/48f2cdbfd923026503dfd67ccea45c94fd8fe988d9056b468579c66ed62b/charset_normalizer-3.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:e5bcc1a1ae744e0bb59641171ae53743760130600da8db48cbb6e4918e186e4e", size = 143595, upload-time = "2026-03-15T18:52:05.123Z" }, + { url = "https://files.pythonhosted.org/packages/31/93/8878be7569f87b14f1d52032946131bcb6ebbd8af3e20446bc04053dc3f1/charset_normalizer-3.4.6-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ad8faf8df23f0378c6d527d8b0b15ea4a2e23c89376877c598c4870d1b2c7866", size = 314828, upload-time = "2026-03-15T18:52:06.831Z" }, + { url = "https://files.pythonhosted.org/packages/06/b6/fae511ca98aac69ecc35cde828b0a3d146325dd03d99655ad38fc2cc3293/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5ea69428fa1b49573eef0cc44a1d43bebd45ad0c611eb7d7eac760c7ae771bc", size = 208138, upload-time = "2026-03-15T18:52:08.239Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/57/64caf6e1bf07274a1e0b7c160a55ee9e8c9ec32c46846ce59b9c333f7008/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:06a7e86163334edfc5d20fe104db92fcd666e5a5df0977cb5680a506fe26cc8e", size = 224679, upload-time = "2026-03-15T18:52:10.043Z" }, + { url = "https://files.pythonhosted.org/packages/aa/cb/9ff5a25b9273ef160861b41f6937f86fae18b0792fe0a8e75e06acb08f1d/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e1f6e2f00a6b8edb562826e4632e26d063ac10307e80f7461f7de3ad8ef3f077", size = 223475, upload-time = "2026-03-15T18:52:11.854Z" }, + { url = "https://files.pythonhosted.org/packages/fc/97/440635fc093b8d7347502a377031f9605a1039c958f3cd18dcacffb37743/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b52c68d64c1878818687a473a10547b3292e82b6f6fe483808fb1468e2f52f", size = 215230, upload-time = "2026-03-15T18:52:13.325Z" }, + { url = "https://files.pythonhosted.org/packages/cd/24/afff630feb571a13f07c8539fbb502d2ab494019492aaffc78ef41f1d1d0/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:7504e9b7dc05f99a9bbb4525c67a2c155073b44d720470a148b34166a69c054e", size = 199045, upload-time = "2026-03-15T18:52:14.752Z" }, + { url = "https://files.pythonhosted.org/packages/e5/17/d1399ecdaf7e0498c327433e7eefdd862b41236a7e484355b8e0e5ebd64b/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:172985e4ff804a7ad08eebec0a1640ece87ba5041d565fff23c8f99c1f389484", size = 211658, upload-time = "2026-03-15T18:52:16.278Z" }, + { url = "https://files.pythonhosted.org/packages/b5/38/16baa0affb957b3d880e5ac2144caf3f9d7de7bc4a91842e447fbb5e8b67/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:4be9f4830ba8741527693848403e2c457c16e499100963ec711b1c6f2049b7c7", size = 210769, upload-time = "2026-03-15T18:52:17.782Z" }, + { url = "https://files.pythonhosted.org/packages/05/34/c531bc6ac4c21da9ddfddb3107be2287188b3ea4b53b70fc58f2a77ac8d8/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:79090741d842f564b1b2827c0b82d846405b744d31e84f18d7a7b41c20e473ff", size = 201328, upload-time = "2026-03-15T18:52:19.553Z" }, + { url = "https://files.pythonhosted.org/packages/fa/73/a5a1e9ca5f234519c1953608a03fe109c306b97fdfb25f09182babad51a7/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:87725cfb1a4f1f8c2fc9890ae2f42094120f4b44db9360be5d99a4c6b0e03a9e", size = 225302, upload-time = "2026-03-15T18:52:21.043Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f6/cd782923d112d296294dea4bcc7af5a7ae0f86ab79f8fefbda5526b6cfc0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fcce033e4021347d80ed9c66dcf1e7b1546319834b74445f561d2e2221de5659", size = 211127, upload-time = "2026-03-15T18:52:22.491Z" }, + { url = "https://files.pythonhosted.org/packages/0e/c5/0b6898950627af7d6103a449b22320372c24c6feda91aa24e201a478d161/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ca0276464d148c72defa8bb4390cce01b4a0e425f3b50d1435aa6d7a18107602", size = 222840, upload-time = "2026-03-15T18:52:24.113Z" }, + { url = "https://files.pythonhosted.org/packages/7d/25/c4bba773bef442cbdc06111d40daa3de5050a676fa26e85090fc54dd12f0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:197c1a244a274bb016dd8b79204850144ef77fe81c5b797dc389327adb552407", size = 216890, upload-time = "2026-03-15T18:52:25.541Z" }, + { url = "https://files.pythonhosted.org/packages/35/1a/05dacadb0978da72ee287b0143097db12f2e7e8d3ffc4647da07a383b0b7/charset_normalizer-3.4.6-cp314-cp314t-win32.whl", hash = 
"sha256:2a24157fa36980478dd1770b585c0f30d19e18f4fb0c47c13aa568f871718579", size = 155379, upload-time = "2026-03-15T18:52:27.05Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7a/d269d834cb3a76291651256f3b9a5945e81d0a49ab9f4a498964e83c0416/charset_normalizer-3.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:cd5e2801c89992ed8c0a3f0293ae83c159a60d9a5d685005383ef4caca77f2c4", size = 169043, upload-time = "2026-03-15T18:52:28.502Z" }, + { url = "https://files.pythonhosted.org/packages/23/06/28b29fba521a37a8932c6a84192175c34d49f84a6d4773fa63d05f9aff22/charset_normalizer-3.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:47955475ac79cc504ef2704b192364e51d0d473ad452caedd0002605f780101c", size = 148523, upload-time = "2026-03-15T18:52:29.956Z" }, + { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" }, ] [[package]] name = "codeclone" -version = "1.4.3" +version = "2.0.0b1" source = { editable = "." 
} dependencies = [ { name = "pygments" }, { name = "rich" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, ] [package.optional-dependencies] @@ -197,14 +214,15 @@ dev = [ [package.metadata] requires-dist = [ - { name = "build", marker = "extra == 'dev'", specifier = ">=1.2.0" }, + { name = "build", marker = "extra == 'dev'", specifier = ">=1.4.1" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.19.1" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.5.1" }, { name = "pygments", specifier = ">=2.19.2" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.0" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=14.3.2" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.7" }, + { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=5.0.0" }, ] provides-extras = ["dev"] @@ -220,115 +238,115 @@ wheels = [ [[package]] name = "coverage" -version = "7.13.4" +version = "7.13.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/56/95b7e30fa389756cb56630faa728da46a27b8c6eb46f9d557c68fff12b65/coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91", size = 827239, upload-time = "2026-02-09T12:59:03.86Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/44/d4/7827d9ffa34d5d4d752eec907022aa417120936282fc488306f5da08c292/coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415", size = 219152, upload-time = "2026-02-09T12:56:11.974Z" }, - { url = "https://files.pythonhosted.org/packages/35/b0/d69df26607c64043292644dbb9dc54b0856fabaa2cbb1eeee3331cc9e280/coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b", size = 219667, upload-time = "2026-02-09T12:56:13.33Z" }, - { url = "https://files.pythonhosted.org/packages/82/a4/c1523f7c9e47b2271dbf8c2a097e7a1f89ef0d66f5840bb59b7e8814157b/coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a", size = 246425, upload-time = "2026-02-09T12:56:14.552Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f", size = 248229, upload-time = "2026-02-09T12:56:16.31Z" }, - { url = "https://files.pythonhosted.org/packages/35/98/85aba0aed5126d896162087ef3f0e789a225697245256fc6181b95f47207/coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012", size = 250106, upload-time = "2026-02-09T12:56:18.024Z" }, - { url = "https://files.pythonhosted.org/packages/96/72/1db59bd67494bc162e3e4cd5fbc7edba2c7026b22f7c8ef1496d58c2b94c/coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def", size = 252021, 
upload-time = "2026-02-09T12:56:19.272Z" }, - { url = "https://files.pythonhosted.org/packages/9d/97/72899c59c7066961de6e3daa142d459d47d104956db43e057e034f015c8a/coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256", size = 247114, upload-time = "2026-02-09T12:56:21.051Z" }, - { url = "https://files.pythonhosted.org/packages/39/1f/f1885573b5970235e908da4389176936c8933e86cb316b9620aab1585fa2/coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda", size = 248143, upload-time = "2026-02-09T12:56:22.585Z" }, - { url = "https://files.pythonhosted.org/packages/a8/cf/e80390c5b7480b722fa3e994f8202807799b85bc562aa4f1dde209fbb7be/coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92", size = 246152, upload-time = "2026-02-09T12:56:23.748Z" }, - { url = "https://files.pythonhosted.org/packages/44/bf/f89a8350d85572f95412debb0fb9bb4795b1d5b5232bd652923c759e787b/coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c", size = 249959, upload-time = "2026-02-09T12:56:25.209Z" }, - { url = "https://files.pythonhosted.org/packages/f7/6e/612a02aece8178c818df273e8d1642190c4875402ca2ba74514394b27aba/coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58", size = 246416, upload-time = "2026-02-09T12:56:26.475Z" }, - { url = "https://files.pythonhosted.org/packages/cb/98/b5afc39af67c2fa6786b03c3a7091fc300947387ce8914b096db8a73d67a/coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9", size = 247025, upload-time = "2026-02-09T12:56:27.727Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/30/2bba8ef0682d5bd210c38fe497e12a06c9f8d663f7025e9f5c2c31ce847d/coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf", size = 221758, upload-time = "2026-02-09T12:56:29.051Z" }, - { url = "https://files.pythonhosted.org/packages/78/13/331f94934cf6c092b8ea59ff868eb587bc8fe0893f02c55bc6c0183a192e/coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95", size = 222693, upload-time = "2026-02-09T12:56:30.366Z" }, - { url = "https://files.pythonhosted.org/packages/b4/ad/b59e5b451cf7172b8d1043dc0fa718f23aab379bc1521ee13d4bd9bfa960/coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053", size = 219278, upload-time = "2026-02-09T12:56:31.673Z" }, - { url = "https://files.pythonhosted.org/packages/f1/17/0cb7ca3de72e5f4ef2ec2fa0089beafbcaaaead1844e8b8a63d35173d77d/coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11", size = 219783, upload-time = "2026-02-09T12:56:33.104Z" }, - { url = "https://files.pythonhosted.org/packages/ab/63/325d8e5b11e0eaf6d0f6a44fad444ae58820929a9b0de943fa377fe73e85/coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa", size = 250200, upload-time = "2026-02-09T12:56:34.474Z" }, - { url = "https://files.pythonhosted.org/packages/76/53/c16972708cbb79f2942922571a687c52bd109a7bd51175aeb7558dff2236/coverage-7.13.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7", size = 252114, upload-time = "2026-02-09T12:56:35.749Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/c2/7ab36d8b8cc412bec9ea2d07c83c48930eb4ba649634ba00cb7e4e0f9017/coverage-7.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00", size = 254220, upload-time = "2026-02-09T12:56:37.796Z" }, - { url = "https://files.pythonhosted.org/packages/d6/4d/cf52c9a3322c89a0e6febdfbc83bb45c0ed3c64ad14081b9503adee702e7/coverage-7.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef", size = 256164, upload-time = "2026-02-09T12:56:39.016Z" }, - { url = "https://files.pythonhosted.org/packages/78/e9/eb1dd17bd6de8289df3580e967e78294f352a5df8a57ff4671ee5fc3dcd0/coverage-7.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903", size = 250325, upload-time = "2026-02-09T12:56:40.668Z" }, - { url = "https://files.pythonhosted.org/packages/71/07/8c1542aa873728f72267c07278c5cc0ec91356daf974df21335ccdb46368/coverage-7.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f", size = 251913, upload-time = "2026-02-09T12:56:41.97Z" }, - { url = "https://files.pythonhosted.org/packages/74/d7/c62e2c5e4483a748e27868e4c32ad3daa9bdddbba58e1bc7a15e252baa74/coverage-7.13.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299", size = 249974, upload-time = "2026-02-09T12:56:43.323Z" }, - { url = "https://files.pythonhosted.org/packages/98/9f/4c5c015a6e98ced54efd0f5cf8d31b88e5504ecb6857585fc0161bb1e600/coverage-7.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505", size = 253741, upload-time = "2026-02-09T12:56:45.155Z" }, - { url 
= "https://files.pythonhosted.org/packages/bd/59/0f4eef89b9f0fcd9633b5d350016f54126ab49426a70ff4c4e87446cabdc/coverage-7.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6", size = 249695, upload-time = "2026-02-09T12:56:46.636Z" }, - { url = "https://files.pythonhosted.org/packages/b5/2c/b7476f938deb07166f3eb281a385c262675d688ff4659ad56c6c6b8e2e70/coverage-7.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9", size = 250599, upload-time = "2026-02-09T12:56:48.13Z" }, - { url = "https://files.pythonhosted.org/packages/b8/34/c3420709d9846ee3785b9f2831b4d94f276f38884032dca1457fa83f7476/coverage-7.13.4-cp311-cp311-win32.whl", hash = "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9", size = 221780, upload-time = "2026-02-09T12:56:50.479Z" }, - { url = "https://files.pythonhosted.org/packages/61/08/3d9c8613079d2b11c185b865de9a4c1a68850cfda2b357fae365cf609f29/coverage-7.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f", size = 222715, upload-time = "2026-02-09T12:56:51.815Z" }, - { url = "https://files.pythonhosted.org/packages/18/1a/54c3c80b2f056164cc0a6cdcb040733760c7c4be9d780fe655f356f433e4/coverage-7.13.4-cp311-cp311-win_arm64.whl", hash = "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f", size = 221385, upload-time = "2026-02-09T12:56:53.194Z" }, - { url = "https://files.pythonhosted.org/packages/d1/81/4ce2fdd909c5a0ed1f6dedb88aa57ab79b6d1fbd9b588c1ac7ef45659566/coverage-7.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459", size = 219449, upload-time = "2026-02-09T12:56:54.889Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/96/5238b1efc5922ddbdc9b0db9243152c09777804fb7c02ad1741eb18a11c0/coverage-7.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3", size = 219810, upload-time = "2026-02-09T12:56:56.33Z" }, - { url = "https://files.pythonhosted.org/packages/78/72/2f372b726d433c9c35e56377cf1d513b4c16fe51841060d826b95caacec1/coverage-7.13.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634", size = 251308, upload-time = "2026-02-09T12:56:57.858Z" }, - { url = "https://files.pythonhosted.org/packages/5d/a0/2ea570925524ef4e00bb6c82649f5682a77fac5ab910a65c9284de422600/coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3", size = 254052, upload-time = "2026-02-09T12:56:59.754Z" }, - { url = "https://files.pythonhosted.org/packages/e8/ac/45dc2e19a1939098d783c846e130b8f862fbb50d09e0af663988f2f21973/coverage-7.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa", size = 255165, upload-time = "2026-02-09T12:57:01.287Z" }, - { url = "https://files.pythonhosted.org/packages/2d/4d/26d236ff35abc3b5e63540d3386e4c3b192168c1d96da5cb2f43c640970f/coverage-7.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3", size = 257432, upload-time = "2026-02-09T12:57:02.637Z" }, - { url = "https://files.pythonhosted.org/packages/ec/55/14a966c757d1348b2e19caf699415a2a4c4f7feaa4bbc6326a51f5c7dd1b/coverage-7.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a", size = 251716, upload-time = "2026-02-09T12:57:04.056Z" }, - { url = "https://files.pythonhosted.org/packages/77/33/50116647905837c66d28b2af1321b845d5f5d19be9655cb84d4a0ea806b4/coverage-7.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7", size = 253089, upload-time = "2026-02-09T12:57:05.503Z" }, - { url = "https://files.pythonhosted.org/packages/c2/b4/8efb11a46e3665d92635a56e4f2d4529de6d33f2cb38afd47d779d15fc99/coverage-7.13.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc", size = 251232, upload-time = "2026-02-09T12:57:06.879Z" }, - { url = "https://files.pythonhosted.org/packages/51/24/8cd73dd399b812cc76bb0ac260e671c4163093441847ffe058ac9fda1e32/coverage-7.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47", size = 255299, upload-time = "2026-02-09T12:57:08.245Z" }, - { url = "https://files.pythonhosted.org/packages/03/94/0a4b12f1d0e029ce1ccc1c800944a9984cbe7d678e470bb6d3c6bc38a0da/coverage-7.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985", size = 250796, upload-time = "2026-02-09T12:57:10.142Z" }, - { url = "https://files.pythonhosted.org/packages/73/44/6002fbf88f6698ca034360ce474c406be6d5a985b3fdb3401128031eef6b/coverage-7.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0", size = 252673, upload-time = "2026-02-09T12:57:12.197Z" }, - { url = "https://files.pythonhosted.org/packages/de/c6/a0279f7c00e786be75a749a5674e6fa267bcbd8209cd10c9a450c655dfa7/coverage-7.13.4-cp312-cp312-win32.whl", hash = "sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246", size = 221990, upload-time = 
"2026-02-09T12:57:14.085Z" }, - { url = "https://files.pythonhosted.org/packages/77/4e/c0a25a425fcf5557d9abd18419c95b63922e897bc86c1f327f155ef234a9/coverage-7.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126", size = 222800, upload-time = "2026-02-09T12:57:15.944Z" }, - { url = "https://files.pythonhosted.org/packages/47/ac/92da44ad9a6f4e3a7debd178949d6f3769bedca33830ce9b1dcdab589a37/coverage-7.13.4-cp312-cp312-win_arm64.whl", hash = "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d", size = 221415, upload-time = "2026-02-09T12:57:17.497Z" }, - { url = "https://files.pythonhosted.org/packages/db/23/aad45061a31677d68e47499197a131eea55da4875d16c1f42021ab963503/coverage-7.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9", size = 219474, upload-time = "2026-02-09T12:57:19.332Z" }, - { url = "https://files.pythonhosted.org/packages/a5/70/9b8b67a0945f3dfec1fd896c5cefb7c19d5a3a6d74630b99a895170999ae/coverage-7.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac", size = 219844, upload-time = "2026-02-09T12:57:20.66Z" }, - { url = "https://files.pythonhosted.org/packages/97/fd/7e859f8fab324cef6c4ad7cff156ca7c489fef9179d5749b0c8d321281c2/coverage-7.13.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea", size = 250832, upload-time = "2026-02-09T12:57:22.007Z" }, - { url = "https://files.pythonhosted.org/packages/e4/dc/b2442d10020c2f52617828862d8b6ee337859cd8f3a1f13d607dddda9cf7/coverage-7.13.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b", size = 253434, upload-time = "2026-02-09T12:57:23.339Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/88/6728a7ad17428b18d836540630487231f5470fb82454871149502f5e5aa2/coverage-7.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525", size = 254676, upload-time = "2026-02-09T12:57:24.774Z" }, - { url = "https://files.pythonhosted.org/packages/7c/bc/21244b1b8cedf0dff0a2b53b208015fe798d5f2a8d5348dbfece04224fff/coverage-7.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242", size = 256807, upload-time = "2026-02-09T12:57:26.125Z" }, - { url = "https://files.pythonhosted.org/packages/97/a0/ddba7ed3251cff51006737a727d84e05b61517d1784a9988a846ba508877/coverage-7.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148", size = 251058, upload-time = "2026-02-09T12:57:27.614Z" }, - { url = "https://files.pythonhosted.org/packages/9b/55/e289addf7ff54d3a540526f33751951bf0878f3809b47f6dfb3def69c6f7/coverage-7.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a", size = 252805, upload-time = "2026-02-09T12:57:29.066Z" }, - { url = "https://files.pythonhosted.org/packages/13/4e/cc276b1fa4a59be56d96f1dabddbdc30f4ba22e3b1cd42504c37b3313255/coverage-7.13.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23", size = 250766, upload-time = "2026-02-09T12:57:30.522Z" }, - { url = "https://files.pythonhosted.org/packages/94/44/1093b8f93018f8b41a8cf29636c9292502f05e4a113d4d107d14a3acd044/coverage-7.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80", size = 254923, upload-time = "2026-02-09T12:57:31.946Z" }, - { 
url = "https://files.pythonhosted.org/packages/8b/55/ea2796da2d42257f37dbea1aab239ba9263b31bd91d5527cdd6db5efe174/coverage-7.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea", size = 250591, upload-time = "2026-02-09T12:57:33.842Z" }, - { url = "https://files.pythonhosted.org/packages/d4/fa/7c4bb72aacf8af5020675aa633e59c1fbe296d22aed191b6a5b711eb2bc7/coverage-7.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a", size = 252364, upload-time = "2026-02-09T12:57:35.743Z" }, - { url = "https://files.pythonhosted.org/packages/5c/38/a8d2ec0146479c20bbaa7181b5b455a0c41101eed57f10dd19a78ab44c80/coverage-7.13.4-cp313-cp313-win32.whl", hash = "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d", size = 222010, upload-time = "2026-02-09T12:57:37.25Z" }, - { url = "https://files.pythonhosted.org/packages/e2/0c/dbfafbe90a185943dcfbc766fe0e1909f658811492d79b741523a414a6cc/coverage-7.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd", size = 222818, upload-time = "2026-02-09T12:57:38.734Z" }, - { url = "https://files.pythonhosted.org/packages/04/d1/934918a138c932c90d78301f45f677fb05c39a3112b96fd2c8e60503cdc7/coverage-7.13.4-cp313-cp313-win_arm64.whl", hash = "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af", size = 221438, upload-time = "2026-02-09T12:57:40.223Z" }, - { url = "https://files.pythonhosted.org/packages/52/57/ee93ced533bcb3e6df961c0c6e42da2fc6addae53fb95b94a89b1e33ebd7/coverage-7.13.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d", size = 220165, upload-time = "2026-02-09T12:57:41.639Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/e0/969fc285a6fbdda49d91af278488d904dcd7651b2693872f0ff94e40e84a/coverage-7.13.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12", size = 220516, upload-time = "2026-02-09T12:57:44.215Z" }, - { url = "https://files.pythonhosted.org/packages/b1/b8/9531944e16267e2735a30a9641ff49671f07e8138ecf1ca13db9fd2560c7/coverage-7.13.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b", size = 261804, upload-time = "2026-02-09T12:57:45.989Z" }, - { url = "https://files.pythonhosted.org/packages/8a/f3/e63df6d500314a2a60390d1989240d5f27318a7a68fa30ad3806e2a9323e/coverage-7.13.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9", size = 263885, upload-time = "2026-02-09T12:57:47.42Z" }, - { url = "https://files.pythonhosted.org/packages/f3/67/7654810de580e14b37670b60a09c599fa348e48312db5b216d730857ffe6/coverage-7.13.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092", size = 266308, upload-time = "2026-02-09T12:57:49.345Z" }, - { url = "https://files.pythonhosted.org/packages/37/6f/39d41eca0eab3cc82115953ad41c4e77935286c930e8fad15eaed1389d83/coverage-7.13.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9", size = 267452, upload-time = "2026-02-09T12:57:50.811Z" }, - { url = "https://files.pythonhosted.org/packages/50/6d/39c0fbb8fc5cd4d2090811e553c2108cf5112e882f82505ee7495349a6bf/coverage-7.13.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26", size = 261057, upload-time = "2026-02-09T12:57:52.447Z" }, - { url = "https://files.pythonhosted.org/packages/a4/a2/60010c669df5fa603bb5a97fb75407e191a846510da70ac657eb696b7fce/coverage-7.13.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2", size = 263875, upload-time = "2026-02-09T12:57:53.938Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d9/63b22a6bdbd17f1f96e9ed58604c2a6b0e72a9133e37d663bef185877cf6/coverage-7.13.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940", size = 261500, upload-time = "2026-02-09T12:57:56.012Z" }, - { url = "https://files.pythonhosted.org/packages/70/bf/69f86ba1ad85bc3ad240e4c0e57a2e620fbc0e1645a47b5c62f0e941ad7f/coverage-7.13.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c", size = 265212, upload-time = "2026-02-09T12:57:57.5Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f2/5f65a278a8c2148731831574c73e42f57204243d33bedaaf18fa79c5958f/coverage-7.13.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0", size = 260398, upload-time = "2026-02-09T12:57:59.027Z" }, - { url = "https://files.pythonhosted.org/packages/ef/80/6e8280a350ee9fea92f14b8357448a242dcaa243cb2c72ab0ca591f66c8c/coverage-7.13.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b", size = 262584, upload-time = "2026-02-09T12:58:01.129Z" }, - { url = "https://files.pythonhosted.org/packages/22/63/01ff182fc95f260b539590fb12c11ad3e21332c15f9799cb5e2386f71d9f/coverage-7.13.4-cp313-cp313t-win32.whl", hash = "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9", size = 222688, upload-time = 
"2026-02-09T12:58:02.736Z" }, - { url = "https://files.pythonhosted.org/packages/a9/43/89de4ef5d3cd53b886afa114065f7e9d3707bdb3e5efae13535b46ae483d/coverage-7.13.4-cp313-cp313t-win_amd64.whl", hash = "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd", size = 223746, upload-time = "2026-02-09T12:58:05.362Z" }, - { url = "https://files.pythonhosted.org/packages/35/39/7cf0aa9a10d470a5309b38b289b9bb07ddeac5d61af9b664fe9775a4cb3e/coverage-7.13.4-cp313-cp313t-win_arm64.whl", hash = "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997", size = 222003, upload-time = "2026-02-09T12:58:06.952Z" }, - { url = "https://files.pythonhosted.org/packages/92/11/a9cf762bb83386467737d32187756a42094927150c3e107df4cb078e8590/coverage-7.13.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601", size = 219522, upload-time = "2026-02-09T12:58:08.623Z" }, - { url = "https://files.pythonhosted.org/packages/d3/28/56e6d892b7b052236d67c95f1936b6a7cf7c3e2634bf27610b8cbd7f9c60/coverage-7.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689", size = 219855, upload-time = "2026-02-09T12:58:10.176Z" }, - { url = "https://files.pythonhosted.org/packages/e5/69/233459ee9eb0c0d10fcc2fe425a029b3fa5ce0f040c966ebce851d030c70/coverage-7.13.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c", size = 250887, upload-time = "2026-02-09T12:58:12.503Z" }, - { url = "https://files.pythonhosted.org/packages/06/90/2cdab0974b9b5bbc1623f7876b73603aecac11b8d95b85b5b86b32de5eab/coverage-7.13.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129", size = 253396, upload-time = "2026-02-09T12:58:14.615Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/15/ea4da0f85bf7d7b27635039e649e99deb8173fe551096ea15017f7053537/coverage-7.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552", size = 254745, upload-time = "2026-02-09T12:58:16.162Z" }, - { url = "https://files.pythonhosted.org/packages/99/11/bb356e86920c655ca4d61daee4e2bbc7258f0a37de0be32d233b561134ff/coverage-7.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a", size = 257055, upload-time = "2026-02-09T12:58:17.892Z" }, - { url = "https://files.pythonhosted.org/packages/c9/0f/9ae1f8cb17029e09da06ca4e28c9e1d5c1c0a511c7074592e37e0836c915/coverage-7.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356", size = 250911, upload-time = "2026-02-09T12:58:19.495Z" }, - { url = "https://files.pythonhosted.org/packages/89/3a/adfb68558fa815cbc29747b553bc833d2150228f251b127f1ce97e48547c/coverage-7.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71", size = 252754, upload-time = "2026-02-09T12:58:21.064Z" }, - { url = "https://files.pythonhosted.org/packages/32/b1/540d0c27c4e748bd3cd0bd001076ee416eda993c2bae47a73b7cc9357931/coverage-7.13.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5", size = 250720, upload-time = "2026-02-09T12:58:22.622Z" }, - { url = "https://files.pythonhosted.org/packages/c7/95/383609462b3ffb1fe133014a7c84fc0dd01ed55ac6140fa1093b5af7ebb1/coverage-7.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98", size = 254994, upload-time = "2026-02-09T12:58:24.548Z" }, - { 
url = "https://files.pythonhosted.org/packages/f7/ba/1761138e86c81680bfc3c49579d66312865457f9fe405b033184e5793cb3/coverage-7.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5", size = 250531, upload-time = "2026-02-09T12:58:26.271Z" }, - { url = "https://files.pythonhosted.org/packages/f8/8e/05900df797a9c11837ab59c4d6fe94094e029582aab75c3309a93e6fb4e3/coverage-7.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0", size = 252189, upload-time = "2026-02-09T12:58:27.807Z" }, - { url = "https://files.pythonhosted.org/packages/00/bd/29c9f2db9ea4ed2738b8a9508c35626eb205d51af4ab7bf56a21a2e49926/coverage-7.13.4-cp314-cp314-win32.whl", hash = "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb", size = 222258, upload-time = "2026-02-09T12:58:29.441Z" }, - { url = "https://files.pythonhosted.org/packages/a7/4d/1f8e723f6829977410efeb88f73673d794075091c8c7c18848d273dc9d73/coverage-7.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505", size = 223073, upload-time = "2026-02-09T12:58:31.026Z" }, - { url = "https://files.pythonhosted.org/packages/51/5b/84100025be913b44e082ea32abcf1afbf4e872f5120b7a1cab1d331b1e13/coverage-7.13.4-cp314-cp314-win_arm64.whl", hash = "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2", size = 221638, upload-time = "2026-02-09T12:58:32.599Z" }, - { url = "https://files.pythonhosted.org/packages/a7/e4/c884a405d6ead1370433dad1e3720216b4f9fd8ef5b64bfd984a2a60a11a/coverage-7.13.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056", size = 220246, upload-time = "2026-02-09T12:58:34.181Z" }, - { url = 
"https://files.pythonhosted.org/packages/81/5c/4d7ed8b23b233b0fffbc9dfec53c232be2e695468523242ea9fd30f97ad2/coverage-7.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc", size = 220514, upload-time = "2026-02-09T12:58:35.704Z" }, - { url = "https://files.pythonhosted.org/packages/2f/6f/3284d4203fd2f28edd73034968398cd2d4cb04ab192abc8cff007ea35679/coverage-7.13.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9", size = 261877, upload-time = "2026-02-09T12:58:37.864Z" }, - { url = "https://files.pythonhosted.org/packages/09/aa/b672a647bbe1556a85337dc95bfd40d146e9965ead9cc2fe81bde1e5cbce/coverage-7.13.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf", size = 264004, upload-time = "2026-02-09T12:58:39.492Z" }, - { url = "https://files.pythonhosted.org/packages/79/a1/aa384dbe9181f98bba87dd23dda436f0c6cf2e148aecbb4e50fc51c1a656/coverage-7.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55", size = 266408, upload-time = "2026-02-09T12:58:41.852Z" }, - { url = "https://files.pythonhosted.org/packages/53/5e/5150bf17b4019bc600799f376bb9606941e55bd5a775dc1e096b6ffea952/coverage-7.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72", size = 267544, upload-time = "2026-02-09T12:58:44.093Z" }, - { url = "https://files.pythonhosted.org/packages/e0/ed/f1de5c675987a4a7a672250d2c5c9d73d289dbf13410f00ed7181d8017dd/coverage-7.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a", size = 260980, upload-time = "2026-02-09T12:58:45.721Z" }, - { url = "https://files.pythonhosted.org/packages/b3/e3/fe758d01850aa172419a6743fe76ba8b92c29d181d4f676ffe2dae2ba631/coverage-7.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6", size = 263871, upload-time = "2026-02-09T12:58:47.334Z" }, - { url = "https://files.pythonhosted.org/packages/b6/76/b829869d464115e22499541def9796b25312b8cf235d3bb00b39f1675395/coverage-7.13.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3", size = 261472, upload-time = "2026-02-09T12:58:48.995Z" }, - { url = "https://files.pythonhosted.org/packages/14/9e/caedb1679e73e2f6ad240173f55218488bfe043e38da577c4ec977489915/coverage-7.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750", size = 265210, upload-time = "2026-02-09T12:58:51.178Z" }, - { url = "https://files.pythonhosted.org/packages/3a/10/0dd02cb009b16ede425b49ec344aba13a6ae1dc39600840ea6abcb085ac4/coverage-7.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39", size = 260319, upload-time = "2026-02-09T12:58:53.081Z" }, - { url = "https://files.pythonhosted.org/packages/92/8e/234d2c927af27c6d7a5ffad5bd2cf31634c46a477b4c7adfbfa66baf7ebb/coverage-7.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0", size = 262638, upload-time = "2026-02-09T12:58:55.258Z" }, - { url = "https://files.pythonhosted.org/packages/2f/64/e5547c8ff6964e5965c35a480855911b61509cce544f4d442caa759a0702/coverage-7.13.4-cp314-cp314t-win32.whl", hash = "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea", size = 223040, upload-time = 
"2026-02-09T12:58:56.936Z" }, - { url = "https://files.pythonhosted.org/packages/c7/96/38086d58a181aac86d503dfa9c47eb20715a79c3e3acbdf786e92e5c09a8/coverage-7.13.4-cp314-cp314t-win_amd64.whl", hash = "sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932", size = 224148, upload-time = "2026-02-09T12:58:58.645Z" }, - { url = "https://files.pythonhosted.org/packages/ce/72/8d10abd3740a0beb98c305e0c3faf454366221c0f37a8bcf8f60020bb65a/coverage-7.13.4-cp314-cp314t-win_arm64.whl", hash = "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b", size = 222172, upload-time = "2026-02-09T12:59:00.396Z" }, - { url = "https://files.pythonhosted.org/packages/0d/4a/331fe2caf6799d591109bb9c08083080f6de90a823695d412a935622abb2/coverage-7.13.4-py3-none-any.whl", hash = "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0", size = 211242, upload-time = "2026-02-09T12:59:02.032Z" }, + { url = "https://files.pythonhosted.org/packages/69/33/e8c48488c29a73fd089f9d71f9653c1be7478f2ad6b5bc870db11a55d23d/coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5", size = 219255, upload-time = "2026-03-17T10:29:51.081Z" }, + { url = "https://files.pythonhosted.org/packages/da/bd/b0ebe9f677d7f4b74a3e115eec7ddd4bcf892074963a00d91e8b164a6386/coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf", size = 219772, upload-time = "2026-03-17T10:29:52.867Z" }, + { url = "https://files.pythonhosted.org/packages/48/cc/5cb9502f4e01972f54eedd48218bb203fe81e294be606a2bc93970208013/coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8", size = 246532, upload-time = "2026-03-17T10:29:54.688Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/d8/3217636d86c7e7b12e126e4f30ef1581047da73140614523af7495ed5f2d/coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4", size = 248333, upload-time = "2026-03-17T10:29:56.221Z" }, + { url = "https://files.pythonhosted.org/packages/2b/30/2002ac6729ba2d4357438e2ed3c447ad8562866c8c63fc16f6dfc33afe56/coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d", size = 250211, upload-time = "2026-03-17T10:29:57.938Z" }, + { url = "https://files.pythonhosted.org/packages/6c/85/552496626d6b9359eb0e2f86f920037c9cbfba09b24d914c6e1528155f7d/coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930", size = 252125, upload-time = "2026-03-17T10:29:59.388Z" }, + { url = "https://files.pythonhosted.org/packages/44/21/40256eabdcbccdb6acf6b381b3016a154399a75fe39d406f790ae84d1f3c/coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d", size = 247219, upload-time = "2026-03-17T10:30:01.199Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/96e2a6c3f21a0ea77d7830b254a1542d0328acc8d7bdf6a284ba7e529f77/coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40", size = 248248, upload-time = "2026-03-17T10:30:03.317Z" }, + { url = "https://files.pythonhosted.org/packages/da/ba/8477f549e554827da390ec659f3c38e4b6d95470f4daafc2d8ff94eaa9c2/coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878", size = 246254, upload-time 
= "2026-03-17T10:30:04.832Z" }, + { url = "https://files.pythonhosted.org/packages/55/59/bc22aef0e6aa179d5b1b001e8b3654785e9adf27ef24c93dc4228ebd5d68/coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400", size = 250067, upload-time = "2026-03-17T10:30:06.535Z" }, + { url = "https://files.pythonhosted.org/packages/de/1b/c6a023a160806a5137dca53468fd97530d6acad24a22003b1578a9c2e429/coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0", size = 246521, upload-time = "2026-03-17T10:30:08.486Z" }, + { url = "https://files.pythonhosted.org/packages/2d/3f/3532c85a55aa2f899fa17c186f831cfa1aa434d88ff792a709636f64130e/coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0", size = 247126, upload-time = "2026-03-17T10:30:09.966Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2e/b9d56af4a24ef45dfbcda88e06870cb7d57b2b0bfa3a888d79b4c8debd76/coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58", size = 221860, upload-time = "2026-03-17T10:30:11.393Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cc/d938417e7a4d7f0433ad4edee8bb2acdc60dc7ac5af19e2a07a048ecbee3/coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e", size = 222788, upload-time = "2026-03-17T10:30:12.886Z" }, + { url = "https://files.pythonhosted.org/packages/4b/37/d24c8f8220ff07b839b2c043ea4903a33b0f455abe673ae3c03bbdb7f212/coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d", size = 219381, upload-time = "2026-03-17T10:30:14.68Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/8b/cd129b0ca4afe886a6ce9d183c44d8301acbd4ef248622e7c49a23145605/coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587", size = 219880, upload-time = "2026-03-17T10:30:16.231Z" }, + { url = "https://files.pythonhosted.org/packages/55/2f/e0e5b237bffdb5d6c530ce87cc1d413a5b7d7dfd60fb067ad6d254c35c76/coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642", size = 250303, upload-time = "2026-03-17T10:30:17.748Z" }, + { url = "https://files.pythonhosted.org/packages/92/be/b1afb692be85b947f3401375851484496134c5554e67e822c35f28bf2fbc/coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b", size = 252218, upload-time = "2026-03-17T10:30:19.804Z" }, + { url = "https://files.pythonhosted.org/packages/da/69/2f47bb6fa1b8d1e3e5d0c4be8ccb4313c63d742476a619418f85740d597b/coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686", size = 254326, upload-time = "2026-03-17T10:30:21.321Z" }, + { url = "https://files.pythonhosted.org/packages/d5/d0/79db81da58965bd29dabc8f4ad2a2af70611a57cba9d1ec006f072f30a54/coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743", size = 256267, upload-time = "2026-03-17T10:30:23.094Z" }, + { url = "https://files.pythonhosted.org/packages/e5/32/d0d7cc8168f91ddab44c0ce4806b969df5f5fdfdbb568eaca2dbc2a04936/coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75", size = 250430, upload-time = "2026-03-17T10:30:25.311Z" }, + { url = "https://files.pythonhosted.org/packages/4d/06/a055311d891ddbe231cd69fdd20ea4be6e3603ffebddf8704b8ca8e10a3c/coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209", size = 252017, upload-time = "2026-03-17T10:30:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f6/d0fd2d21e29a657b5f77a2fe7082e1568158340dceb941954f776dce1b7b/coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a", size = 250080, upload-time = "2026-03-17T10:30:29.481Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ab/0d7fb2efc2e9a5eb7ddcc6e722f834a69b454b7e6e5888c3a8567ecffb31/coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e", size = 253843, upload-time = "2026-03-17T10:30:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/ba/6f/7467b917bbf5408610178f62a49c0ed4377bb16c1657f689cc61470da8ce/coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd", size = 249802, upload-time = "2026-03-17T10:30:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/75/2c/1172fb689df92135f5bfbbd69fc83017a76d24ea2e2f3a1154007e2fb9f8/coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8", size = 250707, upload-time = "2026-03-17T10:30:35.2Z" }, + { url = "https://files.pythonhosted.org/packages/67/21/9ac389377380a07884e3b48ba7a620fcd9dbfaf1d40565facdc6b36ec9ef/coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf", size = 221880, upload-time = "2026-03-17T10:30:36.775Z" 
}, + { url = "https://files.pythonhosted.org/packages/af/7f/4cd8a92531253f9d7c1bbecd9fa1b472907fb54446ca768c59b531248dc5/coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9", size = 222816, upload-time = "2026-03-17T10:30:38.891Z" }, + { url = "https://files.pythonhosted.org/packages/12/a6/1d3f6155fb0010ca68eba7fe48ca6c9da7385058b77a95848710ecf189b1/coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028", size = 221483, upload-time = "2026-03-17T10:30:40.463Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c3/a396306ba7db865bf96fc1fb3b7fd29bcbf3d829df642e77b13555163cd6/coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01", size = 219554, upload-time = "2026-03-17T10:30:42.208Z" }, + { url = "https://files.pythonhosted.org/packages/a6/16/a68a19e5384e93f811dccc51034b1fd0b865841c390e3c931dcc4699e035/coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422", size = 219908, upload-time = "2026-03-17T10:30:43.906Z" }, + { url = "https://files.pythonhosted.org/packages/29/72/20b917c6793af3a5ceb7fb9c50033f3ec7865f2911a1416b34a7cfa0813b/coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f", size = 251419, upload-time = "2026-03-17T10:30:45.545Z" }, + { url = "https://files.pythonhosted.org/packages/8c/49/cd14b789536ac6a4778c453c6a2338bc0a2fb60c5a5a41b4008328b9acc1/coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5", size = 254159, upload-time = "2026-03-17T10:30:47.204Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/00/7b0edcfe64e2ed4c0340dac14a52ad0f4c9bd0b8b5e531af7d55b703db7c/coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376", size = 255270, upload-time = "2026-03-17T10:30:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/7ffc4ba0f5d0a55c1e84ea7cee39c9fc06af7b170513d83fbf3bbefce280/coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256", size = 257538, upload-time = "2026-03-17T10:30:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/81/bd/73ddf85f93f7e6fa83e77ccecb6162d9415c79007b4bc124008a4995e4a7/coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c", size = 251821, upload-time = "2026-03-17T10:30:52.5Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/278aff4e8dec4926a0bcb9486320752811f543a3ce5b602cc7a29978d073/coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5", size = 253191, upload-time = "2026-03-17T10:30:54.543Z" }, + { url = "https://files.pythonhosted.org/packages/70/ee/fe1621488e2e0a58d7e94c4800f0d96f79671553488d401a612bebae324b/coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09", size = 251337, upload-time = "2026-03-17T10:30:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/f79fb37aa104b562207cc23cb5711ab6793608e246cae1e93f26b2236ed9/coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9", size = 255404, upload-time = "2026-03-17T10:30:58.427Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/f0/ed15262a58ec81ce457ceb717b7f78752a1713556b19081b76e90896e8d4/coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf", size = 250903, upload-time = "2026-03-17T10:31:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e9/9129958f20e7e9d4d56d51d42ccf708d15cac355ff4ac6e736e97a9393d2/coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c", size = 252780, upload-time = "2026-03-17T10:31:01.916Z" }, + { url = "https://files.pythonhosted.org/packages/a4/d7/0ad9b15812d81272db94379fe4c6df8fd17781cc7671fdfa30c76ba5ff7b/coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf", size = 222093, upload-time = "2026-03-17T10:31:03.642Z" }, + { url = "https://files.pythonhosted.org/packages/29/3d/821a9a5799fac2556bcf0bd37a70d1d11fa9e49784b6d22e92e8b2f85f18/coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810", size = 222900, upload-time = "2026-03-17T10:31:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fa/2238c2ad08e35cf4f020ea721f717e09ec3152aea75d191a7faf3ef009a8/coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de", size = 221515, upload-time = "2026-03-17T10:31:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/74/8c/74fedc9663dcf168b0a059d4ea756ecae4da77a489048f94b5f512a8d0b3/coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1", size = 219576, upload-time = "2026-03-17T10:31:09.045Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/c9/44fb661c55062f0818a6ffd2685c67aa30816200d5f2817543717d4b92eb/coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3", size = 219942, upload-time = "2026-03-17T10:31:10.708Z" }, + { url = "https://files.pythonhosted.org/packages/5f/13/93419671cee82b780bab7ea96b67c8ef448f5f295f36bf5031154ec9a790/coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26", size = 250935, upload-time = "2026-03-17T10:31:12.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/1666e3a4462f8202d836920114fa7a5ee9275d1fa45366d336c551a162dd/coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3", size = 253541, upload-time = "2026-03-17T10:31:14.247Z" }, + { url = "https://files.pythonhosted.org/packages/4e/5e/3ee3b835647be646dcf3c65a7c6c18f87c27326a858f72ab22c12730773d/coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b", size = 254780, upload-time = "2026-03-17T10:31:16.193Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/cb5bd1a04cfcc49ede6cd8409d80bee17661167686741e041abc7ee1b9a9/coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a", size = 256912, upload-time = "2026-03-17T10:31:17.89Z" }, + { url = "https://files.pythonhosted.org/packages/1b/66/c1dceb7b9714473800b075f5c8a84f4588f887a90eb8645282031676e242/coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969", size = 251165, upload-time = "2026-03-17T10:31:19.605Z" }, + { url = "https://files.pythonhosted.org/packages/b7/62/5502b73b97aa2e53ea22a39cf8649ff44827bef76d90bf638777daa27a9d/coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161", size = 252908, upload-time = "2026-03-17T10:31:21.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/37/7792c2d69854397ca77a55c4646e5897c467928b0e27f2d235d83b5d08c6/coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15", size = 250873, upload-time = "2026-03-17T10:31:23.565Z" }, + { url = "https://files.pythonhosted.org/packages/a3/23/bc866fb6163be52a8a9e5d708ba0d3b1283c12158cefca0a8bbb6e247a43/coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1", size = 255030, upload-time = "2026-03-17T10:31:25.58Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8b/ef67e1c222ef49860701d346b8bbb70881bef283bd5f6cbba68a39a086c7/coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6", size = 250694, upload-time = "2026-03-17T10:31:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/866d1f74f0acddbb906db212e096dee77a8e2158ca5e6bb44729f9d93298/coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17", size = 252469, upload-time = "2026-03-17T10:31:29.472Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f5/be742fec31118f02ce42b21c6af187ad6a344fed546b56ca60caacc6a9a0/coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85", size = 222112, upload-time = "2026-03-17T10:31:31.526Z" 
}, + { url = "https://files.pythonhosted.org/packages/66/40/7732d648ab9d069a46e686043241f01206348e2bbf128daea85be4d6414b/coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b", size = 222923, upload-time = "2026-03-17T10:31:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/48/af/fea819c12a095781f6ccd504890aaddaf88b8fab263c4940e82c7b770124/coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664", size = 221540, upload-time = "2026-03-17T10:31:35.445Z" }, + { url = "https://files.pythonhosted.org/packages/23/d2/17879af479df7fbbd44bd528a31692a48f6b25055d16482fdf5cdb633805/coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d", size = 220262, upload-time = "2026-03-17T10:31:37.184Z" }, + { url = "https://files.pythonhosted.org/packages/5b/4c/d20e554f988c8f91d6a02c5118f9abbbf73a8768a3048cb4962230d5743f/coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0", size = 220617, upload-time = "2026-03-17T10:31:39.245Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/f9f5277b95184f764b24e7231e166dfdb5780a46d408a2ac665969416d61/coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806", size = 261912, upload-time = "2026-03-17T10:31:41.324Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f6/7f1ab39393eeb50cfe4747ae8ef0e4fc564b989225aa1152e13a180d74f8/coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3", size = 263987, upload-time = "2026-03-17T10:31:43.724Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/d7/62c084fb489ed9c6fbdf57e006752e7c516ea46fd690e5ed8b8617c7d52e/coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9", size = 266416, upload-time = "2026-03-17T10:31:45.769Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f6/df63d8660e1a0bff6125947afda112a0502736f470d62ca68b288ea762d8/coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd", size = 267558, upload-time = "2026-03-17T10:31:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/5b/02/353ca81d36779bd108f6d384425f7139ac3c58c750dcfaafe5d0bee6436b/coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606", size = 261163, upload-time = "2026-03-17T10:31:50.125Z" }, + { url = "https://files.pythonhosted.org/packages/2c/16/2e79106d5749bcaf3aee6d309123548e3276517cd7851faa8da213bc61bf/coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e", size = 263981, upload-time = "2026-03-17T10:31:51.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/c7/c29e0c59ffa6942030ae6f50b88ae49988e7e8da06de7ecdbf49c6d4feae/coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0", size = 261604, upload-time = "2026-03-17T10:31:53.872Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/097cdc3db342f34006a308ab41c3a7c11c3f0d84750d340f45d88a782e00/coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87", size = 265321, upload-time = "2026-03-17T10:31:55.997Z" }, 
+ { url = "https://files.pythonhosted.org/packages/bb/1f/4994af354689e14fd03a75f8ec85a9a68d94e0188bbdab3fc1516b55e512/coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479", size = 260502, upload-time = "2026-03-17T10:31:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/22/c6/9bb9ef55903e628033560885f5c31aa227e46878118b63ab15dc7ba87797/coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2", size = 262688, upload-time = "2026-03-17T10:32:00.141Z" }, + { url = "https://files.pythonhosted.org/packages/14/4f/f5df9007e50b15e53e01edea486814783a7f019893733d9e4d6caad75557/coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a", size = 222788, upload-time = "2026-03-17T10:32:02.246Z" }, + { url = "https://files.pythonhosted.org/packages/e1/98/aa7fccaa97d0f3192bec013c4e6fd6d294a6ed44b640e6bb61f479e00ed5/coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819", size = 223851, upload-time = "2026-03-17T10:32:04.416Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8b/e5c469f7352651e5f013198e9e21f97510b23de957dd06a84071683b4b60/coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911", size = 222104, upload-time = "2026-03-17T10:32:06.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/77/39703f0d1d4b478bfd30191d3c14f53caf596fac00efb3f8f6ee23646439/coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f", size = 219621, upload-time = "2026-03-17T10:32:08.589Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/3e/51dff36d99ae14639a133d9b164d63e628532e2974d8b1edb99dd1ebc733/coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e", size = 219953, upload-time = "2026-03-17T10:32:10.507Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6c/1f1917b01eb647c2f2adc9962bd66c79eb978951cab61bdc1acab3290c07/coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a", size = 250992, upload-time = "2026-03-17T10:32:12.41Z" }, + { url = "https://files.pythonhosted.org/packages/22/e5/06b1f88f42a5a99df42ce61208bdec3bddb3d261412874280a19796fc09c/coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510", size = 253503, upload-time = "2026-03-17T10:32:14.449Z" }, + { url = "https://files.pythonhosted.org/packages/80/28/2a148a51e5907e504fa7b85490277734e6771d8844ebcc48764a15e28155/coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247", size = 254852, upload-time = "2026-03-17T10:32:16.56Z" }, + { url = "https://files.pythonhosted.org/packages/61/77/50e8d3d85cc0b7ebe09f30f151d670e302c7ff4a1bf6243f71dd8b0981fa/coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6", size = 257161, upload-time = "2026-03-17T10:32:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c4/b5fd1d4b7bf8d0e75d997afd3925c59ba629fc8616f1b3aae7605132e256/coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0", size = 251021, upload-time = "2026-03-17T10:32:21.344Z" }, + { url = "https://files.pythonhosted.org/packages/f8/66/6ea21f910e92d69ef0b1c3346ea5922a51bad4446c9126db2ae96ee24c4c/coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882", size = 252858, upload-time = "2026-03-17T10:32:23.506Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ea/879c83cb5d61aa2a35fb80e72715e92672daef8191b84911a643f533840c/coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740", size = 250823, upload-time = "2026-03-17T10:32:25.516Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fb/616d95d3adb88b9803b275580bdeee8bd1b69a886d057652521f83d7322f/coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16", size = 255099, upload-time = "2026-03-17T10:32:27.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/93/25e6917c90ec1c9a56b0b26f6cad6408e5f13bb6b35d484a0d75c9cf000d/coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0", size = 250638, upload-time = "2026-03-17T10:32:29.914Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7b/dc1776b0464145a929deed214aef9fb1493f159b59ff3c7eeeedf91eddd0/coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0", size = 252295, upload-time = "2026-03-17T10:32:31.981Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fb/99cbbc56a26e07762a2740713f3c8f9f3f3106e3a3dd8cc4474954bccd34/coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc", size = 222360, upload-time = 
"2026-03-17T10:32:34.233Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b7/4758d4f73fb536347cc5e4ad63662f9d60ba9118cb6785e9616b2ce5d7fa/coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633", size = 223174, upload-time = "2026-03-17T10:32:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f2/24d84e1dfe70f8ac9fdf30d338239860d0d1d5da0bda528959d0ebc9da28/coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8", size = 221739, upload-time = "2026-03-17T10:32:38.736Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/4a168591057b3668c2428bff25dd3ebc21b629d666d90bcdfa0217940e84/coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b", size = 220351, upload-time = "2026-03-17T10:32:41.196Z" }, + { url = "https://files.pythonhosted.org/packages/f5/21/1fd5c4dbfe4a58b6b99649125635df46decdfd4a784c3cd6d410d303e370/coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c", size = 220612, upload-time = "2026-03-17T10:32:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/d6/fe/2a924b3055a5e7e4512655a9d4609781b0d62334fa0140c3e742926834e2/coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9", size = 261985, upload-time = "2026-03-17T10:32:45.514Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0d/c8928f2bd518c45990fe1a2ab8db42e914ef9b726c975facc4282578c3eb/coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29", size = 264107, upload-time = "2026-03-17T10:32:47.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/ae/4ae35bbd9a0af9d820362751f0766582833c211224b38665c0f8de3d487f/coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607", size = 266513, upload-time = "2026-03-17T10:32:50.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/20/d326174c55af36f74eac6ae781612d9492f060ce8244b570bb9d50d9d609/coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90", size = 267650, upload-time = "2026-03-17T10:32:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/31484d62cbd0eabd3412e30d74386ece4a0837d4f6c3040a653878bfc019/coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3", size = 261089, upload-time = "2026-03-17T10:32:54.544Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/49a72d6de146eebb0b7e48cc0f4bc2c0dd858e3d4790ab2b39a2872b62bd/coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab", size = 263982, upload-time = "2026-03-17T10:32:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/06/3b/0351f1bd566e6e4dd39e978efe7958bde1d32f879e85589de147654f57bb/coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562", size = 261579, upload-time = "2026-03-17T10:32:59.466Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/796a2a2f4017f554d7810f5c573449b35b1e46788424a548d4d19201b222/coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2", size = 265316, upload-time = "2026-03-17T10:33:01.847Z" }, + 
{ url = "https://files.pythonhosted.org/packages/3d/16/d5ae91455541d1a78bc90abf495be600588aff8f6db5c8b0dae739fa39c9/coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea", size = 260427, upload-time = "2026-03-17T10:33:03.945Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/07f413dba62db21fb3fad5d0de013a50e073cc4e2dc4306e770360f6dfc8/coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a", size = 262745, upload-time = "2026-03-17T10:33:06.285Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/d792371332eb4663115becf4bad47e047d16234b1aff687b1b18c58d60ae/coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215", size = 223146, upload-time = "2026-03-17T10:33:08.756Z" }, + { url = "https://files.pythonhosted.org/packages/db/51/37221f59a111dca5e85be7dbf09696323b5b9f13ff65e0641d535ed06ea8/coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43", size = 224254, upload-time = "2026-03-17T10:33:11.174Z" }, + { url = "https://files.pythonhosted.org/packages/54/83/6acacc889de8987441aa7d5adfbdbf33d288dad28704a67e574f1df9bcbb/coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45", size = 222276, upload-time = "2026-03-17T10:33:13.466Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, ] [package.optional-dependencies] @@ -432,11 +450,11 @@ wheels = [ [[package]] name = "identify" -version = "2.6.17" +version = "2.6.18" source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/84/376a3b96e5a8d33a7aa2c5b3b31a4b3c364117184bf0b17418055f6ace66/identify-2.6.17.tar.gz", hash = "sha256:f816b0b596b204c9fdf076ded172322f2723cf958d02f9c3587504834c8ff04d", size = 99579, upload-time = "2026-03-01T20:04:12.702Z" } +sdist = { url = "https://files.pythonhosted.org/packages/46/c4/7fb4db12296cdb11893d61c92048fe617ee853f8523b9b296ac03b43757e/identify-2.6.18.tar.gz", hash = "sha256:873ac56a5e3fd63e7438a7ecbc4d91aca692eb3fefa4534db2b7913f3fc352fd", size = 99580, upload-time = "2026-03-15T18:39:50.319Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/66/71c1227dff78aaeb942fed29dd5651f2aec166cc7c9aeea3e8b26a539b7d/identify-2.6.17-py2.py3-none-any.whl", hash = "sha256:be5f8412d5ed4b20f2bd41a65f920990bdccaa6a4a18a08f1eefdcd0bdd885f0", size = 99382, upload-time = "2026-03-01T20:04:11.439Z" }, + { url = "https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl", hash = "sha256:8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737", size = 99394, upload-time = "2026-03-15T18:39:48.915Z" }, ] [[package]] @@ -450,14 +468,14 @@ wheels = [ [[package]] name = "importlib-metadata" -version = "8.7.1" +version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = 
"2026-03-20T06:42:56.999Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, + { url = "https://files.pythonhosted.org/packages/38/3d/2d244233ac4f76e38533cfcb2991c9eb4c7bf688ae0a036d30725b8faafe/importlib_metadata-9.0.0-py3-none-any.whl", hash = "sha256:2d21d1cc5a017bd0559e36150c21c830ab1dc304dedd1b7ea85d20f45ef3edd7", size = 27789, upload-time = "2026-03-20T06:42:55.665Z" }, ] [[package]] @@ -483,14 +501,14 @@ wheels = [ [[package]] name = "jaraco-context" -version = "6.1.1" +version = "6.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "backports-tarfile", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/27/7b/c3081ff1af947915503121c649f26a778e1a2101fd525f74aef997d75b7e/jaraco_context-6.1.1.tar.gz", hash = "sha256:bc046b2dc94f1e5532bd02402684414575cc11f565d929b6563125deb0a6e581", size = 15832, upload-time = "2026-03-07T15:46:04.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/49/c152890d49102b280ecf86ba5f80a8c111c3a155dafa3bd24aeb64fde9e1/jaraco_context-6.1.1-py3-none-any.whl", hash = "sha256:0df6a0287258f3e364072c3e40d5411b20cafa30cb28c4839d24319cecf9f808", size = 7005, upload-time = "2026-03-07T15:46:03.515Z" }, + { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = 
"sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" }, ] [[package]] @@ -844,29 +862,29 @@ wheels = [ [[package]] name = "pytest-cov" -version = "7.0.0" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coverage", extra = ["toml"] }, { name = "pluggy" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] [[package]] name = "python-discovery" -version = "1.1.3" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/7e/9f3b0dd3a074a6c3e1e79f35e465b1f2ee4b262d619de00cfce523cc9b24/python_discovery-1.1.3.tar.gz", hash = 
"sha256:7acca36e818cd88e9b2ba03e045ad7e93e1713e29c6bbfba5d90202310b7baa5", size = 56945, upload-time = "2026-03-10T15:08:15.038Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/90/bcce6b46823c9bec1757c964dc37ed332579be512e17a30e9698095dcae4/python_discovery-1.2.0.tar.gz", hash = "sha256:7d33e350704818b09e3da2bd419d37e21e7c30db6e0977bb438916e06b41b5b1", size = 58055, upload-time = "2026-03-19T01:43:08.248Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/80/73211fc5bfbfc562369b4aa61dc1e4bf07dc7b34df7b317e4539316b809c/python_discovery-1.1.3-py3-none-any.whl", hash = "sha256:90e795f0121bc84572e737c9aa9966311b9fde44ffb88a5953b3ec9b31c6945e", size = 31485, upload-time = "2026-03-10T15:08:13.06Z" }, + { url = "https://files.pythonhosted.org/packages/c2/3c/2005227cb951df502412de2fa781f800663cccbef8d90ec6f1b371ac2c0d/python_discovery-1.2.0-py3-none-any.whl", hash = "sha256:1e108f1bbe2ed0ef089823d28805d5ad32be8e734b86a5f212bf89b71c266e4a", size = 31524, upload-time = "2026-03-19T01:43:07.045Z" }, ] [[package]] @@ -1007,27 +1025,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.6" +version = "0.15.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/df/f8629c19c5318601d3121e230f74cbee7a3732339c52b21daa2b82ef9c7d/ruff-0.15.6.tar.gz", hash = "sha256:8394c7bb153a4e3811a4ecdacd4a8e6a4fa8097028119160dffecdcdf9b56ae4", size = 4597916, upload-time = "2026-03-12T23:05:47.51Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/22/9e4f66ee588588dc6c9af6a994e12d26e19efbe874d1a909d09a6dac7a59/ruff-0.15.7.tar.gz", hash = "sha256:04f1ae61fc20fe0b148617c324d9d009b5f63412c0b16474f3d5f1a1a665f7ac", size = 4601277, upload-time = "2026-03-19T16:26:22.605Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/2f/4e03a7e5ce99b517e98d3b4951f411de2b0fa8348d39cf446671adcce9a2/ruff-0.15.6-py3-none-linux_armv6l.whl", hash = 
"sha256:7c98c3b16407b2cf3d0f2b80c80187384bc92c6774d85fefa913ecd941256fff", size = 10508953, upload-time = "2026-03-12T23:05:17.246Z" }, - { url = "https://files.pythonhosted.org/packages/70/60/55bcdc3e9f80bcf39edf0cd272da6fa511a3d94d5a0dd9e0adf76ceebdb4/ruff-0.15.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee7dcfaad8b282a284df4aa6ddc2741b3f4a18b0555d626805555a820ea181c3", size = 10942257, upload-time = "2026-03-12T23:05:23.076Z" }, - { url = "https://files.pythonhosted.org/packages/e7/f9/005c29bd1726c0f492bfa215e95154cf480574140cb5f867c797c18c790b/ruff-0.15.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3bd9967851a25f038fc8b9ae88a7fbd1b609f30349231dffaa37b6804923c4bb", size = 10322683, upload-time = "2026-03-12T23:05:33.738Z" }, - { url = "https://files.pythonhosted.org/packages/5f/74/2f861f5fd7cbb2146bddb5501450300ce41562da36d21868c69b7a828169/ruff-0.15.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13f4594b04e42cd24a41da653886b04d2ff87adbf57497ed4f728b0e8a4866f8", size = 10660986, upload-time = "2026-03-12T23:05:53.245Z" }, - { url = "https://files.pythonhosted.org/packages/c1/a1/309f2364a424eccb763cdafc49df843c282609f47fe53aa83f38272389e0/ruff-0.15.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2ed8aea2f3fe57886d3f00ea5b8aae5bf68d5e195f487f037a955ff9fbaac9e", size = 10332177, upload-time = "2026-03-12T23:05:56.145Z" }, - { url = "https://files.pythonhosted.org/packages/30/41/7ebf1d32658b4bab20f8ac80972fb19cd4e2c6b78552be263a680edc55ac/ruff-0.15.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70789d3e7830b848b548aae96766431c0dc01a6c78c13381f423bf7076c66d15", size = 11170783, upload-time = "2026-03-12T23:06:01.742Z" }, - { url = "https://files.pythonhosted.org/packages/76/be/6d488f6adca047df82cd62c304638bcb00821c36bd4881cfca221561fdfc/ruff-0.15.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:542aaf1de3154cea088ced5a819ce872611256ffe2498e750bbae5247a8114e9", size = 12044201, upload-time = "2026-03-12T23:05:28.697Z" }, - { url = "https://files.pythonhosted.org/packages/71/68/e6f125df4af7e6d0b498f8d373274794bc5156b324e8ab4bf5c1b4fc0ec7/ruff-0.15.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c22e6f02c16cfac3888aa636e9eba857254d15bbacc9906c9689fdecb1953ab", size = 11421561, upload-time = "2026-03-12T23:05:31.236Z" }, - { url = "https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e", size = 11310928, upload-time = "2026-03-12T23:05:45.288Z" }, - { url = "https://files.pythonhosted.org/packages/8c/26/b75f8c421f5654304b89471ed384ae8c7f42b4dff58fa6ce1626d7f2b59a/ruff-0.15.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:70d263770d234912374493e8cc1e7385c5d49376e41dfa51c5c3453169dc581c", size = 11235186, upload-time = "2026-03-12T23:05:50.677Z" }, - { url = "https://files.pythonhosted.org/packages/fc/d4/d5a6d065962ff7a68a86c9b4f5500f7d101a0792078de636526c0edd40da/ruff-0.15.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:55a1ad63c5a6e54b1f21b7514dfadc0c7fb40093fa22e95143cf3f64ebdcd512", size = 10635231, upload-time = "2026-03-12T23:05:37.044Z" }, - { url = "https://files.pythonhosted.org/packages/d6/56/7c3acf3d50910375349016cf33de24be021532042afbed87942858992491/ruff-0.15.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8dc473ba093c5ec238bb1e7429ee676dca24643c471e11fbaa8a857925b061c0", size = 10340357, upload-time = "2026-03-12T23:06:04.748Z" }, - { url = "https://files.pythonhosted.org/packages/06/54/6faa39e9c1033ff6a3b6e76b5df536931cd30caf64988e112bbf91ef5ce5/ruff-0.15.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:85b042377c2a5561131767974617006f99f7e13c63c111b998f29fc1e58a4cfb", size = 10860583, 
upload-time = "2026-03-12T23:05:58.978Z" }, - { url = "https://files.pythonhosted.org/packages/cb/1e/509a201b843b4dfb0b32acdedf68d951d3377988cae43949ba4c4133a96a/ruff-0.15.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cef49e30bc5a86a6a92098a7fbf6e467a234d90b63305d6f3ec01225a9d092e0", size = 11410976, upload-time = "2026-03-12T23:05:39.955Z" }, - { url = "https://files.pythonhosted.org/packages/6c/25/3fc9114abf979a41673ce877c08016f8e660ad6cf508c3957f537d2e9fa9/ruff-0.15.6-py3-none-win32.whl", hash = "sha256:bbf67d39832404812a2d23020dda68fee7f18ce15654e96fb1d3ad21a5fe436c", size = 10616872, upload-time = "2026-03-12T23:05:42.451Z" }, - { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" }, - { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" }, + { url = "https://files.pythonhosted.org/packages/41/2f/0b08ced94412af091807b6119ca03755d651d3d93a242682bf020189db94/ruff-0.15.7-py3-none-linux_armv6l.whl", hash = "sha256:a81cc5b6910fb7dfc7c32d20652e50fa05963f6e13ead3c5915c41ac5d16668e", size = 10489037, upload-time = "2026-03-19T16:26:32.47Z" }, + { url = "https://files.pythonhosted.org/packages/91/4a/82e0fa632e5c8b1eba5ee86ecd929e8ff327bbdbfb3c6ac5d81631bef605/ruff-0.15.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:722d165bd52403f3bdabc0ce9e41fc47070ac56d7a91b4e0d097b516a53a3477", size = 10955433, upload-time = "2026-03-19T16:27:00.205Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/10/12586735d0ff42526ad78c049bf51d7428618c8b5c467e72508c694119df/ruff-0.15.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fbc2448094262552146cbe1b9643a92f66559d3761f1ad0656d4991491af49e", size = 10269302, upload-time = "2026-03-19T16:26:26.183Z" }, + { url = "https://files.pythonhosted.org/packages/eb/5d/32b5c44ccf149a26623671df49cbfbd0a0ae511ff3df9d9d2426966a8d57/ruff-0.15.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b39329b60eba44156d138275323cc726bbfbddcec3063da57caa8a8b1d50adf", size = 10607625, upload-time = "2026-03-19T16:27:03.263Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f1/f0001cabe86173aaacb6eb9bb734aa0605f9a6aa6fa7d43cb49cbc4af9c9/ruff-0.15.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87768c151808505f2bfc93ae44e5f9e7c8518943e5074f76ac21558ef5627c85", size = 10324743, upload-time = "2026-03-19T16:27:09.791Z" }, + { url = "https://files.pythonhosted.org/packages/7a/87/b8a8f3d56b8d848008559e7c9d8bf367934d5367f6d932ba779456e2f73b/ruff-0.15.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb0511670002c6c529ec66c0e30641c976c8963de26a113f3a30456b702468b0", size = 11138536, upload-time = "2026-03-19T16:27:06.101Z" }, + { url = "https://files.pythonhosted.org/packages/e4/f2/4fd0d05aab0c5934b2e1464784f85ba2eab9d54bffc53fb5430d1ed8b829/ruff-0.15.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0d19644f801849229db8345180a71bee5407b429dd217f853ec515e968a6912", size = 11994292, upload-time = "2026-03-19T16:26:48.718Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/fc4483871e767e5e95d1622ad83dad5ebb830f762ed0420fde7dfa9d9b08/ruff-0.15.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4806d8e09ef5e84eb19ba833d0442f7e300b23fe3f0981cae159a248a10f0036", size = 11398981, upload-time = "2026-03-19T16:26:54.513Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/99/66f0343176d5eab02c3f7fcd2de7a8e0dd7a41f0d982bee56cd1c24db62b/ruff-0.15.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5", size = 11242422, upload-time = "2026-03-19T16:26:29.277Z" }, + { url = "https://files.pythonhosted.org/packages/5d/3a/a7060f145bfdcce4c987ea27788b30c60e2c81d6e9a65157ca8afe646328/ruff-0.15.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:1852ce241d2bc89e5dc823e03cff4ce73d816b5c6cdadd27dbfe7b03217d2a12", size = 11232158, upload-time = "2026-03-19T16:26:42.321Z" }, + { url = "https://files.pythonhosted.org/packages/a7/53/90fbb9e08b29c048c403558d3cdd0adf2668b02ce9d50602452e187cd4af/ruff-0.15.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5f3e4b221fb4bd293f79912fc5e93a9063ebd6d0dcbd528f91b89172a9b8436c", size = 10577861, upload-time = "2026-03-19T16:26:57.459Z" }, + { url = "https://files.pythonhosted.org/packages/2f/aa/5f486226538fe4d0f0439e2da1716e1acf895e2a232b26f2459c55f8ddad/ruff-0.15.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b15e48602c9c1d9bdc504b472e90b90c97dc7d46c7028011ae67f3861ceba7b4", size = 10327310, upload-time = "2026-03-19T16:26:35.909Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/271afdffb81fe7bfc8c43ba079e9d96238f674380099457a74ccb3863857/ruff-0.15.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b4705e0e85cedc74b0a23cf6a179dbb3df184cb227761979cc76c0440b5ab0d", size = 10840752, upload-time = "2026-03-19T16:26:45.723Z" }, + { url = "https://files.pythonhosted.org/packages/bf/29/a4ae78394f76c7759953c47884eb44de271b03a66634148d9f7d11e721bd/ruff-0.15.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:112c1fa316a558bb34319282c1200a8bf0495f1b735aeb78bfcb2991e6087580", size = 11336961, upload-time = "2026-03-19T16:26:39.076Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/6b/8786ba5736562220d588a2f6653e6c17e90c59ced34a2d7b512ef8956103/ruff-0.15.7-py3-none-win32.whl", hash = "sha256:6d39e2d3505b082323352f733599f28169d12e891f7dd407f2d4f54b4c2886de", size = 10582538, upload-time = "2026-03-19T16:26:15.992Z" }, + { url = "https://files.pythonhosted.org/packages/2b/e9/346d4d3fffc6871125e877dae8d9a1966b254fbd92a50f8561078b88b099/ruff-0.15.7-py3-none-win_amd64.whl", hash = "sha256:4d53d712ddebcd7dace1bc395367aec12c057aacfe9adbb6d832302575f4d3a1", size = 11755839, upload-time = "2026-03-19T16:26:19.897Z" }, + { url = "https://files.pythonhosted.org/packages/8f/e8/726643a3ea68c727da31570bde48c7a10f1aa60eddd628d94078fec586ff/ruff-0.15.7-py3-none-win_arm64.whl", hash = "sha256:18e8d73f1c3fdf27931497972250340f92e8c861722161a9caeb89a58ead6ed2", size = 11023304, upload-time = "2026-03-19T16:26:51.669Z" }, ] [[package]]