From 94b69b070173b8cef480c0f9294b7198e7cf2645 Mon Sep 17 00:00:00 2001 From: Jan Schlosser Date: Mon, 15 Jun 2026 14:21:56 +0200 Subject: [PATCH 1/4] chore: move coverage.bazelrc to subfolder and enable user.bazelrc - Move quality/coverage.bazelrc to quality/coverage/coverage.bazelrc for better organization alongside the coverage tooling. - Enable user.bazelrc support via try-import in .bazelrc. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .bazelrc | 5 +++- quality/coverage.bazelrc | 30 ------------------- quality/coverage/coverage.bazelrc | 50 +++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 31 deletions(-) delete mode 100644 quality/coverage.bazelrc create mode 100644 quality/coverage/coverage.bazelrc diff --git a/.bazelrc b/.bazelrc index 8a39a730e..6bae9936a 100644 --- a/.bazelrc +++ b/.bazelrc @@ -105,7 +105,7 @@ common --check_direct_dependencies=error # Issue an error if a direct dependency build --test_env="GTEST_COLOR=1" # Configuration options required for quality assurance -import %workspace%/quality/coverage.bazelrc +import %workspace%/quality/coverage/coverage.bazelrc import %workspace%/quality/sanitizer/sanitizer.bazelrc import %workspace%/quality/static_analysis/static_analysis.bazelrc @@ -120,3 +120,6 @@ build --tool_java_runtime_version=remotejdk_21 # Import AI checker custom configuration try-import %workspace%/.bazelrc.ai_checker + +# Enable user-defined configs +try-import %workspace%/user.bazelrc diff --git a/quality/coverage.bazelrc b/quality/coverage.bazelrc deleted file mode 100644 index 3c0d6f8aa..000000000 --- a/quality/coverage.bazelrc +++ /dev/null @@ -1,30 +0,0 @@ -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. 
-# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* - -# With this instrumentation filter for our two main components, we ensure that `bazel coverage //...` is yielding the correct results -coverage --nocache_test_results -coverage --cxxopt=-O0 -coverage --instrumentation_filter="^//score/message_passing[/:],^//score/mw/com[/:],-//score/mw/com/performance_benchmarks[/:],-//score/mw/.*/test[/:]" -coverage --experimental_generate_llvm_lcov -coverage --experimental_use_llvm_covmap -coverage --combined_report=lcov -coverage --coverage_report_generator=@bazel_tools//tools/test/CoverageOutputGenerator/java/com/google/devtools/coverageoutputgenerator:Main -coverage --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux -coverage --extra_toolchains=@ferrocene_x86_64_unknown_linux_gnu_llvm//:rust_ferrocene_toolchain -coverage --test_env=COVERAGE_GCOV_OPTIONS=-bcu -# TODO set toolchain feature once possible -# These compile time options are required to cover abnormal termination cases. 
In GCC one can use `__gcc_dump()`, but this does not work with LLVM -# LLVM provided these compile-time options in combination with a specific profile setting which is enabled in bazel via `LLVM_PROFILE_CONTINUOUS_MODE` -coverage --test_env=LLVM_PROFILE_CONTINUOUS_MODE=1 -coverage --cxxopt -mllvm -coverage --cxxopt -runtime-counter-relocation diff --git a/quality/coverage/coverage.bazelrc b/quality/coverage/coverage.bazelrc new file mode 100644 index 000000000..36aecc778 --- /dev/null +++ b/quality/coverage/coverage.bazelrc @@ -0,0 +1,50 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# NOTE: --experimental_use_llvm_covmap (required for llvm-cov) causes Bazel to instrument +# ALL targets regardless of --instrumentation_filter. The actual source filtering happens +# in the merger/reporter via --ignore-filename-regex. The instrumentation_filter is kept +# for documentation purposes and in case this Bazel limitation is fixed in the future. 
+coverage --nocache_test_results +coverage --cxxopt=-O0 +coverage --instrumentation_filter="^//score/message_passing[/:],^//score/mw/com/(impl|gateway|dependability|design|example|mocking|doc)" +coverage --experimental_generate_llvm_lcov +coverage --experimental_use_llvm_covmap +coverage --combined_report=lcov +coverage --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux +coverage --extra_toolchains=@ferrocene_x86_64_unknown_linux_gnu_llvm//:rust_ferrocene_toolchain + +# Use llvm-cov directly for HTML report generation instead of genhtml/lcov. +# The custom merger (per-test) receives profraw files and produces a zip with profdata + HTML. +# The custom reporter (final) merges all profdata and generates the combined HTML report. +coverage --coverage_output_generator=//quality/coverage/llvm_cov:merger +coverage --coverage_report_generator=//quality/coverage/llvm_cov:reporter +coverage --experimental_fetch_all_coverage_outputs + +# Override GENERATE_LLVM_LCOV to keep raw profraw files instead of converting to LCOV. +# The custom merger handles profraw→profdata→HTML directly. +coverage --test_env=GENERATE_LLVM_LCOV=0 +# Suppress the default gcov path since we use llvm-cov directly. +coverage --test_env=COVERAGE_GCOV_PATH=/usr/bin/true + +# These compile time options are required to cover abnormal termination cases. In GCC one can use `__gcc_dump()`, but this does not work with LLVM +# LLVM provided these compile-time options in combination with a specific profile setting which is enabled in bazel via `LLVM_PROFILE_CONTINUOUS_MODE` +coverage --test_env=LLVM_PROFILE_CONTINUOUS_MODE=1 +coverage --cxxopt -mllvm +coverage --cxxopt -runtime-counter-relocation + +# By default Bazel creates for each library a *.so for its tests. If there is a header that is used by multiple *.so files +# and these *.so files have different instrumentation (production code has, test has not) a conflict arises which one to use when calculating coverage. 
+# Then the first object is used, and it is pure luck if it contains the correct data or not. Even worse, small changes can change the order and then lead +# to big coverage gaps. All these problems do not arise if no dynamic libs are used. Thus, we rather take bigger build times and binaries in advance for correct data. +coverage --dynamic_mode=off From 109be3058a4078153c66495c28579c7186a2f9d8 Mon Sep 17 00:00:00 2001 From: Jan Schlosser Date: Mon, 15 Jun 2026 14:23:00 +0200 Subject: [PATCH 2/4] feat(coverage): replace genhtml/lcov with llvm-cov for HTML coverage reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the genhtml/lcov-based coverage report pipeline with a native llvm-cov implementation that produces HTML reports directly from profraw data. Key changes: - Add merger.py as --coverage_output_generator (per-test profraw→profdata plus object-file metadata; HTML is generated only by the reporter) - Add reporter.py as --coverage_report_generator (aggregate all tests) - Add filter_regexes.txt config file for configurable source filtering - Update coverage.bazelrc with llvm-cov flags (--experimental_use_llvm_covmap, --dynamic_mode=off, continuous profiling mode) - Update generate_coverage_html.sh to extract from the new zip format The source filtering is configurable via filter_regexes.txt (loaded from runfiles) and can be overridden via --filter_sources CLI argument.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/coverage_report.yml | 3 - BUILD | 2 + MODULE.bazel | 12 - quality/coverage/BUILD | 1 - quality/coverage/coverage.bazelrc | 2 +- quality/coverage/generate_coverage_html.sh | 147 ++++----- quality/coverage/llvm_cov/BUILD | 77 +++++ quality/coverage/llvm_cov/filter_regexes.txt | 20 ++ quality/coverage/llvm_cov/merger.py | 181 +++++++++++ quality/coverage/llvm_cov/reporter.py | 320 +++++++++++++++++++ 10 files changed, 672 insertions(+), 93 deletions(-) create mode 100644 quality/coverage/llvm_cov/BUILD create mode 100644 quality/coverage/llvm_cov/filter_regexes.txt create mode 100644 quality/coverage/llvm_cov/merger.py create mode 100644 quality/coverage/llvm_cov/reporter.py diff --git a/.github/workflows/coverage_report.yml b/.github/workflows/coverage_report.yml index d7a93a479..09dca276c 100644 --- a/.github/workflows/coverage_report.yml +++ b/.github/workflows/coverage_report.yml @@ -60,9 +60,6 @@ jobs: - name: Allow linux-sandbox uses: ./actions/unblock_user_namespace_for_linux_sandbox - - name: Install Perl dependencies for genhtml - run: sudo apt-get install -y --no-install-recommends lcov - - name: Run Unit Test with Coverage for C++ id: run-coverage run: | diff --git a/BUILD b/BUILD index 76f68c274..ef2c6fc04 100644 --- a/BUILD +++ b/BUILD @@ -17,6 +17,8 @@ load("@rules_shell//shell:sh_binary.bzl", "sh_binary") load("@score_tooling//:defs.bzl", "copyright_checker") load("//tools/lint:linters.bzl", "use_clang_tidy_targets") +exports_files(["MODULE.bazel"]) + compile_pip_requirements( name = "pip_requirements", src = "requirements.in", diff --git a/MODULE.bazel b/MODULE.bazel index 883b58259..29e933415 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -210,18 +210,6 @@ use_repo( # We use here a pre-compiled fully static and hermetic clang_format binary # and not the one provided by llvm_toolchain, because the one from llvm_toolchain is not fully hermetic (and different 
version for now) -############################################################################### -# lcov deb package (provides genhtml + lcov for coverage HTML reports) -############################################################################### -deb = use_repo_rule("@download_utils//download/deb:defs.bzl", "download_deb") - -deb( - name = "lcov_deb", - dev_dependency = True, - integrity = "sha256-Ip14IkKavqBtkQ7mh6AXzr/6YyHpvSAZ0veMmw1+N80=", - urls = ["https://archive.ubuntu.com/ubuntu/pool/universe/l/lcov/lcov_2.0-4ubuntu2_all.deb"], -) - download_file = use_repo_rule("@download_utils//download/file:defs.bzl", "download_file") download_file( diff --git a/quality/coverage/BUILD b/quality/coverage/BUILD index 2a9ad0cb5..57871bf0c 100644 --- a/quality/coverage/BUILD +++ b/quality/coverage/BUILD @@ -16,6 +16,5 @@ load("@rules_shell//shell:sh_binary.bzl", "sh_binary") sh_binary( name = "generate_coverage_html", srcs = ["generate_coverage_html.sh"], - data = ["@lcov_deb//:srcs"], visibility = ["//visibility:private"], ) diff --git a/quality/coverage/coverage.bazelrc b/quality/coverage/coverage.bazelrc index 36aecc778..b4186e07f 100644 --- a/quality/coverage/coverage.bazelrc +++ b/quality/coverage/coverage.bazelrc @@ -28,7 +28,7 @@ coverage --extra_toolchains=@ferrocene_x86_64_unknown_linux_gnu_llvm//:rust_ferr # The custom merger (per-test) receives profraw files and produces a zip with profdata + HTML. # The custom reporter (final) merges all profdata and generates the combined HTML report. coverage --coverage_output_generator=//quality/coverage/llvm_cov:merger -coverage --coverage_report_generator=//quality/coverage/llvm_cov:reporter +coverage --coverage_report_generator=//quality/coverage/llvm_cov:reporter_wrapper coverage --experimental_fetch_all_coverage_outputs # Override GENERATE_LLVM_LCOV to keep raw profraw files instead of converting to LCOV. 
diff --git a/quality/coverage/generate_coverage_html.sh b/quality/coverage/generate_coverage_html.sh index 94f28a3d6..8b682898a 100755 --- a/quality/coverage/generate_coverage_html.sh +++ b/quality/coverage/generate_coverage_html.sh @@ -11,8 +11,9 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -# Generates an HTML coverage report from an LCOV .dat file using genhtml. -# Optionally assembles and zips the report together with LCOV data and JUnit XMLs. +# Extracts the HTML coverage report from the llvm-cov generated zip produced by +# `bazel coverage`. Optionally assembles and zips the report together with +# LCOV data and JUnit XMLs. # # Usage: # bazel run //quality/coverage:generate_coverage_html [-- [--archive ] [output-dir]] @@ -58,87 +59,82 @@ unset _SELF_DIR _SELF_NAME cd "${BUILD_WORKSPACE_DIRECTORY}" -# bazel-out/ is a symlink Bazel always creates in the workspace root, it -# points to the real output directory. _coverage/ sits at its root (not -# inside a config-specific sub-directory), so we can locate the merged -# report without calling 'bazel info' — which would require 'bazel' to be -# on PATH inside the run environment. -LCOV_DAT="${BUILD_WORKSPACE_DIRECTORY}/bazel-out/_coverage/_coverage_report.dat" +# Resolve OUTPUT_DIR to absolute path (relative to workspace root). +OUTPUT_DIR="${BUILD_WORKSPACE_DIRECTORY}/${OUTPUT_DIR}" -# --------------------------------------------------------------------------- -# Resolve genhtml: prefer Bazel-managed binary from @lcov_deb runfiles so -# that no system lcov installation is required. Fall back to PATH. -# -# Bazel uses a symlink forest on Linux: runfiles entries are symlinks to the -# real cached files. Use 'find -L' to dereference them ('find -type f' -# without -L never matches symlinks). -# --------------------------------------------------------------------------- -_tool_path() { - local name="$1" - local found="" - # 1. 
Symlink forest (always present on Linux under bazel run) - if [[ -n "${RUNFILES_DIR:-}" ]]; then - found=$(find -L "${RUNFILES_DIR}" -path "*lcov_deb/usr/bin/${name}" -type f 2>/dev/null | head -1) - fi - # 2. PATH - if [[ -z "${found}" ]]; then - found=$(command -v "${name}" 2>/dev/null || true) - fi - echo "${found}" -} - -GENHTML="$(_tool_path genhtml)" -LCOV="$(_tool_path lcov)" +# The coverage report generator produces a zip file at _coverage_report.dat +# containing: html_report/, lcov_report/lcov.dat, text_report/summary.txt +COVERAGE_ZIP="${BUILD_WORKSPACE_DIRECTORY}/bazel-out/_coverage/_coverage_report.dat" -if [[ -z "$GENHTML" ]]; then - echo "ERROR: 'genhtml' not found. Run via 'bazel run //quality/coverage:generate_coverage_html' or install 'lcov'." >&2 +if [[ ! -f "${COVERAGE_ZIP}" ]]; then + echo "ERROR: Coverage report not found at ${COVERAGE_ZIP}" >&2 + echo " Run 'bazel coverage //... --build_tests_only' first." >&2 exit 1 fi -if [[ -z "$LCOV" ]]; then - echo "ERROR: 'lcov' not found. Run via 'bazel run //quality/coverage:generate_coverage_html' or install 'lcov'." >&2 + +# Extract the HTML report from the zip. +TMPDIR_EXTRACT="${TMPDIR:-/tmp}/coverage_extract_$$" +mkdir -p "${TMPDIR_EXTRACT}" +trap 'rm -rf "${TMPDIR_EXTRACT}"' EXIT + +unzip -q -o "${COVERAGE_ZIP}" -d "${TMPDIR_EXTRACT}" + +# Copy the HTML report to the output directory. +rm -rf "${OUTPUT_DIR}" +if [[ -d "${TMPDIR_EXTRACT}/html_report" ]]; then + cp -r "${TMPDIR_EXTRACT}/html_report" "${OUTPUT_DIR}" +else + echo "ERROR: html_report/ not found in ${COVERAGE_ZIP}" >&2 exit 1 fi -# When using the Bazel-managed tool, set PERL5LIB so Perl finds lcovutil.pm. -# lcovutil.pm lives two levels above the genhtml binary: bin/ → usr/ → lib/lcov. 
-lcov_lib="$(dirname "$(dirname "${GENHTML}")")/lib/lcov" -if [[ -d "${lcov_lib}" ]]; then - export PERL5LIB="${lcov_lib}${PERL5LIB:+:${PERL5LIB}}" -fi +echo "Coverage report written to: ${OUTPUT_DIR}" # --------------------------------------------------------------------------- -# Filter source files from LCOV data before generating HTML. -# The --instrumentation_filter in coverage.bazelrc already excludes external -# deps (third_party, gtest) and test/ subdirectories at compile time. -# Only files that slip through because they live in mixed packages need -# removal here: -# - *mock*.h/cpp mock headers in production packages (e.g. configuration/) +# Run coverage justification processing. # --------------------------------------------------------------------------- -LCOV_DAT_FILTERED="${TMPDIR:-/tmp}/coverage_report_filtered_$$.dat" -"${LCOV}" --remove "${LCOV_DAT}" \ - '*mock*.h' \ - '*mock*.cpp' \ - --output-file "${LCOV_DAT_FILTERED}" \ - --rc lcov_branch_coverage=1 \ - --ignore-errors unused - -# NOTE: "--ignore-errors category,inconsistent" -# LLVM coverage writes per-process .profraw files that are merged during -# bazel's post-processing step. The merge can occasionally leave -# inconsistent hit counts that genhtml rejects. This flag tells genhtml to -# silently skip those entries instead of aborting, coverage numbers are -# slightly under-counted for affected translation units but the report still -# generates. -"${GENHTML}" "${LCOV_DAT_FILTERED}" \ - --output-directory "${OUTPUT_DIR}" \ - --show-details \ - --legend \ - --function-coverage \ - --branch-coverage \ - --rc no_exception_branch=1 \ - --ignore-errors category,inconsistent +JUSTIFICATION_YAML="${BUILD_WORKSPACE_DIRECTORY}/quality/coverage/coverage_justifications.yaml" + +if [[ -f "${JUSTIFICATION_YAML}" ]]; then + echo "" + echo "Running coverage justification processing..." 
+ + JUSTIFICATION_DIR="${TMPDIR_EXTRACT}/justification_report" + mkdir -p "${JUSTIFICATION_DIR}" + + # Run justify.py via Bazel to produce the resolved manifest. + if bazel run //quality/coverage/llvm_cov:justify -- \ + --yaml "${JUSTIFICATION_YAML}" \ + --source-root "${BUILD_WORKSPACE_DIRECTORY}" \ + --output "${JUSTIFICATION_DIR}/manifest.json"; then + + # Run effective_coverage.py via Bazel to post-process HTML and calculate effective coverage. + bazel run //quality/coverage/llvm_cov:effective_coverage -- \ + --html-dir "${OUTPUT_DIR}" \ + --manifest "${JUSTIFICATION_DIR}/manifest.json" \ + --output "${JUSTIFICATION_DIR}/report.json" + fi -echo "Coverage report written to: ${OUTPUT_DIR}" + # Display effective coverage summary. + if [[ -f "${JUSTIFICATION_DIR}/summary.txt" ]]; then + echo "" + cat "${JUSTIFICATION_DIR}/summary.txt" + + # Extract effective coverage percentage for threshold check. + EFFECTIVE_PCT=$(grep -oP 'Effective line coverage:\s+\K[0-9.]+' \ + "${JUSTIFICATION_DIR}/summary.txt" 2>/dev/null || echo "0") + + # Threshold check (default: 100%) + THRESHOLD="${COVERAGE_THRESHOLD:-100}" + if awk "BEGIN {exit (${EFFECTIVE_PCT} >= ${THRESHOLD}) ? 0 : 1}"; then + : + else + echo "WARNING: Effective coverage ${EFFECTIVE_PCT}% is below threshold ${THRESHOLD}%" >&2 + fi + fi +else + echo "INFO: No coverage_justifications.yaml found, skipping justification processing." +fi # --------------------------------------------------------------------------- # Optional: create a zip archive with the HTML report, raw LCOV data and @@ -153,10 +149,9 @@ if [[ -n "${ARCHIVE_NAME}" ]]; then # Copy the HTML coverage report cp -r "${OUTPUT_DIR}" artifacts/ - # Include the raw LCOV .dat so the quality dashboard can read - # line/function/branch percentages without re-running genhtml. - if [[ -f "${LCOV_DAT}" ]]; then - cp "${LCOV_DAT}" artifacts/coverage_report.dat + # Include the LCOV .dat file from the zip (for backward compat with dashboards). 
+ if [[ -f "${TMPDIR_EXTRACT}/lcov_report/lcov.dat" ]]; then + cp "${TMPDIR_EXTRACT}/lcov_report/lcov.dat" artifacts/coverage_report.dat fi zip -r "${ARCHIVE_NAME}.zip" artifacts/ diff --git a/quality/coverage/llvm_cov/BUILD b/quality/coverage/llvm_cov/BUILD new file mode 100644 index 000000000..60dc0e0b6 --- /dev/null +++ b/quality/coverage/llvm_cov/BUILD @@ -0,0 +1,77 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_python//python:defs.bzl", "py_binary") +load("@rules_shell//shell:sh_binary.bzl", "sh_binary") + +py_binary( + name = "merger", + srcs = ["merger.py"], +) + +py_binary( + name = "reporter", + srcs = ["reporter.py"], + data = [ + "filter_regexes.txt", + "//:MODULE.bazel", + "@llvm_toolchain//:llvm-cov", + "@llvm_toolchain//:llvm-profdata", + ], + deps = ["@rules_python//python/runfiles"], +) + +genrule( + name = "reporter_wrapper_gen", + srcs = [ + "filter_regexes.txt", + "//:MODULE.bazel", + ], + outs = ["reporter_wrapper.sh"], + cmd = """ +cat > $@ << EOF +#!/usr/bin/env bash +set -euo pipefail +if [[ -z "\\$${RUNFILES_DIR:-}" ]]; then + if [[ -d "\\$$0.runfiles" ]]; then + export RUNFILES_DIR="\\$$0.runfiles" + fi +fi +WORKSPACE_ROOT="\\$$(cd "\\$$(dirname "\\$$(readlink -f "\\$${RUNFILES_DIR}/$(rlocationpath //:MODULE.bazel)")")" && pwd)/" +exec "\\$${RUNFILES_DIR}/_main/quality/coverage/llvm_cov/reporter" \\\\ + --filter_regexes="$(rlocationpath filter_regexes.txt)" \\\\ + 
+ --workspace_root="\\$${WORKSPACE_ROOT}" \\\\ + "\\$$@" +EOF +chmod +x $@ +""", +) + +sh_binary( + name = "reporter_wrapper", + srcs = [":reporter_wrapper_gen"], + data = [":reporter"], +) + +py_binary( + name = "justify", + srcs = ["justify.py"], + deps = [ + "@score_communication_pip//pyyaml", + ], +) + +py_binary( + name = "effective_coverage", + srcs = ["effective_coverage.py"], +) diff --git a/quality/coverage/llvm_cov/filter_regexes.txt b/quality/coverage/llvm_cov/filter_regexes.txt new file mode 100644 index 000000000..c8ce6427c --- /dev/null +++ b/quality/coverage/llvm_cov/filter_regexes.txt @@ -0,0 +1,20 @@ +# Coverage filter regexes (one per line). +# Source files whose path matches any of these patterns are excluded from the +# coverage report via llvm-cov's --ignore-filename-regex option. +# +# NOTE: --experimental_use_llvm_covmap causes Bazel to instrument ALL targets +# regardless of --instrumentation_filter. Therefore, filtering MUST happen here +# at the report level. + +# Exclude mock files. +.*_mock.*\.(h|hpp|cpp)$ + +# Exclude external dependencies (anything under external/). +external/.* + +# Exclude test files and test directories. +.*_test\.(cpp|h|hpp)$ +.*/test/.* + +# Exclude performance benchmarks. +.*/performance_benchmarks/.* diff --git a/quality/coverage/llvm_cov/merger.py b/quality/coverage/llvm_cov/merger.py new file mode 100644 index 000000000..a2e2c195c --- /dev/null +++ b/quality/coverage/llvm_cov/merger.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership.
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Per-test coverage output generator using llvm-cov. + +This script is invoked by Bazel as the --coverage_output_generator for each test. +It receives profraw files from test execution, merges them into profdata, records +the instrumented object files in meta/meta.json, and packages both into a zip file +that the reporter can later aggregate. + +Expected Bazel interface (from collect_coverage.sh): + --coverage_dir= Directory containing *.profraw files + --output_file= Where to write the output (zip) + --source_file_manifest= File listing instrumented sources and object files + --filter_sources= Source path regexes to exclude (repeatable; accepted but not used here) + [--sources_to_replace_file=] Optional source mapping file +""" + +import argparse +import json +import os +import subprocess +import sys +import zipfile +from pathlib import Path +from typing import List, Set + + +def main() -> None: + args = parse_args() + + # Get object files from the manifest. + object_files = get_object_files_from_manifest(args.source_file_manifest) + if not object_files: + print("INFO: No instrumented object files found, skipping coverage.", file=sys.stderr) + cleanup_dangling_symlinks(args.coverage_dir) + sys.exit(0) + + # Find profraw files. + profraw_files = sorted(args.coverage_dir.glob("*.profraw")) + if not profraw_files: + print("INFO: No *.profraw files found, skipping coverage.", file=sys.stderr) + cleanup_dangling_symlinks(args.coverage_dir) + sys.exit(0) + + # Merge profraw → profdata.
+ profdata_dir = args.coverage_dir / "profdata" + profdata_dir.mkdir(exist_ok=True) + profdata_file = profdata_dir / "target.profdata" + + run_command([ + str(os.environ.get("LLVM_PROFDATA")), "merge", + "--sparse", + "--output", str(profdata_file), + ] + [str(f) for f in profraw_files]) + + # Create meta.json with object files for the reporter. + meta_dir = args.coverage_dir / "meta" + meta_dir.mkdir(exist_ok=True) + meta = { + "object_files": [os.path.realpath(f) for f in sorted(object_files)], + } + with open(meta_dir / "meta.json", "w", encoding="utf-8") as f: + json.dump(meta, f) + + # Package into zip at output_file. + create_zip( + root=args.coverage_dir, + directories=[profdata_dir, meta_dir], + output_file=args.output_file, + ) + + # Clean up dangling symlinks in coverage_dir that would cause Bazel tree + # artifact validation to fail (e.g. the 'gcov' symlink created by + # collect_cc_coverage.sh's init_gcov() pointing into the destroyed sandbox). + cleanup_dangling_symlinks(args.coverage_dir) + + target = os.environ.get("TEST_TARGET", "unknown") + print(f"INFO: Coverage merger completed for '{target}'", file=sys.stderr) + + +def cleanup_dangling_symlinks(directory: Path) -> None: + """Remove symlinks in the coverage directory that would become dangling. + + Bazel's tree artifact validation rejects directories containing dangling + symlinks. The 'gcov' symlink created by collect_cc_coverage.sh's init_gcov() + points into the sandbox which is torn down before validation runs. Since we + use llvm-cov directly, this symlink is not needed. + """ + gcov_link = directory / "gcov" + if gcov_link.is_symlink(): + gcov_link.unlink() + + # Also remove any other symlinks pointing into sandbox paths. 
+ for entry in directory.iterdir(): + if entry.is_symlink(): + target = os.readlink(entry) + if "sandbox" in target: + entry.unlink() + + +def get_object_files_from_manifest(source_file_manifest: Path) -> Set[str]: + """Parse the coverage manifest to find instrumented object files.""" + runfiles_dir = Path(os.environ.get("RUNFILES_DIR", "")) / os.environ.get("TEST_WORKSPACE", "_main") + exec_root = Path(os.environ.get("ROOT")) + + object_files = set() + with open(source_file_manifest, encoding="utf-8") as f: + manifests = [line.strip() for line in f.readlines()] + + for manifest in manifests: + if "objects_list.txt" in manifest: + with open(manifest, encoding="utf-8") as f: + for line in f: + obj_path = line.strip() + if not obj_path: + continue + # Try runfiles first, then exec_root. + candidate = runfiles_dir / obj_path + if candidate.exists(): + object_files.add(str(candidate)) + else: + object_files.add(str(exec_root / obj_path)) + + return object_files + + +def run_command(cmd: List[str]) -> subprocess.CompletedProcess: + """Run a command and exit on failure.""" + try: + return subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as e: + print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) + print(f" {' '.join(cmd)}", file=sys.stderr) + if e.stdout: + print(e.stdout, file=sys.stderr) + sys.exit(1) + + +def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: + """Create a zip file from the given directories relative to root.""" + with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf: + for directory in directories: + if not directory.exists(): + continue + for dirpath, _, files in os.walk(directory): + for filename in files: + file_path = Path(dirpath) / filename + arcname = file_path.relative_to(root) + zf.write(file_path, arcname) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments 
matching the Bazel LCOV_MERGER interface.""" + parser = argparse.ArgumentParser(description="LLVM coverage merger for Bazel") + parser.add_argument("--coverage_dir", type=Path, required=True) + parser.add_argument("--output_file", type=Path, required=True) + parser.add_argument("--source_file_manifest", type=Path, required=True) + parser.add_argument("--filter_sources", action="append", default=[]) + parser.add_argument("--sources_to_replace_file", type=str, default=None) + return parser.parse_args() + + +if __name__ == "__main__": + main() diff --git a/quality/coverage/llvm_cov/reporter.py b/quality/coverage/llvm_cov/reporter.py new file mode 100644 index 000000000..fdceed616 --- /dev/null +++ b/quality/coverage/llvm_cov/reporter.py @@ -0,0 +1,320 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Final coverage report generator using llvm-cov. + +This script is invoked by Bazel as the --coverage_report_generator after all tests +complete. It reads the per-test zip files produced by the merger, merges all profdata +into one, and generates the final combined HTML report. 
+ +Expected Bazel interface: + --reports_file= Text file listing paths to all per-test coverage outputs + --output_file= Where to write the final report (zip) +""" + +import argparse +import json +import os +import subprocess +import sys +import zipfile +from pathlib import Path +from typing import List, Set, Tuple +from python.runfiles import Runfiles + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + # Read the list of per-test report files. + reports = read_reports_file(args.reports_file) + if not reports: + print("INFO: No coverage reports found.", file=sys.stderr) + write_empty_output(args.output_file) + sys.exit(0) + + # Extract profdata and object files from each per-test zip. + valid_profdata_files, valid_object_files = extract_reports(reports) + + if not valid_profdata_files or not valid_object_files: + print("INFO: No valid profdata or object files found.", file=sys.stderr) + write_empty_output(args.output_file) + sys.exit(0) + + # Get llvm tools via runfiles. + r = Runfiles.Create() + llvm_bin_path = Path(r.Rlocation("llvm_toolchain/llvm-cov")) + + # Merge all profdata files. + merged_profdata = Path.cwd() / "merged_coverage.profdata" + run_command([ + r.Rlocation("llvm_toolchain/llvm-profdata"), "merge", + "--sparse", + "--output", str(merged_profdata), + ] + sorted(valid_profdata_files)) + + # Build coverage arguments. + coverage_args = ["--instr-profile", str(merged_profdata)] + for obj in sorted(valid_object_files): + coverage_args.extend(["--object", obj]) + + # Get filter regexes and workspace root. + filter_regexes = load_filter_regexes(r, args.filter_regexes) + workspace_root = args.workspace_root + + common_show_args = { + "llvm_bin_path": llvm_bin_path, + "coverage_args": coverage_args, + "filter_regexes": sorted(filter_regexes), + "workspace_root": workspace_root, + } + + # Generate HTML report. 
+ html_report_dir = Path.cwd() / "html_report" + run_llvm_cov_show( + **common_show_args, + output_format="html", + html_report_dir=html_report_dir, + ) + + # Generate LCOV report (for backward compatibility with dashboards). + lcov_report_dir = Path.cwd() / "lcov_report" + lcov_report_dir.mkdir(exist_ok=True) + lcov_result = run_llvm_cov_export( + llvm_bin_path=llvm_bin_path, + coverage_args=coverage_args, + filter_regexes=sorted(filter_regexes), + workspace_root=workspace_root, + ) + with open(lcov_report_dir / "lcov.dat", "w", encoding="utf-8") as f: + f.write(lcov_result.stdout) + + # Generate text summary. + text_report_dir = Path.cwd() / "text_report" + text_report_dir.mkdir(exist_ok=True) + summary = run_llvm_cov_report( + llvm_bin_path=llvm_bin_path, + coverage_args=coverage_args, + filter_regexes=sorted(filter_regexes), + ) + with open(text_report_dir / "summary.txt", "w", encoding="utf-8") as f: + f.write(summary.stdout) + print(summary.stdout, file=sys.stderr) + + # Package everything into the output zip. + directories = [html_report_dir, lcov_report_dir, text_report_dir] + create_zip( + root=Path.cwd(), + directories=directories, + output_file=args.output_file, + ) + + print(f"INFO: Coverage reporter completed. 
Output: {args.output_file}", file=sys.stderr) + + +def run_llvm_cov_show( + llvm_bin_path: Path, + coverage_args: List[str], + filter_regexes: List[str], + workspace_root: str, + output_format: str, + html_report_dir: Path = None, +) -> subprocess.CompletedProcess: + """Run llvm-cov show.""" + cmd = [ + str(llvm_bin_path), + "show", + f"--format={output_format}", + f"--path-equivalence=/proc/self/cwd/,{workspace_root}", + f"--compilation-dir={workspace_root}", + "--show-branches=count", + "--show-region-summary=0", + ] + + cxxfilt = llvm_bin_path.parent / "llvm-cxxfilt" + if cxxfilt.exists(): + cmd.append(f"--Xdemangler={cxxfilt}") + + for regex in filter_regexes: + adjusted = regex.replace("/proc/self/cwd/", workspace_root) + cmd.append(f"--ignore-filename-regex={adjusted}") + + if html_report_dir: + cmd.append(f"--output-dir={html_report_dir}") + cmd.append("--coverage-watermark=100,50") + cmd.append("--show-expansions") + + cmd.extend(coverage_args) + return run_command(cmd) + + +def run_llvm_cov_export( + llvm_bin_path: Path, + coverage_args: List[str], + filter_regexes: List[str], + workspace_root: str, +) -> subprocess.CompletedProcess: + """Run llvm-cov export to produce LCOV format.""" + cmd = [ + str(llvm_bin_path), + "export", + "--format=lcov", + f"--path-equivalence=/proc/self/cwd/,{workspace_root}", + f"--compilation-dir={workspace_root}", + ] + + for regex in filter_regexes: + adjusted = regex.replace("/proc/self/cwd/", workspace_root) + cmd.append(f"--ignore-filename-regex={adjusted}") + + cmd.extend(coverage_args) + return run_command(cmd) + + +def run_llvm_cov_report( + llvm_bin_path: Path, + coverage_args: List[str], + filter_regexes: List[str], +) -> subprocess.CompletedProcess: + """Run llvm-cov report for a summary.""" + cmd = [ + str(llvm_bin_path), + "report", + "--summary-only", + "--show-region-summary=0", + "--show-branch-summary=1", + ] + + for regex in filter_regexes: + cmd.append(f"--ignore-filename-regex={regex}") + + 
cmd.extend(coverage_args) + return run_command(cmd) + + +def extract_reports(reports: List[str]) -> Tuple[Set[str], Set[str]]: + """Extract profdata and object files from per-test zip files.""" + valid_profdata_files = set() + valid_object_files = set() + + for i, report_path in enumerate(reports): + # Skip baseline_coverage files (LCOV format, not our zip). + if "baseline_coverage" in report_path: + continue + + report = Path(report_path) + if not report.exists() or report.stat().st_size == 0: + continue + + # Check if it's a valid zip. + if not zipfile.is_zipfile(report): + continue + + profdata_name = f"coverage_report_{i:08d}.profdata" + + try: + with zipfile.ZipFile(report, "r") as archive: + # Extract meta. + meta_json = archive.read("meta/meta.json") + target_meta = json.loads(meta_json) + + # Extract profdata. + profdata_content = archive.read("profdata/target.profdata") + profdata_path = Path.cwd() / profdata_name + with open(profdata_path, "wb") as f: + f.write(profdata_content) + + valid_profdata_files.add(str(profdata_path)) + + # Collect object files. 
+ for obj in target_meta.get("object_files", []): + if obj and Path(obj).exists(): + valid_object_files.add(os.path.realpath(obj)) + + except (zipfile.BadZipFile, KeyError, json.JSONDecodeError) as e: + print(f"WARNING: Skipping invalid report {report_path}: {e}", file=sys.stderr) + continue + + return valid_profdata_files, valid_object_files + +def read_reports_file(reports_file: Path) -> List[str]: + """Read the reports file listing all per-test coverage outputs.""" + with open(reports_file, encoding="utf-8") as f: + return [line.strip() for line in f if line.strip()] + + +def load_filter_regexes(runfiles: Runfiles, rlocation_path: str) -> List[str]: + """Load filter regexes from filter_regexes.txt via Bazel runfiles.""" + path = runfiles.Rlocation(rlocation_path) + if not path or not Path(path).exists(): + print(f"WARNING: {rlocation_path} not found in runfiles, no source filtering applied", + file=sys.stderr) + return [] + + lines = Path(path).read_text(encoding="utf-8").splitlines() + return [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")] + + +def write_empty_output(output_file: Path) -> None: + """Write an empty file as output when there's nothing to report.""" + with open(output_file, "w", encoding="utf-8") as f: + f.write("") + + +def run_command(cmd: List[str]) -> subprocess.CompletedProcess: + """Run a command and exit on failure.""" + try: + return subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as e: + print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) + print(f" {' '.join(cmd)}", file=sys.stderr) + if e.stdout: + print(e.stdout, file=sys.stderr) + sys.exit(1) + + +def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: + """Create a zip file from the given directories relative to root.""" + with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf: + for 
directory in directories: + if not directory.exists(): + continue + for dirpath, _, files in os.walk(directory): + for filename in files: + file_path = Path(dirpath) / filename + arcname = file_path.relative_to(root) + zf.write(file_path, arcname) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments matching the Bazel coverage_report_generator interface.""" + parser = argparse.ArgumentParser(description="LLVM coverage reporter for Bazel") + parser.add_argument("--output_file", type=Path, required=True) + parser.add_argument("--reports_file", type=Path, required=True) + parser.add_argument("--filter_regexes", type=str, required=True, + help="Rlocation path to the filter regexes file") + parser.add_argument("--workspace_root", type=str, required=True, + help="Real workspace root path for source path mapping") + return parser.parse_args() + + + +if __name__ == "__main__": + main() From 724b1f881488777175e1a378eab6d95d272409bd Mon Sep 17 00:00:00 2001 From: Jan Schlosser Date: Mon, 15 Jun 2026 14:23:53 +0200 Subject: [PATCH 3/4] feat(coverage): add coverage justification infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a justification system that allows developers to argue lines that cannot be covered by tests (defensive programming, tool false positives, platform-specific code). Components: - justify.py: YAML loader + source scanner → resolved manifest JSON - effective_coverage.py: HTML post-processor + effective coverage calculator - coverage_justifications.yaml: initial justification database - Integration into generate_coverage_html.sh (runs after report generation) Justifications can be applied via: - In-code markers: // COV_JUSTIFIED - YAML locations: file + line range (no code change needed) Justified lines appear yellow in the HTML report, and effective coverage is calculated as (covered + justified) / instrumented. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitignore | 2 + quality/coverage/coverage_justifications.yaml | 70 ++ .../coverage/llvm_cov/effective_coverage.py | 737 ++++++++++++++++++ quality/coverage/llvm_cov/justify.py | 402 ++++++++++ 4 files changed, 1211 insertions(+) create mode 100644 quality/coverage/coverage_justifications.yaml create mode 100644 quality/coverage/llvm_cov/effective_coverage.py create mode 100644 quality/coverage/llvm_cov/justify.py diff --git a/.gitignore b/.gitignore index 018bc457f..6c05b59f3 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ rust-project.json # docs-as-code docs/ubproject.toml +cpp_coverage/ +__pycache__/ diff --git a/quality/coverage/coverage_justifications.yaml b/quality/coverage/coverage_justifications.yaml new file mode 100644 index 000000000..d4bd86f86 --- /dev/null +++ b/quality/coverage/coverage_justifications.yaml @@ -0,0 +1,70 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# +# Coverage Justification Database +# ================================ +# +# This file defines justified non-covered lines for achieving 100% effective coverage. 
+# +# Each justification has: +# - id: Unique kebab-case identifier +# - category: One of: defensive_programming, tool_false_positive, platform_specific, other +# - reason: Explanation why the line(s) cannot be covered +# - locations: (optional) File + line specifications where justification applies +# +# Lines can also be justified via in-code markers referencing the ID: +# // COV_JUSTIFIED — justifies the current line +# // COV_JUSTIFIED_START — starts a justified region +# // COV_JUSTIFIED_STOP — ends the justified region +# +# Validation rules: +# - IDs must be unique and kebab-case (lowercase + hyphens) +# - Every justification needs a non-empty reason +# - Stale justifications (line is actually covered) produce warnings +# - References to non-existent IDs produce errors +# + +version: 1 + +justifications: [] + # Example entries (uncomment and adapt as needed): + # + # - id: config-parser-defensive-switch + # category: defensive_programming + # reason: > + # Default case in switch over fully-handled enum. All enum values are explicitly + # handled with a static_assert guard. Kept as defensive programming against + # future enum additions. + # locations: + # - file: score/mw/com/impl/configuration/config_parser.cpp + # line_start: 1201 + # line_end: 1203 + # + # - id: runtime-tool-false-positive + # category: tool_false_positive + # reason: > + # Coverage tool incorrectly marks this line as not covered. Lines immediately + # before and after are covered, indicating a tool bug. + # locations: + # - file: score/mw/com/impl/runtime.cpp + # lines: [191] + # + # - id: flag-file-crawler-defensive + # category: defensive_programming + # reason: > + # Defensive guard for flag file parsing. Only entered if a flag file exists, + # so the file must be parseable. Guard protects against filesystem race conditions. 
+ # locations: + # - file: score/mw/com/impl/bindings/lola/service_discovery/flag_file_crawler.cpp + # line_start: 81 + # line_end: 85 diff --git a/quality/coverage/llvm_cov/effective_coverage.py b/quality/coverage/llvm_cov/effective_coverage.py new file mode 100644 index 000000000..93d77e7a4 --- /dev/null +++ b/quality/coverage/llvm_cov/effective_coverage.py @@ -0,0 +1,737 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Effective coverage calculator and HTML post-processor. + +Takes the llvm-cov HTML report and the resolved justification manifest. +Modifies the HTML to show justified lines in a distinct color (yellow/orange) +and calculates effective coverage metrics. + +Usage: + python effective_coverage.py --html-dir --manifest --output +""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Tuple + + +# Pattern to match a table row in llvm-cov HTML source pages +# Format: ......... 
+LINE_NUMBER_RE = re.compile(r"") +COVERED_LINE_TD_RE = re.compile(r"") + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + # Load the justification manifest + manifest = load_manifest(args.manifest) + justified_files = manifest.get("justified_files", {}) + + # Find all source HTML files in the report + html_dir = args.html_dir + if not html_dir.exists(): + print(f"ERROR: HTML report directory not found: {html_dir}", file=sys.stderr) + sys.exit(1) + + # Parse raw coverage totals from the index page (matches llvm-cov exactly). + totals = parse_index_page_totals(html_dir) + raw_covered, raw_total = totals["lines"] + raw_branch_covered, raw_branch_total = totals["branches"] + + # Process each source HTML file (restyle justified lines + count them) + total_justified = 0 + total_stale = 0 + total_justified_branches = 0 + applied_justifications: List[Dict[str, Any]] = [] + stale_justifications: List[Dict[str, Any]] = [] + # Track per-file justification counts for index page updates + per_file_stats: Dict[str, Dict[str, int]] = {} + + source_html_files = find_source_html_files(html_dir) + for html_file in source_html_files: + rel_source_path = extract_source_path_from_html(html_file, html_dir) + if not rel_source_path: + continue + + file_justifications = find_matching_justifications( + rel_source_path, justified_files + ) + + file_stats = process_html_file( + html_file, file_justifications, applied_justifications, stale_justifications + ) + + total_justified += file_stats["justified"] + total_stale += file_stats["stale"] + total_justified_branches += file_stats["justified_branches"] + + if file_stats["justified"] > 0 or file_stats["justified_branches"] > 0: + per_file_stats[rel_source_path] = file_stats + + # Calculate stats using llvm-cov's exact numbers + raw_uncovered = raw_total - raw_covered + unjustified_uncovered = raw_uncovered - total_justified + + effective_branch_covered = raw_branch_covered + total_justified_branches + + stats = { + 
"total_instrumented_lines": raw_total, + "covered_lines": raw_covered, + "justified_lines": total_justified, + "unjustified_uncovered_lines": max(0, unjustified_uncovered), + "stale_justifications": total_stale, + "raw_line_coverage_pct": round(100.0 * raw_covered / raw_total, 2) if raw_total > 0 else 0.0, + "effective_line_coverage_pct": round( + 100.0 * (raw_covered + total_justified) / raw_total, 2 + ) if raw_total > 0 else 0.0, + "total_branches": raw_branch_total, + "covered_branches": raw_branch_covered, + "justified_branches": total_justified_branches, + "raw_branch_coverage_pct": round(100.0 * raw_branch_covered / raw_branch_total, 2) if raw_branch_total > 0 else 0.0, + "effective_branch_coverage_pct": round( + 100.0 * effective_branch_covered / raw_branch_total, 2 + ) if raw_branch_total > 0 else 0.0, + } + + # Inject CSS for justified lines into style.css + inject_justified_css(html_dir) + + # Update the index page with effective coverage info and per-file stats + update_index_page(html_dir, stats, per_file_stats) + + # Write output report + report = { + "version": 1, + "summary": stats, + "applied_justifications": applied_justifications, + "stale_justifications": stale_justifications, + } + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(report, f, indent=2) + + # Write human-readable summary + summary_path = output_path.parent / "summary.txt" + write_summary(summary_path, stats, stale_justifications) + + # Print summary + print( + f"INFO: Effective line coverage: {stats['effective_line_coverage_pct']}% " + f"(raw: {stats['raw_line_coverage_pct']}%, " + f"justified: {stats['justified_lines']} lines, " + f"unjustified uncovered: {stats['unjustified_uncovered_lines']} lines)", + file=sys.stderr, + ) + if stats['justified_branches'] > 0: + print( + f"INFO: Effective branch coverage: {stats['effective_branch_coverage_pct']}% " + f"(raw: 
{stats['raw_branch_coverage_pct']}%, " + f"justified: {stats['justified_branches']} branches)", + file=sys.stderr, + ) + if stale_justifications: + print( + f"WARNING: {len(stale_justifications)} stale justifications " + f"(lines are actually covered, justification can be removed)", + file=sys.stderr, + ) + + +def process_html_file( + html_file: Path, + justifications: Dict[int, Dict[str, str]], + applied_justifications: List[Dict[str, Any]], + stale_justifications: List[Dict[str, Any]], +) -> Dict[str, int]: + """Process a single source HTML file. Modifies it in-place. + + Restyles justified lines: changes the count cell to show "J" with justified-line + class, and changes red code regions to justified (orange) background. + Also restyles uncovered branches on justified lines. + Only counts justified/stale lines for the justification report — raw coverage + numbers are taken from the index page to match llvm-cov exactly. + """ + file_stats = { + "justified": 0, + "stale": 0, + "justified_branches": 0, + } + + with open(html_file, "r", encoding="utf-8") as f: + content = f.read() + + if not justifications: + return file_stats + + # Determine effective line status (covered if ANY instantiation covers it) + row_pattern = re.compile( + r"
\d+
" + r"" + ) + line_effective_status: Dict[int, str] = {} + for m in row_pattern.finditer(content): + line_num = int(m.group(1)) + line_class = m.group(2) + if line_class == "covered-line": + line_effective_status[line_num] = "covered" + elif line_class == "uncovered-line": + if line_num not in line_effective_status: + line_effective_status[line_num] = "uncovered" + + # Determine which lines have truly uncovered branches (never covered in any instantiation). + # A branch direction is "truly uncovered" if no instantiation covers it. + branch_check_pattern = re.compile( + r"Branch \(" + r"(\d+:\d+)\):\s*\[(.*?)\]" + ) + covered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of covered directions + uncovered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of uncovered directions + branch_line_map: Dict[str, int] = {} # branch_id → line_num + + for m in branch_check_pattern.finditer(content): + line_num = int(m.group(1)) + branch_id = m.group(2) + branch_content = m.group(3) + branch_line_map[branch_id] = line_num + if branch_id not in covered_branch_dirs_check: + covered_branch_dirs_check[branch_id] = set() + uncovered_branch_dirs_check[branch_id] = set() + for direction in ("True", "False"): + if f"class='None'>{direction}" in branch_content: + covered_branch_dirs_check[branch_id].add(direction) + if f"class='red branch'>{direction}" in branch_content: + uncovered_branch_dirs_check[branch_id].add(direction) + + # Lines with truly uncovered branches (uncovered in ALL instantiations) + lines_with_uncovered_branches: set = set() + for branch_id, uncov_dirs in uncovered_branch_dirs_check.items(): + cov_dirs = covered_branch_dirs_check.get(branch_id, set()) + truly_uncovered = uncov_dirs - cov_dirs + if truly_uncovered: + lines_with_uncovered_branches.add(branch_line_map[branch_id]) + + # Determine which justified lines are stale vs applicable. + # A justification is stale only if the line is covered AND has no uncovered branches. 
+ for line_num, justification in justifications.items(): + status = line_effective_status.get(line_num) + has_uncovered_branches = line_num in lines_with_uncovered_branches + if status == "covered" and not has_uncovered_branches: + file_stats["stale"] += 1 + stale_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "reason": "Line is already covered and has no uncovered branches — justification is stale", + }) + elif status == "uncovered": + file_stats["justified"] += 1 + applied_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "category": justification.get("category", ""), + }) + elif status == "covered" and has_uncovered_branches: + # Line is covered but has uncovered branches — justification applies to branches only + applied_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "category": justification.get("category", ""), + }) + + # Restyle justified lines in the HTML (all occurrences including instantiations). + # Full row pattern to capture and replace the entire row: + # ...
0
...
... + full_row_pattern = re.compile( + r"(
\d+
)" + r"(
)\d+(
)" + r"(
)(.*?)(
)" + ) + + modified = False + + def replace_full_row(match: re.Match) -> str: + nonlocal modified + line_num = int(match.group(2)) + if line_num not in justifications: + return match.group(0) + + justification = justifications[line_num] + reason = justification.get("reason", "").replace("'", "'").replace('"', """) + jid = justification.get("id", "") + tooltip = f"Justified [{jid}]: {reason}" + modified = True + + # Rebuild the row with justified styling: + # 1. Line number td (unchanged) + line_td = match.group(1) + # 2. Count td: change class and show "J" instead of "0" + count_td = f"
J{match.group(4)}"
+        # 3. Code td: replace 'region red' spans with 'region justified'
+        code_start = match.group(5)
+        code_content = match.group(6).replace("class='region red'", "class='region justified'")
+        code_end = match.group(7)
+
+        return line_td + count_td + code_start + code_content + code_end
+
+    new_content = full_row_pattern.sub(replace_full_row, content)
+
+    # Restyle branches on justified lines.
+    # Branch format in expansion-view:
+    # Branch (195:17):
+    #   [True: 0, ...]
+    # We find branches at justified line numbers and restyle red branch → justified branch
+    # Counting: A branch direction is "uncovered" only if ALL instantiations show it as red.
+    # (Same as llvm-cov's logic: covered if ANY instantiation covers it.)
+    branch_pattern = re.compile(
+        r"(Branch \("
+        r"(\d+:\d+)\):\s*\[)(.*?\])"
+    )
+
+    # First pass: determine which branch directions are covered in any instantiation
+    covered_branch_dirs: set = set()  # (line:col, direction) that are covered somewhere
+    for m in branch_pattern.finditer(new_content):
+        line_num = int(m.group(2))
+        if line_num not in justifications:
+            continue
+        branch_id = m.group(3)
+        branch_content = m.group(4)
+        # A direction is covered if it does NOT have 'red branch' class
+        for direction in ("True", "False"):
+            # Check if this direction appears as covered (class='None' means covered)
+            covered_marker = f"class='None'>{direction}"
+            if covered_marker in branch_content:
+                covered_branch_dirs.add((branch_id, direction))
+
+    # Second pass: restyle and count only truly uncovered branch directions
+    justified_branch_ids: set = set()  # Track unique uncovered (line:col, direction) pairs
+
+    def replace_branch(match: re.Match) -> str:
+        """Substitution callback that restyles and counts uncovered branches on justified lines.
+
+        Returns the matched text unchanged when the branch's line has no
+        justification or when the branch list contains no ``class='red branch'``
+        entry. Otherwise returns ``match.group(1)`` followed by the branch list
+        with the red/uncovered CSS classes swapped for the justified ones.
+
+        Side effects: sets the enclosing ``modified`` flag, adds entries to
+        ``justified_branch_ids`` and increments ``file_stats["justified_branches"]``.
+
+        NOTE(review): the group numbers used below depend on the capture groups of
+        ``branch_pattern`` -- confirm them against the pattern definition.
+        """
+        nonlocal modified
+        # Group 2 is parsed as the source line number the branch belongs to.
+        line_num = int(match.group(2))
+        if line_num not in justifications:
+            return match.group(0)
+
+        # Group 4 holds the branch list text; nothing to do if no direction is red.
+        branch_content = match.group(4)
+        if "class='red branch'" not in branch_content:
+            return match.group(0)
+
+        modified = True
+        branch_id = match.group(3)  # e.g. "68:13"
+
+        # Count unique uncovered branch directions that are NEVER covered in any instantiation
+        for direction in ("True", "False"):
+            if f"class='red branch'>{direction}" in branch_content:
+                uid = (branch_id, direction)
+                # Skip directions covered elsewhere and those already counted, so each
+                # (branch_id, direction) pair contributes at most once per file.
+                if uid not in covered_branch_dirs and uid not in justified_branch_ids:
+                    justified_branch_ids.add(uid)
+                    file_stats["justified_branches"] += 1
+
+        # Restyle: red branch → justified-branch, uncovered-line → justified-line
+        # (applied to every red entry in this match, whether or not it was counted above)
+        branch_content = branch_content.replace(
+            "class='red branch'", "class='justified-branch'"
+        )
+        branch_content = branch_content.replace(
+            "class='uncovered-line'", "class='justified-line'"
+        )
+        return match.group(1) + branch_content
+
+    new_content = branch_pattern.sub(replace_branch, new_content)
+
+    if modified:
+        with open(html_file, "w", encoding="utf-8") as f:
+            f.write(new_content)
+
+    return file_stats
+
+
+def parse_index_page_totals(html_dir: Path) -> Dict[str, Tuple[int, int]]:
+    """Parse the TOTALS row from the llvm-cov index.html to get exact coverage numbers.
+
+    The TOTALS row cells are expected in the order function, line, branch, each
+    formatted like "93.55% (17565/18777)". llvm-cov renders a column whose total
+    is zero as "- (0/0)"; such cells are still counted as columns so the line and
+    branch cells keep their positions.
+
+    Args:
+        html_dir: Directory containing the llvm-cov HTML report (index.html).
+
+    Returns:
+        Dict with 'lines' and 'branches' keys, each a (covered, total) tuple.
+        Both are (0, 0) when index.html is missing or cannot be parsed; a warning
+        is printed to stderr when the file exists but no line totals were found.
+    """
+    result = {"lines": (0, 0), "branches": (0, 0)}
+
+    index_file = html_dir / "index.html"
+    if not index_file.exists():
+        return result
+
+    with open(index_file, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    totals_idx = content.rfind("Totals")
+    if totals_idx >= 0:
+        # Only look at the TOTALS row itself. Scanning the whole page and taking
+        # the trailing matches picks up cells from the preceding file row as soon
+        # as one TOTALS cell is "- (0/0)" or a column is absent.
+        row_end = content.find("</tr>", totals_idx)
+        row = content[totals_idx:row_end] if row_end >= 0 else content[totals_idx:]
+        cell_pattern = re.compile(r"(?:\d+\.\d+%|-)\s*\((\d+)/(\d+)\)")
+        cells = cell_pattern.findall(row)
+    else:
+        # No TOTALS label found: fall back to the last three percentage cells on
+        # the page (function, line, branch), and only if all three are present.
+        pct_pattern = re.compile(r"\d+\.\d+%\s*\((\d+)/(\d+)\)")
+        cells = pct_pattern.findall(content)[-3:]
+        if len(cells) < 3:
+            cells = []
+
+    # Column order: function (index 0), line (index 1), branch (index 2).
+    if len(cells) >= 2:
+        line_covered, line_total = cells[1]
+        result["lines"] = (int(line_covered), int(line_total))
+    if len(cells) >= 3:
+        branch_covered, branch_total = cells[2]
+        result["branches"] = (int(branch_covered), int(branch_total))
+
+    if result["lines"] == (0, 0):
+        print("WARNING: Could not parse coverage totals from index.html", file=sys.stderr)
+
+    return result
+
+
+def inject_justified_css(html_dir: Path) -> None:
+    """Append the CSS rules for justified lines to the report's style.css.
+
+    Does nothing when style.css does not exist. The rules are appended at most
+    once: if the marker comment that opens the block is already present in the
+    file, it is left untouched, so re-running the post-processor on the same
+    report does not duplicate the rules.
+
+    Args:
+        html_dir: Directory containing the llvm-cov HTML report (style.css).
+    """
+    style_file = html_dir / "style.css"
+    if not style_file.exists():
+        return
+
+    justified_css = """
+/* Coverage justification styling */
+.justified-line {
+  text-align: right;
+  color: #a60;
+}
+.region.justified {
+  background-color: #fa04;
+}
+.justified-branch {
+  color: #a60;
+  font-weight: bold;
+}
+tr:has(> td.justified-line) > td.code {
+  background-color: #fff3e0;
+}
+@media (prefers-color-scheme: dark) {
+  .justified-line {
+    color: #fa0;
+  }
+  .justified-branch {
+    color: #fa0;
+  }
+  tr:has(> td.justified-line) > td.code {
+    background-color: #3d2800;
+  }
+  .region.justified {
+    background-color: #fa03;
+  }
+}
+"""
+
+    # The first line of the injected block doubles as an "already injected" marker.
+    marker = "/* Coverage justification styling */"
+    with open(style_file, "r", encoding="utf-8", errors="replace") as f:
+        if marker in f.read():
+            return
+
+    with open(style_file, "a", encoding="utf-8") as f:
+        f.write(justified_css)
+
+
+def update_index_page(html_dir: Path, stats: Dict[str, Any], per_file_stats: Dict[str, Dict[str, int]]) -> None:
+    """Update the index page with effective coverage info and per-file adjusted percentages."""
+    index_file = html_dir / "index.html"
+    if not index_file.exists():
+        return
+
+    with open(index_file, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    # Banner with overall effective coverage (lines + branches)
+    branch_info = ""
+    if stats.get("justified_branches", 0) > 0:
+        branch_info = (
+            f" | Effective Branch Coverage: {stats['effective_branch_coverage_pct']}%"
+            f" (Raw: {stats['raw_branch_coverage_pct']}%, Justified: {stats['justified_branches']} branches)"
+        )
+
+    banner = (
+        f"
" + f"Effective Line Coverage: {stats['effective_line_coverage_pct']}% " + f"(Raw: {stats['raw_line_coverage_pct']}% | " + f"Justified: {stats['justified_lines']} lines | " + f"Unjustified Uncovered: {stats['unjustified_uncovered_lines']} lines)" + f"{branch_info}" + f"
" + ) + + # Insert after the tag or after the first

+ if "

" in content: + content = content.replace("

", banner + "

", 1) + else: + content = content.replace("", f"{banner}", 1) + + # Update per-file rows in the index table. + # For each file with justifications, find its row and update line% and branch% cells. + # Row format:
displayname
+ #
  XX.XX% (covered/total)
← function + #
  XX.XX% (covered/total)
← line + #
  XX.XX% (covered/total)
← branch + # + pct_cell_pattern = re.compile( + r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" + ) + + for file_path, fstats in per_file_stats.items(): + justified_lines = fstats.get("justified", 0) + justified_branches = fstats.get("justified_branches", 0) + if justified_lines == 0 and justified_branches == 0: + continue + + # Find the row for this file in the index page + # The href contains the full path to the HTML file + if file_path not in content: + continue + + # Find the containing this file path + file_idx = content.find(file_path) + if file_idx < 0: + continue + row_start = content.rfind("", file_idx) + if row_start < 0 or row_end < 0: + continue + + row = content[row_start:row_end + 5] + + # Find all percentage cells in this row (func, line, branch) + cells = list(pct_cell_pattern.finditer(row)) + if len(cells) < 2: + continue + + new_row = row + # Update line coverage cell (second cell, index 1) + if justified_lines > 0 and len(cells) >= 2: + line_cell = cells[1] + covered = int(line_cell.group(3)) + total = int(line_cell.group(4)) + eff_covered = covered + justified_lines + eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 + color = _get_coverage_color(eff_pct) + old_cell = line_cell.group(0) + new_cell = ( + f"
"
+                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + # Update branch coverage cell (third cell, index 2) + if justified_branches > 0 and len(cells) >= 3: + branch_cell = cells[2] + covered = int(branch_cell.group(3)) + total = int(branch_cell.group(4)) + eff_covered = covered + justified_branches + eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 + color = _get_coverage_color(eff_pct) + old_cell = branch_cell.group(0) + new_cell = ( + f"
"
+                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + if new_row != row: + content = content.replace(row, new_row) + + # Update the TOTALS row + content = _update_totals_row(content, stats) + + with open(index_file, "w", encoding="utf-8") as f: + f.write(content) + + +def _get_coverage_color(pct: float) -> str: + """Return the llvm-cov color class for a coverage percentage.""" + if pct >= 100.0: + return "green" + elif pct >= 80.0: + return "yellow" + else: + return "red" + + +def _update_totals_row(content: str, stats: Dict[str, Any]) -> str: + """Update the TOTALS row in the index page with effective coverage numbers.""" + # Find the TOTALS row — it's the last row before + totals_idx = content.rfind("Totals") + if totals_idx < 0: + return content + + row_start = content.rfind("", totals_idx) + if row_start < 0 or row_end < 0: + return content + + row = content[row_start:row_end + 5] + + pct_cell_pattern = re.compile( + r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" + ) + cells = list(pct_cell_pattern.finditer(row)) + + new_row = row + + # Update line coverage in totals (index 1) + if len(cells) >= 2 and stats.get("justified_lines", 0) > 0: + line_cell = cells[1] + eff_covered = stats["covered_lines"] + stats["justified_lines"] + total = stats["total_instrumented_lines"] + eff_pct = stats["effective_line_coverage_pct"] + color = _get_coverage_color(eff_pct) + old_cell = line_cell.group(0) + new_cell = ( + f"
"
+            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + # Update branch coverage in totals (index 2) + if len(cells) >= 3 and stats.get("justified_branches", 0) > 0: + branch_cell = cells[2] + eff_covered = stats["covered_branches"] + stats["justified_branches"] + total = stats["total_branches"] + eff_pct = stats["effective_branch_coverage_pct"] + color = _get_coverage_color(eff_pct) + old_cell = branch_cell.group(0) + new_cell = ( + f"
"
+            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + if new_row != row: + content = content.replace(row, new_row) + + return content + + +def find_source_html_files(html_dir: Path) -> List[Path]: + """Find all per-source HTML files (not index.html, style.css, etc.).""" + coverage_dir = html_dir / "coverage" + if not coverage_dir.exists(): + # Some llvm-cov versions put source files directly in html_dir + coverage_dir = html_dir + + files = [] + for html_file in coverage_dir.rglob("*.html"): + if html_file.name in ("index.html",): + continue + files.append(html_file) + return sorted(files) + + +def extract_source_path_from_html(html_file: Path, html_dir: Path) -> str: + """Extract the relative source file path from the HTML file path. + + llvm-cov creates paths like: html_report/coverage/.html + We need to extract the relative path within the project. + """ + rel = str(html_file.relative_to(html_dir)) + # Remove "coverage/" prefix if present + if rel.startswith("coverage/"): + rel = rel[len("coverage/"):] + # Remove .html suffix + if rel.endswith(".html"): + rel = rel[:-5] + return rel + + +def find_matching_justifications( + source_path: str, justified_files: Dict[str, Dict[str, Dict[str, str]]] +) -> Dict[int, Dict[str, str]]: + """Find justifications that match the given source path. + + The source_path from HTML may be an absolute path or relative. + The justified_files keys are relative to source root. + We match by suffix. 
+ """ + result: Dict[int, Dict[str, str]] = {} + + for justified_path, line_justifications in justified_files.items(): + # Match if the source_path ends with the justified_path + if source_path.endswith(justified_path) or justified_path.endswith(source_path): + for line_str, justification in line_justifications.items(): + result[int(line_str)] = justification + + return result + + +def write_summary( + path: Path, stats: Dict[str, Any], stale: List[Dict[str, Any]] +) -> None: + """Write human-readable summary.""" + with open(path, "w", encoding="utf-8") as f: + f.write("Coverage Justification Summary\n") + f.write("=" * 40 + "\n\n") + f.write(f"Total instrumented lines: {stats['total_instrumented_lines']}\n") + f.write(f"Covered lines: {stats['covered_lines']}\n") + f.write(f"Justified lines: {stats['justified_lines']}\n") + f.write(f"Unjustified uncovered: {stats['unjustified_uncovered_lines']}\n") + f.write(f"\n") + f.write(f"Raw line coverage: {stats['raw_line_coverage_pct']}%\n") + f.write(f"Effective line coverage: {stats['effective_line_coverage_pct']}%\n") + f.write(f"\n") + if stats.get("total_branches", 0) > 0: + f.write(f"Total branches: {stats['total_branches']}\n") + f.write(f"Covered branches: {stats['covered_branches']}\n") + f.write(f"Justified branches: {stats['justified_branches']}\n") + f.write(f"Raw branch coverage: {stats['raw_branch_coverage_pct']}%\n") + f.write(f"Effective branch coverage: {stats['effective_branch_coverage_pct']}%\n") + f.write(f"\n") + if stale: + f.write(f"Stale justifications ({len(stale)}):\n") + for s in stale: + f.write(f" - {s['file']}:{s['line']} [{s['id']}]\n") + f.write("\n") + + +def load_manifest(path: Path) -> Dict[str, Any]: + """Load the justification manifest JSON.""" + if not path.exists(): + print(f"ERROR: Manifest not found: {path}", file=sys.stderr) + sys.exit(1) + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def parse_args() -> argparse.Namespace: + """Parse command-line 
arguments.""" + parser = argparse.ArgumentParser( + description="Effective coverage calculator and HTML post-processor" + ) + parser.add_argument( + "--html-dir", + type=Path, + required=True, + help="Path to llvm-cov HTML report directory", + ) + parser.add_argument( + "--manifest", + type=Path, + required=True, + help="Path to resolved justification manifest (from justify.py)", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output path for justification report (JSON)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + main() diff --git a/quality/coverage/llvm_cov/justify.py b/quality/coverage/llvm_cov/justify.py new file mode 100644 index 000000000..8e37292b1 --- /dev/null +++ b/quality/coverage/llvm_cov/justify.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Coverage justification processor. + +Parses the YAML justification database and source files for COV_JUSTIFIED markers. +Resolves all justified lines and produces a manifest mapping file:line → justification. + +Usage: + python justify.py --yaml --source-root --output + +Supports two ways to specify justified lines: +1. YAML locations: directly specify file + line ranges in the YAML +2. 
In-code markers: COV_JUSTIFIED , COV_JUSTIFIED_START / COV_JUSTIFIED_STOP +""" + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Set, Tuple + +import yaml + + +# Marker patterns +COV_JUSTIFIED_LINE_RE = re.compile(r"COV_JUSTIFIED\s+([\w-]+)") +COV_JUSTIFIED_START_RE = re.compile(r"COV_JUSTIFIED_START\s+([\w-]+)") +COV_JUSTIFIED_STOP_RE = re.compile(r"COV_JUSTIFIED_STOP") + +VALID_CATEGORIES = { + "defensive_programming", + "tool_false_positive", + "platform_specific", + "other", +} + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + justifications_data = load_yaml(args.yaml) + validate_yaml(justifications_data) + + # Build lookup: id -> justification entry + justifications_by_id: Dict[str, Dict[str, Any]] = {} + for entry in justifications_data.get("justifications", []): + justifications_by_id[entry["id"]] = entry + + # Resolve all justified lines + resolved: Dict[str, Dict[int, Dict[str, str]]] = {} + warnings: List[str] = [] + errors: List[str] = [] + + # 1. Process YAML direct locations + for entry in justifications_data.get("justifications", []): + for location in entry.get("locations", []): + file_path = location["file"] + full_path = Path(args.source_root) / file_path + + if not full_path.exists(): + errors.append( + f"File not found for justification '{entry['id']}': {file_path}" + ) + continue + + lines = resolve_location_lines(location) + if file_path not in resolved: + resolved[file_path] = {} + for line in lines: + resolved[file_path][line] = { + "id": entry["id"], + "category": entry["category"], + "reason": entry["reason"].strip(), + } + + # 2. 
Scan source files for in-code COV_JUSTIFIED markers + source_files = collect_source_files(args.source_root, args.file_filter) + for source_file in source_files: + rel_path = str(source_file.relative_to(args.source_root)) + scan_warnings, scan_lines = scan_file_for_markers( + source_file, rel_path, justifications_by_id + ) + warnings.extend(scan_warnings) + + if scan_lines: + if rel_path not in resolved: + resolved[rel_path] = {} + for line_num, justification_info in scan_lines.items(): + resolved[rel_path][line_num] = justification_info + + # Output manifest + manifest = { + "version": 1, + "source_root": str(args.source_root), + "justified_files": { + filepath: {str(k): v for k, v in lines.items()} + for filepath, lines in sorted(resolved.items()) + }, + "warnings": warnings, + "errors": errors, + } + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(manifest, f, indent=2) + + # Print diagnostics + total_justified_lines = sum(len(lines) for lines in resolved.values()) + print( + f"INFO: Resolved {total_justified_lines} justified lines across " + f"{len(resolved)} files.", + file=sys.stderr, + ) + if warnings: + for w in warnings: + print(f"WARNING: {w}", file=sys.stderr) + if errors: + for e in errors: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + + +def resolve_location_lines(location: Dict[str, Any]) -> List[int]: + """Resolve line numbers from a YAML location entry.""" + if "lines" in location: + return location["lines"] + elif "line_start" in location and "line_end" in location: + return list(range(location["line_start"], location["line_end"] + 1)) + elif "line" in location: + return [location["line"]] + return [] + + +def scan_file_for_markers( + file_path: Path, + rel_path: str, + justifications_by_id: Dict[str, Dict[str, Any]], +) -> Tuple[List[str], Dict[int, Dict[str, str]]]: + """Scan a source file for COV_JUSTIFIED markers.""" + warnings = 
[] + justified_lines: Dict[int, Dict[str, str]] = {} + + try: + with open(file_path, "r", encoding="utf-8", errors="replace") as f: + lines = f.readlines() + except (IOError, OSError): + return warnings, justified_lines + + region_stack: List[Tuple[int, str]] = [] # (start_line, justification_id) + + for line_num, line in enumerate(lines, start=1): + # Check for COV_JUSTIFIED_START + start_match = COV_JUSTIFIED_START_RE.search(line) + if start_match: + jid = start_match.group(1) + if jid not in justifications_by_id: + warnings.append( + f"{rel_path}:{line_num}: COV_JUSTIFIED_START references " + f"unknown ID '{jid}'" + ) + else: + region_stack.append((line_num, jid)) + continue + + # Check for COV_JUSTIFIED_STOP + stop_match = COV_JUSTIFIED_STOP_RE.search(line) + if stop_match: + if not region_stack: + warnings.append( + f"{rel_path}:{line_num}: COV_JUSTIFIED_STOP without matching START" + ) + else: + start_line, jid = region_stack.pop() + if jid in justifications_by_id: + entry = justifications_by_id[jid] + for ln in range(start_line + 1, line_num): + justified_lines[ln] = { + "id": jid, + "category": entry["category"], + "reason": entry["reason"].strip(), + } + continue + + # Check for single-line COV_JUSTIFIED (but not START/STOP) + if "COV_JUSTIFIED_START" not in line and "COV_JUSTIFIED_STOP" not in line: + line_match = COV_JUSTIFIED_LINE_RE.search(line) + if line_match: + jid = line_match.group(1) + if jid not in justifications_by_id: + warnings.append( + f"{rel_path}:{line_num}: COV_JUSTIFIED references " + f"unknown ID '{jid}'" + ) + else: + entry = justifications_by_id[jid] + justified_lines[line_num] = { + "id": jid, + "category": entry["category"], + "reason": entry["reason"].strip(), + } + + # Check for unclosed regions + for start_line, jid in region_stack: + warnings.append( + f"{rel_path}:{start_line}: COV_JUSTIFIED_START '{jid}' without matching STOP" + ) + + return warnings, justified_lines + + +def collect_source_files(source_root: Path, 
file_filter: str) -> List[Path]: + """Collect source files to scan for markers.""" + extensions = file_filter.split(",") if file_filter else ["cpp", "h", "hpp", "cc"] + files = [] + for ext in extensions: + files.extend(source_root.rglob(f"*.{ext.strip()}")) + return sorted(files) + + +def load_yaml(yaml_path: Path) -> Dict[str, Any]: + """Load YAML justification database.""" + if not yaml_path.exists(): + print(f"ERROR: Justification YAML not found: {yaml_path}", file=sys.stderr) + sys.exit(1) + + with open(yaml_path, "r", encoding="utf-8") as f: + content = f.read() + + return yaml.safe_load(content) + + +def validate_yaml(data: Dict[str, Any]) -> None: + """Validate the justification YAML structure and types.""" + try: + errors = [] + + if not isinstance(data, dict): + print("ERROR: YAML validation: root must be a mapping", file=sys.stderr) + sys.exit(1) + + if "version" not in data: + errors.append("Missing 'version' field") + elif not isinstance(data["version"], int): + errors.append(f"'version' must be an integer, got {type(data['version']).__name__}") + + if "justifications" not in data: + errors.append("Missing 'justifications' field") + for e in errors: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + + if not isinstance(data["justifications"], list): + errors.append( + f"'justifications' must be a list, got {type(data['justifications']).__name__}" + ) + for e in errors: + print(f"ERROR: YAML validation: {e}", file=sys.stderr) + sys.exit(1) + + seen_ids: Set[str] = set() + for i, entry in enumerate(data["justifications"]): + prefix = f"justifications[{i}]" + + if not isinstance(entry, dict): + errors.append(f"{prefix}: must be a mapping, got {type(entry).__name__}") + continue + + if "id" not in entry: + errors.append(f"{prefix}: missing 'id'") + continue + + jid = entry["id"] + if not isinstance(jid, str): + errors.append(f"{prefix}: 'id' must be a string, got {type(jid).__name__}") + continue + + if jid in seen_ids: + errors.append(f"{prefix}: 
duplicate ID '{jid}'") + seen_ids.add(jid) + + if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", jid): + errors.append(f"{prefix}: ID '{jid}' must be kebab-case") + + if "category" not in entry: + errors.append(f"{prefix}: missing 'category'") + elif not isinstance(entry["category"], str): + errors.append( + f"{prefix}: 'category' must be a string, " + f"got {type(entry['category']).__name__}" + ) + elif entry["category"] not in VALID_CATEGORIES: + errors.append( + f"{prefix}: invalid category '{entry['category']}'. " + f"Must be one of: {sorted(VALID_CATEGORIES)}" + ) + + if "reason" not in entry: + errors.append(f"{prefix}: missing 'reason'") + elif not isinstance(entry["reason"], str): + errors.append( + f"{prefix}: 'reason' must be a string, " + f"got {type(entry['reason']).__name__}" + ) + elif not entry["reason"].strip(): + errors.append(f"{prefix}: 'reason' must not be empty") + + if "locations" in entry: + if not isinstance(entry["locations"], list): + errors.append( + f"{prefix}: 'locations' must be a list, " + f"got {type(entry['locations']).__name__}" + ) + else: + for j, loc in enumerate(entry["locations"]): + loc_prefix = f"{prefix}.locations[{j}]" + if not isinstance(loc, dict): + errors.append( + f"{loc_prefix}: must be a mapping, " + f"got {type(loc).__name__}" + ) + continue + if "file" not in loc: + errors.append(f"{loc_prefix}: missing 'file'") + elif not isinstance(loc["file"], str): + errors.append( + f"{loc_prefix}: 'file' must be a string, " + f"got {type(loc['file']).__name__}" + ) + for int_field in ("line", "line_start", "line_end"): + if int_field in loc and not isinstance(loc[int_field], int): + errors.append( + f"{loc_prefix}: '{int_field}' must be an integer, " + f"got {type(loc[int_field]).__name__}" + ) + if "lines" in loc: + if not isinstance(loc["lines"], list): + errors.append( + f"{loc_prefix}: 'lines' must be a list, " + f"got {type(loc['lines']).__name__}" + ) + elif not all(isinstance(ln, int) for ln in loc["lines"]): + 
errors.append( + f"{loc_prefix}: 'lines' must contain only integers" + ) + + if errors: + for e in errors: + print(f"ERROR: YAML validation: {e}", file=sys.stderr) + sys.exit(1) + except Exception as error: + print(f"ERROR: YAML validation: {error}", file=sys.stderr) + sys.exit(1) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Coverage justification processor" + ) + parser.add_argument( + "--yaml", + type=Path, + required=True, + help="Path to coverage_justifications.yaml", + ) + parser.add_argument( + "--source-root", + type=Path, + required=True, + help="Root directory of source files", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output path for resolved justification manifest (JSON)", + ) + parser.add_argument( + "--file-filter", + type=str, + default="cpp,h,hpp,cc", + help="Comma-separated file extensions to scan (default: cpp,h,hpp,cc)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + main() From 0b875d207610428a30b8e972e77b8b62affb5502 Mon Sep 17 00:00:00 2001 From: Jan Schlosser Date: Mon, 15 Jun 2026 14:24:20 +0200 Subject: [PATCH 4/4] docs(coverage): add documentation for coverage infrastructure - Add quality/coverage/README.md with pipeline architecture overview - Add quality/coverage/llvm_cov/README.md with tool-level documentation - Update quality/quality.md to reflect new llvm-cov pipeline and link to the detailed documentation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- quality/coverage/README.md | 207 +++++++++++++++++++++++ quality/coverage/llvm_cov/README.md | 252 ++++++++++++++++++++++++++++ quality/quality.md | 24 ++- 3 files changed, 474 insertions(+), 9 deletions(-) create mode 100644 quality/coverage/README.md create mode 100644 quality/coverage/llvm_cov/README.md diff --git a/quality/coverage/README.md b/quality/coverage/README.md new file mode 100644 index 
000000000..a07de9d78 --- /dev/null +++ b/quality/coverage/README.md @@ -0,0 +1,207 @@ +# Coverage Infrastructure + +This directory contains the tooling to generate, post-process, and report C++ code coverage for the Score Communication project using LLVM's source-based coverage instrumentation (`llvm-cov`). + +## Overview + +``` +quality/coverage/ +├── README.md ← You are here +├── BUILD ← Bazel target for generate_coverage_html +├── coverage.bazelrc ← Bazel coverage configuration flags +├── coverage_justifications.yaml ← Central justification database +├── generate_coverage_html.sh ← Orchestrator script (entry point) +└── llvm_cov/ ← Python tools for coverage processing + ├── README.md ← Detailed tool documentation + ├── BUILD ← Bazel targets for Python tools + ├── merger.py ← Per-test coverage output generator + ├── reporter.py ← Final combined report generator + ├── justify.py ← Justification resolver + └── effective_coverage.py ← HTML post-processor & effective coverage calculator +``` + +## Requirements + +The coverage pipeline was built to satisfy the following requirements: + +### REQ-COV-001: Native llvm-cov HTML Reports + +Coverage reports **must** be generated directly by `llvm-cov show` using LLVM's source-based coverage (`--experimental_use_llvm_covmap`). No intermediate LCOV-to-HTML conversion (genhtml) is used. This provides accurate source-level coverage including branch and expansion views. + +### REQ-COV-002: Instrumentation Filtering + +Only project source code under `//score/message_passing` and `//score/mw/com` shall be instrumented and reported. Tests, benchmarks, and external/third-party code must be excluded from the report. + +> **Note:** `--experimental_use_llvm_covmap` causes Bazel to instrument ALL targets regardless of `--instrumentation_filter`. Actual source filtering is enforced by `--ignore-filename-regex` in the merger and reporter. See `coverage.bazelrc` for details. 
+ +### REQ-COV-003: Coverage Justification Infrastructure + +A YAML-based justification system must allow developers to "argue" non-covered lines and branches to achieve 100% effective coverage. Justified lines must: +- Be tracked in a central YAML file with unique IDs, categories, and rationale +- Optionally be referenced from code via `COV_JUSTIFIED` markers +- Appear visually distinct (yellow/orange) in the HTML report +- Be reflected in both per-file and total coverage percentages + +### REQ-COV-004: Effective Coverage Calculation + +The system must calculate and display: +- **Raw coverage**: actual lines/branches hit ÷ total instrumented lines/branches +- **Effective coverage**: (hit + justified) ÷ total + +Both line and branch effective coverage must be shown in the summary, per-file index table, and totals row. + +### REQ-COV-005: Stale Justification Detection + +Justifications for lines/branches that are actually covered by tests must be detected and reported as stale warnings, enabling cleanup. + +### REQ-COV-006: Template Instantiation Handling + +For C++ templates with multiple instantiations, a line or branch is considered "covered" if ANY instantiation covers it (consistent with llvm-cov semantics). This prevents inflated totals from repeated template expansions. + +### REQ-COV-007: Threshold Enforcement + +The pipeline must support a configurable effective coverage threshold (default: 100%) and emit a warning when coverage falls below it. + +## Quick Start + +### 1. Run Coverage Collection + +```bash +# Full project +bazel coverage //... + +# Specific target +bazel coverage //score/message_passing:client_connection_test_linux +``` + +### 2. Generate the HTML Report + +```bash +bazel run //quality/coverage:generate_coverage_html +``` + +This extracts the HTML report to `cpp_coverage/`, runs justification processing, and prints the coverage summary. Open the report: + +```bash +xdg-open cpp_coverage/index.html +``` + +### 3. 
Create an Archive (CI) + +```bash +bazel run //quality/coverage:generate_coverage_html -- --archive coverage-report +``` + +Creates `coverage-report.zip` containing the HTML report, LCOV data, and JUnit XML test results. + +## Pipeline Architecture + +The coverage pipeline has two phases: + +### Phase 1: Bazel Coverage Collection + +Configured by `coverage.bazelrc`, Bazel runs tests with coverage instrumentation enabled: + +``` +bazel coverage //... + │ + ├── Per-test: merger.py (--coverage_output_generator) + │ • Receives .profraw files from test execution + │ • Merges into .profdata via llvm-profdata + │ • Packages profdata + metadata into a zip + │ + └── Final: reporter.py (--coverage_report_generator) + • Merges all per-test profdata into one + • Runs llvm-cov show → HTML report + • Runs llvm-cov export → LCOV data + • Runs llvm-cov report → text summary + • Packages everything into _coverage_report.dat (zip) +``` + +### Phase 2: Report Extraction & Justification + +``` +bazel run //quality/coverage:generate_coverage_html + │ + └── generate_coverage_html.sh + ├── Extract HTML from _coverage_report.dat → cpp_coverage/ + ├── justify.py: YAML + code markers → manifest.json + ├── effective_coverage.py: Post-process HTML + calculate effective % + └── Print summary + threshold check +``` + +## Configuration + +### coverage.bazelrc + +Key settings: + +| Flag | Purpose | +|------|---------| +| `--experimental_use_llvm_covmap` | Use LLVM source-based coverage (not gcov) | +| `--instrumentation_filter` | Documents intended scope (not enforced by Bazel with covmap) | +| `--coverage_output_generator` | Points to `merger.py` for per-test processing | +| `--coverage_report_generator` | Points to `reporter.py` for final aggregation | +| `--test_env=LLVM_PROFILE_CONTINUOUS_MODE=1` | Enables profiling of abnormal terminations | +| `-mllvm -runtime-counter-relocation` | Required for continuous-mode profiling with LLVM | + +### Environment Variables + +| Variable | Default | 
Description | +|----------|---------|-------------| +| `COVERAGE_THRESHOLD` | `100` | Minimum effective line coverage % (warning if below) | + +## Coverage Justifications + +See [`coverage_justifications.yaml`](coverage_justifications.yaml) for the justification database and [`llvm_cov/README.md`](llvm_cov/README.md) for detailed documentation of the justification tools. + +### Adding a Justification + +1. **Via YAML** — add an entry to `coverage_justifications.yaml`: + +```yaml +justifications: + - id: my-unique-id # kebab-case, must be unique + category: defensive_programming # or: tool_false_positive, platform_specific, other + reason: > + Explanation of why these lines cannot be covered by tests. + locations: + - file: score/mw/com/impl/some_file.cpp + line_start: 42 + line_end: 45 +``` + +2. **Via code markers** — reference the ID from source (no `locations` needed in YAML): + +```cpp +unreachable_code(); // COV_JUSTIFIED my-unique-id + +// COV_JUSTIFIED_START my-unique-id +defensive_block(); +more_defensive_code(); +// COV_JUSTIFIED_STOP +``` + +Both methods can be combined. A justification covers both the line and any branches on that line. + +We strongly recommend using in-code markers where possible, however, as they survive refactoring better and +make it less likely that justifications become outdated.
+ +### Justification Categories + +| Category | Use Case | +|----------|----------| +| `defensive_programming` | Unreachable code kept as safety guard (e.g., default case in exhaustive switch) | +| `tool_false_positive` | Coverage tool incorrectly marks line as uncovered | +| `platform_specific` | Code path only reachable on platforms not under test | +| `other` | Any other valid reason | + +### Visual Indicators in HTML Report + +| Color | Meaning | +|-------|---------| +| **Green** | Covered by tests | +| **Red** | Not covered (needs tests or justification) | +| **Yellow/Orange** | Justified — not covered but argued with rationale | + +The index page shows a banner with overall effective coverage and updates per-file percentages in the table to reflect justifications. diff --git a/quality/coverage/llvm_cov/README.md b/quality/coverage/llvm_cov/README.md new file mode 100644 index 000000000..5a6c55a4f --- /dev/null +++ b/quality/coverage/llvm_cov/README.md @@ -0,0 +1,252 @@ +# llvm-cov Coverage Tools + +This directory contains the Python tools that power the coverage pipeline. They are invoked by Bazel (merger + reporter) and by the `generate_coverage_html.sh` orchestrator script (justify + effective_coverage). 
+## Tool Overview + +| Tool | Invoked By | Purpose | +|------|-----------|---------| +| `merger.py` | Bazel (`--coverage_output_generator`) | Per-test: profraw → profdata + metadata zip | +| `reporter.py` | Bazel (`--coverage_report_generator`) | Final: merge all profdata → HTML + LCOV + summary | +| `justify.py` | `generate_coverage_html.sh` | Resolve YAML + code markers → justification manifest | +| `effective_coverage.py` | `generate_coverage_html.sh` | Post-process HTML report + calculate effective coverage | + +## Data Flow + +``` +Test execution + │ + ▼ +┌──────────┐ profraw files +│ merger.py │ ◄── from each test +└────┬─────┘ + │ Per-test zip: {profdata, metadata.json} + ▼ +┌────────────┐ All per-test zips +│ reporter.py │ ◄── listed in --reports_file +└────┬───────┘ + │ _coverage_report.dat (zip): + │ ├── html_report/ (llvm-cov show --format=html) + │ ├── lcov_report/lcov.dat (llvm-cov export --format=lcov) + │ └── text_report/summary.txt (llvm-cov report) + ▼ +┌───────────────────────────┐ +│ generate_coverage_html.sh │ ◄── bazel run //quality/coverage:generate_coverage_html +└────┬──────────────────────┘ + │ Extracts html_report/ → cpp_coverage/ + ▼ +┌─────────────┐ coverage_justifications.yaml +│ justify.py │ ◄── + source files (COV_JUSTIFIED markers) +└────┬────────┘ + │ manifest.json: {file → {line → justification}} + ▼ +┌───────────────────────┐ +│ effective_coverage.py │ ◄── manifest.json + html_report/ +└────┬──────────────────┘ + │ • Modifies HTML in-place (restyled justified lines/branches) + │ • report.json + summary.txt + ▼ + Console output: effective coverage summary +``` + +Right now we do not perform the justification and effective coverage calculation in the reporter, as it will not have +access to the whole code base, which makes the integration more difficult. This may be addressed as a future improvement.
+ +--- + +## merger.py — Per-Test Coverage Output Generator + +**Bazel role:** `--coverage_output_generator` (replaces the default `collect_coverage.sh` output step) + +**What it does:** + +1. Receives `.profraw` files from a single test execution +2. Finds the instrumented object files from the source manifest +3. Runs `llvm-profdata merge` to create a `.profdata` file +4. Collects metadata (llvm-tools path, workspace root, excluded source patterns) +5. Packages `{profdata, metadata.json}` into a zip file for the reporter + +**Interface (called by Bazel's `collect_coverage.sh`):** + +``` +merger.py --coverage_dir= \ + --output_file= \ + --source_file_manifest= \ + --filter_sources= \ # repeatable + [--sources_to_replace_file=] +``` + +**Key behaviors:** + +- Resolves the actual workspace root by following Bazel sandbox symlinks (important for `--path-equivalence` in later stages) +- Cleans up dangling symlinks in the coverage directory that can cause `llvm-profdata` to fail +- Extracts `--ignore-filename-regex` patterns from `--filter_sources` for source filtering + +--- + +## reporter.py — Final Combined Report Generator + +**Bazel role:** `--coverage_report_generator` (replaces the default lcov-based reporter) + +**What it does:** + +1. Reads the list of per-test zip files from `--reports_file` +2. Extracts profdata + metadata from each zip +3. Merges all profdata into a single `merged_coverage.profdata` via `llvm-profdata merge` +4. Generates three output formats: + - **HTML report** via `llvm-cov show --format=html` with branch counts and expansion views + - **LCOV data** via `llvm-cov export --format=lcov` (backward compatibility with dashboards) + - **Text summary** via `llvm-cov report --summary-only` +5. 
Packages everything into a zip file at `_coverage_report.dat` + +**Interface (called by Bazel):** + +``` +reporter.py --reports_file= \ + --output_file= +``` + +**Source filtering:** + +The reporter applies `--ignore-filename-regex` to all `llvm-cov` commands to exclude: +- Test files and benchmarks +- External/third-party code +- Any paths matching patterns collected from `--filter_sources` during the merger phase + +These patterns are propagated via `metadata.json` in each per-test zip. + +**llvm-cov show options:** + +| Option | Purpose | +|--------|---------| +| `--show-branches=count` | Show branch coverage with execution counts | +| `--show-expansions` | Expand template instantiations inline | +| `--coverage-watermark=100,50` | Green ≥100%, yellow ≥50%, red <50% | +| `--path-equivalence=/proc/self/cwd/,` | Map sandbox paths to real source paths | +| `--Xdemangler=llvm-cxxfilt` | Demangle C++ symbol names | + +--- + +## justify.py — Justification Resolver + +**What it does:** + +Resolves all justified lines from two sources and produces a unified manifest: + +1. **YAML locations** — `file` + `line_start`/`line_end` entries in `coverage_justifications.yaml` +2. 
**In-code markers** — `COV_JUSTIFIED <id>`, `COV_JUSTIFIED_START <id>` / `COV_JUSTIFIED_STOP` comments + +**Interface:** + +``` +python3 justify.py --yaml <yaml_path> \ + --source-root <source_root> \ + --output <manifest_path> +``` + +**Output format (manifest.json):** + +```json +{ + "version": 1, + "justified_files": { + "score/mw/com/impl/some_file.cpp": { + "42": {"id": "my-id", "category": "defensive_programming", "reason": "..."}, + "43": {"id": "my-id", "category": "defensive_programming", "reason": "..."} + } + } +} +``` + +**Validation rules:** + +- Justification IDs must be unique and kebab-case (lowercase letters, digits, and hyphens) +- Every justification must have a non-empty `reason` +- Category must be one of: `defensive_programming`, `tool_false_positive`, `platform_specific`, `other` +- In-code `COV_JUSTIFIED <id>` markers must reference an ID defined in the YAML + +**In-code marker patterns:** + +| Pattern | Scope | +|---------|-------| +| `// COV_JUSTIFIED <id>` | Justifies the current line | +| `// COV_JUSTIFIED_START <id>` | Starts a justified region | +| `// COV_JUSTIFIED_STOP` | Ends the justified region | + +--- + +## effective_coverage.py — HTML Post-Processor & Coverage Calculator + +**What it does:** + +1. Loads the justification manifest from `justify.py` +2. Post-processes the llvm-cov HTML report in-place: + - **Lines:** Uncovered justified lines get class `justified-line`, count shows "J", code background turns orange + - **Branches:** Uncovered branches on justified lines get class `justified-branch` (orange text) +3. Updates the index page: + - Adds an effective coverage banner (line + branch) + - Updates per-file line% and branch% cells to show effective (raw + justified) coverage + - Updates the TOTALS row +4. Calculates and reports effective coverage metrics +5.
Detects stale justifications + +**Interface:** + +``` +python3 effective_coverage.py --html-dir <html_dir> \ + --manifest <manifest.json> \ + --output <output_dir> +``` + +**Output files:** + +| File | Content | +|------|---------| +| `report.json` | Machine-readable report: summary stats, applied justifications, stale warnings | +| `summary.txt` | Human-readable coverage summary | + +**Template instantiation handling:** + +C++ templates produce multiple instantiations of the same source line in the HTML. The tool handles this correctly: +- **Line coverage:** A line is "covered" if ANY instantiation covers it. All instantiation occurrences of a justified line are restyled. +- **Branch coverage:** A branch direction (True/False) is "truly uncovered" only if NO instantiation covers it. Only truly uncovered branch directions count toward justified branches. +- **Statistics:** Raw coverage numbers are parsed directly from the llvm-cov index page TOTALS row, guaranteeing an exact match with llvm-cov's own calculations. + +**Stale justification detection:** + +A justification is "stale" when BOTH: +- The line is already covered by tests, AND +- All branches at that line are covered + +If the line is covered but has uncovered branches, the justification is still needed (for the branches) and is NOT stale. + +**CSS classes injected into style.css:** + +| Class | Applied To | Visual | +|-------|-----------|--------| +| `.justified-line` | Count cell (`<td>`) | Right-aligned orange text, shows "J" | +| `.region.justified` | Code spans (replacing `.region.red`) | Orange background | +| `.justified-branch` | Branch direction spans (replacing `.red.branch`) | Bold orange text | +| `tr:has(> td.justified-line) > td.code` | Entire code cell for justified rows | Light orange background | + +--- + +## Bazel Build Targets + +Defined in `BUILD`: + +```python +py_binary(name = "merger", ...) # Per-test coverage output generator +py_binary(name = "reporter", ...) 
# Final combined report generator +py_binary(name = "justify", ...) # Justification resolver +py_binary(name = "effective_coverage", ...) # HTML post-processor +``` + +The `reporter` target includes `justify.py` and `effective_coverage.py` as `data` dependencies for best-effort in-sandbox justification processing. + +## Dependencies + +- **Python 3** (system Python, no virtualenv needed) +- **PyYAML** — for parsing `coverage_justifications.yaml` (available via pip or system package) +- **llvm-profdata** — for merging profraw/profdata files (from LLVM toolchain) +- **llvm-cov** — for generating HTML, LCOV, and text reports (from LLVM toolchain) diff --git a/quality/quality.md b/quality/quality.md index 4a28c334e..bf02b8b8e 100644 --- a/quality/quality.md +++ b/quality/quality.md @@ -110,13 +110,13 @@ The query configuration is defined in [`quality/static_analysis/config.yaml`](st ## Coverage -Code coverage is generated using LLVM's source-based coverage instrumentation. The instrumentation filter is configured in [`quality/coverage.bazelrc`](coverage.bazelrc) to cover `//score/message_passing` and `//score/mw/com` while excluding test and benchmark code. +Code coverage is generated using LLVM's source-based coverage instrumentation. The instrumentation filter is configured in [`quality/coverage/coverage.bazelrc`](coverage/coverage.bazelrc) to cover `//score/message_passing` and `//score/mw/com` while excluding test and benchmark code. -### Running Coverage +HTML reports are generated directly by `llvm-cov show`. + +For detailed documentation of the pipeline architecture, tools, and requirements, see [`quality/coverage/README.md`](coverage/README.md) and [`quality/coverage/llvm_cov/README.md`](coverage/llvm_cov/README.md). -> **Note:** The commands below assume `--combined_report=lcov` is set, which enables -> a combined LCOV report across all test targets. 
This flag is already configured in -> [`quality/coverage.bazelrc`](coverage.bazelrc) (imported from the repository root `.bazelrc`). +### Running Coverage ```bash bazel coverage //... @@ -125,13 +125,13 @@ bazel coverage //... To run coverage for a specific target: ```bash -bazel coverage --combined_report=lcov //score/message_passing:client_connection_test_linux +bazel coverage //score/message_passing:client_connection_test_linux ``` -When [`quality/coverage.bazelrc`](coverage.bazelrc) is active, the combined LCOV report is written to -`bazel-out/_coverage/_coverage_report.dat`. +The coverage report generator produces a zip file at +`bazel-out/_coverage/_coverage_report.dat` containing the HTML report, an LCOV export, and a text summary. -To generate an HTML report from the LCOV data (works for both full and single-target runs): +To extract the HTML report (works for both full and single-target runs): ```bash bazel run //quality/coverage:generate_coverage_html @@ -143,6 +143,12 @@ The report is written to `cpp_coverage/index.html`. Open it with: xdg-open cpp_coverage/index.html ``` +### Coverage Justifications + +To achieve 100% effective coverage, lines and branches that cannot be covered by tests (defensive programming, tool false positives, etc.) can be *justified*. Justified lines and branches appear in **yellow/orange** in the HTML report (vs green=covered, red=uncovered). + +Justifications are defined in [`quality/coverage/coverage_justifications.yaml`](coverage/coverage_justifications.yaml). A justification covers both the line itself and any branches on that line. + ## Sanitizers Address, undefined behavior, leak, and thread sanitizers are also available: