diff --git a/.github/scripts/check_coverage_map_health.py b/.github/scripts/check_coverage_map_health.py
new file mode 100644
index 0000000000..f6449b8c83
--- /dev/null
+++ b/.github/scripts/check_coverage_map_health.py
@@ -0,0 +1,26 @@
+"""Fail loudly if the committed coverage map is stale or under-covers. Used by coverage-health.yml."""
+import datetime
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "toolchain"))  # parents[2] is the repo root
+from mfc.test.coverage import COVERAGE_MAP_PATH, load_map, map_health  # noqa: E402
+from mfc.test.cases import list_cases  # noqa: E402 (returns the current test list)
+
+MAX_AGE_DAYS = 10
+MIN_FRACTION = 0.80
+
+entries, meta = load_map(Path(__file__).resolve().parents[2] / COVERAGE_MAP_PATH)  # anchor to the repo root, not the caller's cwd
+if entries is None:
+    sys.exit("Coverage map missing or corrupt.")  # a string argument is printed to stderr and exits with status 1
+current_keys = {b.to_case().coverage_key() for b in list_cases()}
+ok, msg = map_health(
+    meta=meta,
+    current_keys=current_keys,
+    mapped_keys=set(entries),
+    now=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+    max_age_days=MAX_AGE_DAYS,
+    min_fraction=MIN_FRACTION,
+)
+print(msg)
+sys.exit(0 if ok else 1)
diff --git a/.github/workflows/common/coverage-refresh.sh b/.github/workflows/common/coverage-refresh.sh
new file mode 100755
index 0000000000..8541e04429
--- /dev/null
+++ b/.github/workflows/common/coverage-refresh.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -e
+NJOBS="${SLURM_CPUS_ON_NODE:-24}"; [ "$NJOBS" -gt 64 ] && NJOBS=64
+./mfc.sh clean
+source .github/scripts/retry-build.sh
+retry_build ./mfc.sh build --gcov -j 8
+./mfc.sh test --build-coverage-map --gcov -j "$NJOBS"
diff --git a/.github/workflows/common/test.sh b/.github/workflows/common/test.sh
index 08827178fb..cb856f73ba 100644
--- a/.github/workflows/common/test.sh
+++ b/.github/workflows/common/test.sh
@@ -57,4 +57,12 @@ if [ -n "${job_shard:-}" ]; then
     shard_opts="--shard $job_shard"
 fi
 
-./mfc.sh test -v --max-attempts 3 --no-build -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- 
-c $job_cluster
+# Coverage-based test selection in SHADOW mode on PRs: prints what it WOULD select but the
+# full suite still runs (no --select-enforce). Changed files come from git detection
+# (self-healing deepen) since the SLURM job doesn't receive the paths-filter list.
+select_opts=""
+if [ "${GITHUB_EVENT_NAME:-}" = "pull_request" ]; then
+    select_opts="--only-changes"
+fi
+
+./mfc.sh test -v --max-attempts 3 --no-build $select_opts -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
diff --git a/.github/workflows/coverage-health.yml b/.github/workflows/coverage-health.yml
new file mode 100644
index 0000000000..5fb37e3472
--- /dev/null
+++ b/.github/workflows/coverage-health.yml
@@ -0,0 +1,18 @@
+# .github/workflows/coverage-health.yml
+name: 'Coverage Map Health'
+on:
+  schedule:
+    - cron: '0 7 * * *' # daily at minute 0 of hour 7; loud if the refresh stopped working
+  workflow_dispatch:
+jobs:
+  health:
+    if: github.repository == 'MFlowCode/MFC' # runs only in the MFlowCode/MFC repository
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with: { python-version: '3.12' }
+      - name: Initialize MFC
+        run: ./mfc.sh init
+      - name: Check coverage map freshness
+        run: build/venv/bin/python3 .github/scripts/check_coverage_map_health.py
diff --git a/.github/workflows/coverage-refresh.yml b/.github/workflows/coverage-refresh.yml
new file mode 100644
index 0000000000..2fbb5844a1
--- /dev/null
+++ b/.github/workflows/coverage-refresh.yml
@@ -0,0 +1,44 @@
+# .github/workflows/coverage-refresh.yml
+name: 'Coverage Map Refresh'
+on:
+  schedule:
+    - cron: '0 6 * * 1' # weekly floor: minute 0, hour 6, day-of-week 1
+  push:
+    branches: [master]
+    paths: # refresh when the test definitions or any .fpp source change on master
+      - 'toolchain/mfc/test/cases.py'
+      - 'src/**/*.fpp'
+  workflow_dispatch:
+permissions:
+  contents: write # the job pushes the refreshed map back to the repository
+concurrency:
+  group: coverage-refresh
+  cancel-in-progress: true # a newer trigger cancels a refresh that is still running
+jobs:
+  refresh:
+    if: github.repository == 'MFlowCode/MFC' # runs only in the MFlowCode/MFC repository
+    timeout-minutes: 240
+    runs-on:
+      group: phoenix
+      labels: gt
+    steps:
+      - uses: actions/checkout@v4
+        with: { clean: false }
+      - name: Build + collect coverage map (SLURM)
+        run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/coverage-refresh.sh cpu none phoenix
+      - name: Commit refreshed map
+        run: |
+          git add tests/coverage_map.json.gz  # stage first: plain `git diff` ignores untracked files
+          if ! git diff --cached --quiet -- tests/coverage_map.json.gz; then
+            git config user.name "mfc-bot"
+            git config user.email "mfc-bot@users.noreply.github.com"
+            git commit -m "test: refresh coverage map [skip ci]"
+            # NOTE: pushing to a protected default branch requires a token or
+            # GitHub App with bypass-branch-protection permission. The default
+            # GITHUB_TOKEN may be rejected by branch protection rules; if so, pass a
+            # PAT or App token with `contents: write` via the `token:` input of the
+            # actions/checkout step above, whose persisted credentials git push uses.
+            git push origin HEAD:master
+          else
+            echo "Coverage map unchanged."
+          fi
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d81a0e1659..1c05d68c17 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -53,6 +53,7 @@ jobs:
     runs-on: 'ubuntu-latest'
     outputs:
       checkall: ${{ steps.changes.outputs.checkall }}
+      changed_files: ${{ steps.changes.outputs.checkall_files }}
     steps:
       - name: Clone
         uses: actions/checkout@v4
@@ -62,6 +63,7 @@ jobs:
         id: changes
         with:
           filters: ".github/file-filter.yml"
+          list-files: shell
 
   github:
     name: ${{ matrix.nvhpc && format('NVHPC {0} ({1})', matrix.nvhpc, matrix.target) || format('Github ({0}, {1}, {2}, {3})', matrix.os, matrix.mpi, matrix.debug, matrix.intel && 'intel' || 'GNU') }}
@@ -262,11 +264,17 @@ jobs:
       - name: Test
         if: '!matrix.nvhpc'
         run: |
-          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT $PRECISION
+          # Coverage-based test selection in SHADOW mode on PRs: the selector
+          # prints what it WOULD run, but the full suite still runs (no
+          # --select-enforce). Enforcement is a separate, later change. 
+          SELECT=()  # stays empty off-PR, so "${SELECT[@]}" below expands to no arguments
+          [ "${{ github.event_name }}" = "pull_request" ] && SELECT=(--only-changes --changed-files "$CHANGED_FILES")
+          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) "${SELECT[@]}" $TEST_ALL $TEST_PCT $PRECISION
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
           TEST_PCT: ${{ matrix.debug == 'reldebug' && '-% 20' || '' }}
           PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }}
+          CHANGED_FILES: ${{ needs.file-changes.outputs.changed_files }} # forwarded to --changed-files on PRs
 
       # ── NVHPC build + test (via docker exec into long-lived container) ──
       - name: Build (NVHPC)
diff --git a/tests/coverage_map.json.gz b/tests/coverage_map.json.gz
new file mode 100644
index 0000000000..06128cb395
Binary files /dev/null and b/tests/coverage_map.json.gz differ
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 135ce57dbc..06c55cbdce 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -464,6 +464,25 @@
                 type=str,
                 default=None,
             ),
+            Argument(
+                name="build-coverage-map", dest="build_coverage_map", action=ArgAction.STORE_TRUE, default=False, help="Build the gcov coverage map (requires a prior --gcov build). Master-side only."
+            ),
+            Argument(
+                name="only-changes",
+                dest="only_changes",
+                action=ArgAction.STORE_TRUE,
+                default=False,
+                help="Select only tests whose covered files overlap changed files (shadow mode unless --select-enforce).",
+            ),
+            Argument(
+                name="select-enforce",
+                dest="select_enforce",
+                action=ArgAction.STORE_TRUE,
+                default=False,
+                help="With --only-changes, actually skip unselected tests (otherwise shadow: print selection, run all).",
+            ),
+            Argument(name="changed-files", dest="changed_files", type=str, default=None, help="Changed-file list (newline-, space-, or comma-separated; from CI paths-filter). 
Overrides git detection."),
+            Argument(name="changes-branch", dest="changes_branch", type=str, default="master", help="Branch to diff against for --only-changes."),
         ],
         mutually_exclusive=[
             MutuallyExclusiveGroup(
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index 05c4bfa0bc..bfc4b1f9eb 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -246,6 +246,11 @@ def get_trace(self) -> str:
     def get_uuid(self) -> str:
         return trace_to_uuid(self.trace)
 
+    def coverage_key(self) -> str:  # coverage-map key for this case: the hash of its params
+        from .coverage import param_hash  # function-scope import; presumably avoids an import cycle — confirm
+
+        return param_hash(self.params)
+
     def get_dirpath(self):
         return os.path.join(common.MFC_TEST_DIR, self.get_uuid())
 
@@ -371,6 +376,9 @@ class TestCaseBuilder:
     def get_uuid(self) -> str:
         return trace_to_uuid(self.trace)
 
+    def coverage_key(self) -> str:  # same key as the TestCase this builder produces
+        return self.to_case().coverage_key()
+
     def to_case(self) -> TestCase:
         if self.kind == "convergence":
             # Convergence cases drive their own runs — the BASE_CFG mods/path
diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py
new file mode 100644
index 0000000000..af54ec4120
--- /dev/null
+++ b/toolchain/mfc/test/coverage.py
@@ -0,0 +1,234 @@
+"""Execution-coverage-based test selection (PR path).
+
+Selection is sound (only over-includes) and its failures are loud. See
+docs/superpowers/specs/2026-05-29-coverage-test-selection-design.md.
+"""
+
+import datetime
+import gzip
+import hashlib
+import json
+import subprocess
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+def param_hash(params: dict) -> str:
+    """Stable 16-hex key identifying a test by its defining params.
+
+    Independent of dict ordering and of the human-readable trace, so cosmetic
+    cases.py edits don't change the key; a real param change does.
+    """
+    canonical = json.dumps(params, sort_keys=True, separators=(",", ":"), default=str)  # sort_keys -> order-independent; default=str stringifies non-JSON values
+    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16]  # first 16 hex digits of the SHA-256
+
+
+COVERAGE_MAP_PATH = Path("tests/coverage_map.json.gz")  # relative path; resolved against the process cwd when opened
+
+
+def save_map(path: Path, entries: dict, *, n_tests: int, git_sha: str, gfortran_version: str) -> None:  # writes entries plus a "_meta" record as gzip-compressed JSON
+    payload = dict(entries)  # shallow copy, so the caller's dict never gains the "_meta" key
+    payload["_meta"] = {
+        "built_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),  # timezone-aware UTC, ISO-8601
+        "git_sha": git_sha,
+        "gfortran_version": gfortran_version,
+        "n_tests": n_tests,
+    }
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with gzip.open(path, "wt", encoding="utf-8") as f:
+        json.dump(payload, f, indent=2, sort_keys=True)
+
+
+# Test-definition file: changing it adds/modifies tests, but only the tests it touches
+# (their param_hash changes -> not in map -> rung 5 runs them). NOT in ALWAYS_RUN_ALL so a
+# test addition doesn't blanket-run the whole suite.
+CASES_PY = "toolchain/mfc/test/cases.py"
+
+ALWAYS_RUN_ALL_EXACT = frozenset(
+    [
+        "CMakeLists.txt",
+    ]
+)
+ALWAYS_RUN_ALL_PREFIXES = (
+    "src/common/include/",  # GPU/Fypp macro & include files (CPU map can't line-attribute)
+    "toolchain/cmake/",  # build system
+    "toolchain/bootstrap/",  # build/run scripts
+)
+
+# Conservative allowlist of files that are provably irrelevant to test outcomes.
+# Only add files here if you are 100% certain a change cannot affect any test result.
+# Err toward run-all: a false "irrelevant" classification causes under-inclusion (skipping
+# a test that should run), which violates the soundness invariant. 
+_IRRELEVANT_SUFFIXES = (".md", ".rst")
+_IRRELEVANT_EXACT = frozenset(["LICENSE", "LICENSE.md", ".gitignore", ".gitattributes", ".mailmap", ".editorconfig", "CITATION.cff"])
+_IRRELEVANT_PREFIXES = ("docs/", ".claude/", ".github/ISSUE_TEMPLATE/")
+
+
+def _is_test_irrelevant(f: str) -> bool:
+    """Return True iff the file is provably irrelevant to test outcomes (docs, config)."""
+    return f.endswith(_IRRELEVANT_SUFFIXES) or f in _IRRELEVANT_EXACT or f.startswith(_IRRELEVANT_PREFIXES)
+
+
+def is_always_run_all(changed_files: set) -> bool:
+    """True if any changed file forces the full suite (un-attributable by the CPU map)."""
+    if changed_files & ALWAYS_RUN_ALL_EXACT:
+        return True
+    if any(f.startswith(ALWAYS_RUN_ALL_PREFIXES) for f in changed_files):
+        return True
+    # Any toolchain/mfc/*.py change (params/, run/, test infra, case.py, common.py,
+    # build.py, state.py, sched.py, …) affects EVERY test's generation or execution and
+    # cannot be attributed to individual tests by the coverage map. Treat the entire
+    # toolchain/mfc/ subtree as run-all EXCEPT cases.py, which is handled precisely by
+    # rung 5 (new/modified tests have a fresh param_hash absent from the map and run
+    # individually; unchanged tests have no .fpp overlap and are skipped).
+    if any(f.startswith("toolchain/mfc/") and f.endswith(".py") and f != CASES_PY for f in changed_files):
+        return True
+    # gcov rolls #:include'd .fpp into the parent compilation unit, so include files
+    # (inline_*.fpp, HardcodedIC, macros) are not reliably attributed in the map. Force a
+    # full run for any .fpp under src/**/include/ so this attribution gap can never cause
+    # under-inclusion — by rule, not by relying on the file being absent from the map.
+    return any(f.startswith("src/") and "/include/" in f and f.endswith(".fpp") for f in changed_files)
+
+
+def load_map(path: Path) -> Tuple[Optional[dict], Optional[dict]]:
+    """Return (entries_without_meta, meta), or (None, None) if missing/corrupt.
+
+    "Corrupt" covers an unreadable, truncated or damaged gzip stream, invalid
+    JSON/UTF-8, a payload that is not a dict carrying a dict "_meta" record, and any
+    entry that is not a str -> list[str] mapping.
+    """
+    import zlib  # function-scope: only needed to name zlib.error below
+
+    if not Path(path).exists():
+        return None, None
+    try:
+        with gzip.open(path, "rt", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, gzip.BadGzipFile, EOFError, zlib.error, json.JSONDecodeError, UnicodeDecodeError):
+        # gzip reports a truncated stream as EOFError and damaged deflate data as
+        # zlib.error; neither is an OSError, so both have to be listed explicitly.
+        return None, None
+    # A non-dict "_meta" would make later meta.get(...) calls fail, so treat it as corrupt.
+    if not isinstance(data, dict) or not isinstance(data.get("_meta"), dict):
+        return None, None
+    meta = data.pop("_meta")
+    for k, v in data.items():
+        if not isinstance(k, str) or not isinstance(v, list) or not all(isinstance(x, str) for x in v):
+            return None, None
+    return data, meta
+
+
+def _covered_fpp(coverage_map: dict) -> set:
+    """Return the union of every file list in the map's values."""
+    files = set()
+    for cov in coverage_map.values():
+        files.update(cov)
+    return files
+
+
+def select_tests(cases, coverage_map, changed_files):
+    """Return (to_run, skipped, reason). Conservative ladder -- only over-includes.
+
+    `cases` items expose `.coverage_key()`. `changed_files` is a set of repo-relative
+    paths, or None if detection failed.
+    """
+    # Rung 1: no changed-file info -> run all.
+    if changed_files is None:
+        return list(cases), [], "rung1: changed-file list unavailable"
+
+    # Rung 2: macro/codegen/build inputs -> run all.
+    if is_always_run_all(changed_files):
+        return list(cases), [], "rung2: macro/codegen/build input changed"
+
+    # Rung 3: changed hand-written Fortran source under src/ (map tracks .fpp only) -> run
+    # all. Match case-insensitively to catch .F90, .F95, .FOR, etc. 
+    _FORTRAN_EXTS = (".f90", ".f", ".f95", ".f03", ".f08", ".for")
+    if any(f.startswith("src/") and f.lower().endswith(_FORTRAN_EXTS) for f in changed_files):
+        return list(cases), [], "rung3: hand-written .f90/.f changed"
+
+    changed_fpp = {f for f in changed_files if f.endswith(".fpp")}
+    # Unattributable-change guard: if any changed file is not a .fpp (handled by the
+    # coverage map), not cases.py (handled by rung 5), and not provably test-irrelevant
+    # (docs), we cannot attribute the change to individual tests -> run all. This is the
+    # catch-all that covers mfc.sh, .github/**, tests/**, toolchain/pyproject.toml, etc.
+    # The old ALWAYS_RUN_ALL_* sets are still checked first (rungs 2-3) for common cases.
+    unattributable = [f for f in changed_files if not f.endswith(".fpp") and f != CASES_PY and not _is_test_irrelevant(f)]
+    if unattributable:
+        return list(cases), [], "rung2: unattributable change (not .fpp/cases.py/docs) -> run all"  # NOTE(review): reuses the "rung2" label for this separate catch-all
+    # Only .fpp + cases.py + irrelevant docs remain.
+    if not changed_fpp and CASES_PY not in changed_files:
+        return [], list(cases), "rung7: only docs/irrelevant files changed"  # the only early exit that skips every test
+
+    # Rung 4: a changed .fpp that no test covers -> run all (GPU-only blind spot).
+    covered = _covered_fpp(coverage_map)
+    if changed_fpp - covered:
+        return list(cases), [], "rung4: changed .fpp not covered by any test"
+
+    # Rungs 5-7: per-test. 
+    to_run, skipped = [], []
+    for case in cases:
+        key = case.coverage_key()
+        cov = coverage_map.get(key)  # None when this test has no entry in the map
+        if not cov:  # rung 5: unmapped/new test, or empty (uncertain) coverage -> run
+            to_run.append(case)
+        elif set(cov) & changed_fpp:  # rung 6: overlap
+            to_run.append(case)
+        else:  # rung 7: skip
+            skipped.append(case)
+    return to_run, skipped, f"selected {len(to_run)}/{len(cases)} by coverage overlap"
+
+
+def _git(args, cwd, timeout=60):  # run `git <args>` in cwd; check=False, so callers inspect returncode themselves
+    return subprocess.run(["git", *args], capture_output=True, text=True, cwd=cwd, timeout=timeout, check=False)
+
+
+def _merge_base(cwd, branch):  # merge-base of HEAD with the local branch, else with origin/<branch>; None if neither works
+    for ref in (branch, f"origin/{branch}"):
+        r = _git(["merge-base", ref, "HEAD"], cwd)
+        if r.returncode == 0 and r.stdout.strip():
+            return r.stdout.strip()
+    return None
+
+
+def get_changed_files(root_dir, compare_branch="master", explicit: Optional[str] = None):
+    """Set of changed repo-relative paths, or None if undeterminable (-> run all).
+
+    `explicit` is a changed-file list from CI (paths-filter); preferred when given. It may
+    be separated by newlines, spaces, or commas (paths-filter's shell output is space-sep).
+    Otherwise use git merge-base, self-healing a shallow clone with a deepen+retry.
+    """
+    if explicit is not None:
+        files = {f for f in explicit.replace(",", " ").split() if f.strip()}  # commas become spaces, then split on any whitespace run
+        if files:
+            return files
+        # explicit was given but empty/whitespace -> ambiguous (a paths-filter/env failure vs
+        # genuinely nothing). Per the soundness invariant, uncertainty must run, not skip:
+        # fall through to git detection, ultimately None -> rung 1 (run all). Never return
+        # an empty set here, which would be read as "nothing changed -> skip all". 
+    try:
+        base = _merge_base(root_dir, compare_branch)
+        if base is None:
+            _git(["fetch", "origin", f"{compare_branch}:{compare_branch}", "--depth=1"], root_dir, 120)
+            _git(["fetch", "--deepen=200"], root_dir, 120)
+            base = _merge_base(root_dir, compare_branch)
+        if base is None:
+            return None
+        diff = _git(["diff", base, "HEAD", "--name-only", "--no-color"], root_dir)
+        if diff.returncode != 0:
+            return None
+        return {f for f in diff.stdout.splitlines() if f.strip()}
+    except (subprocess.TimeoutExpired, OSError):
+        return None
+
+
+def _map_age_days(meta, now) -> Optional[int]:
+    """Whole days from meta["built_at"] to `now` (both ISO-8601 strings), or None if unusable."""
+    try:
+        built = datetime.datetime.fromisoformat(meta["built_at"])
+        return (datetime.datetime.fromisoformat(now) - built).days
+    except (KeyError, TypeError, ValueError):
+        # KeyError/TypeError: meta is None, not a mapping, lacks the key, or holds a
+        # non-string; ValueError: malformed timestamp; TypeError also covers subtracting
+        # a naive datetime from an aware one.
+        return None
+
+
+def format_summary(*, ran, total, reason, meta, now) -> str:
+    """Return the one-line selection summary; the map age degrades to "unknown" if unusable."""
+    age_days = _map_age_days(meta, now)
+    age = f"map age {age_days}d" if age_days is not None else "map age unknown"
+    return f"Coverage selection: ran {ran}/{total} tests · {age} · {reason}"
+
+
+def map_health(*, meta, current_keys, mapped_keys, now, max_age_days, min_fraction):
+    """Return (ok, message). Loud anti-rot check used by the health workflow.
+
+    Fails when build metadata is missing or its timestamp is unusable, when the map is
+    older than `max_age_days`, or when less than `min_fraction` of `current_keys` appear
+    in `mapped_keys`. An empty `current_keys` skips the fraction check.
+    """
+    if not isinstance(meta, dict) or not meta.get("built_at"):
+        return False, "Coverage map has no build metadata."
+    age = _map_age_days(meta, now)
+    if age is None:
+        # A bad timestamp is reported as a failed check rather than surfacing as a traceback.
+        return False, f"Coverage map has an unusable build timestamp: {meta['built_at']!r}."
+    if age > max_age_days:
+        return False, f"Coverage map is STALE: {age}d old (max {max_age_days}d). Refresh workflow may be broken."
+    if current_keys:
+        frac = len(current_keys & mapped_keys) / len(current_keys)
+        if frac < min_fraction:
+            return False, f"Coverage map under-covers: {frac:.0%} of current tests mapped (min {min_fraction:.0%})."
+    return True, f"Coverage map healthy: {age}d old."
diff --git a/toolchain/mfc/test/coverage_build.py b/toolchain/mfc/test/coverage_build.py
new file mode 100644
index 0000000000..311d10d44b
--- /dev/null
+++ b/toolchain/mfc/test/coverage_build.py
@@ -0,0 +1,593 @@
+"""
+File-level gcov coverage-map builder for MFC (master-side only). 
+
+Build MFC once with gfortran --coverage, run all tests individually, record
+which .fpp files each test executes, and write a coverage map keyed by
+param_hash (stable across cosmetic cases.py edits).
+
+Workflow:
+    ./mfc.sh build --gcov -j 8                        # one-time: instrumented build
+    ./mfc.sh test --build-coverage-map --gcov -j 8    # one-time: populate the map
+"""
+
+import io
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Optional
+
+from .. import common
+from ..build import POST_PROCESS, PRE_PROCESS, SIMULATION
+from ..common import MFCException
+from ..printer import cons
+from .case import POST_PROCESS_3D_PARAMS, get_post_process_mods, input_bubbles_lagrange
+
+
+def _get_gcov_version(gcov_binary: str) -> str:
+    """Return the version string from gcov --version."""
+    try:
+        result = subprocess.run([gcov_binary, "--version"], capture_output=True, text=True, timeout=10, check=False)
+        for line in result.stdout.splitlines():
+            if line.strip():
+                return line.strip()  # first non-blank line of the output
+    except (OSError, subprocess.SubprocessError):
+        pass  # best effort: a launch failure or timeout falls through to "unknown"
+    return "unknown"
+
+
+def find_gcov_binary() -> str:
+    """
+    Find a GNU gcov binary compatible with the system gfortran.
+
+    On macOS with Homebrew GCC, the binary is gcov-{major} (e.g. gcov-15).
+    On Linux with system GCC, plain gcov is usually correct.
+    Apple LLVM's /usr/bin/gcov is incompatible with gfortran .gcda files. 
+    """
+    # Determine gfortran major version
+    major = None  # stays None if gfortran cannot be run or prints no x.y.z version
+    try:
+        result = subprocess.run(["gfortran", "--version"], capture_output=True, text=True, timeout=10, check=False)
+        m = re.search(r"(\d+)\.\d+\.\d+", result.stdout)
+        if m:
+            major = m.group(1)
+    except (OSError, subprocess.SubprocessError):
+        pass  # no usable gfortran: only plain "gcov" will be tried below
+
+    # Try versioned binary first (Homebrew macOS), then plain gcov
+    candidates = []
+    if major:
+        candidates.append(f"gcov-{major}")
+    candidates.append("gcov")
+
+    for candidate in candidates:
+        path = shutil.which(candidate)
+        if path is None:
+            continue
+        try:
+            result = subprocess.run([path, "--version"], capture_output=True, text=True, timeout=10, check=False)
+            version_out = result.stdout
+            if "Apple LLVM" in version_out or "Apple clang" in version_out:
+                continue  # Apple's gcov cannot parse GCC-generated .gcda files
+            if "GCC" in version_out or "GNU" in version_out:
+                return path  # first candidate whose banner identifies it as GNU/GCC
+        except (OSError, subprocess.SubprocessError):
+            continue
+
+    raise MFCException(
+        "GNU gcov not found. gcov is required for the coverage map.\n"
+        " On macOS (Homebrew): brew install gcc\n"
+        " On Linux (Debian/Ubuntu): apt install gcc\n"
+        " On Linux (RHEL/CentOS): yum install gcc\n"
+        "Apple's /usr/bin/gcov is incompatible with gfortran .gcda files."
+    )
+
+
+def find_gcno_files(root_dir: str) -> list:
+    """
+    Walk build/ and return all .gcno files (excluding venv paths).
+    Raises if none found (indicates build was not done with --gcov).
+    """
+    build_dir = Path(root_dir) / "build"
+    gcno_files = [p for p in build_dir.rglob("*.gcno") if "venv" not in p.parts]  # drops any path with a component named exactly "venv"
+    if not gcno_files:
+        raise MFCException("No .gcno files found. Build with --gcov instrumentation first:\n ./mfc.sh build --gcov -j 8")
+    return gcno_files
+
+
+def _parse_gcov_json_output(raw_bytes: bytes, root_dir: str) -> Optional[set]:
+    """
+    Parse gcov JSON output and return the set of .fpp file paths with coverage.
+    Handles both gzip-compressed (gcov 13+) and raw JSON (gcov 12) formats. 
+    Handles concatenated JSON objects from batched gcov calls (multiple .gcno
+    files passed to a single gcov invocation).
+    Only .fpp files with at least one executed line are included.
+    Returns None when the input cannot be trusted: a truncated or damaged gzip
+    stream, a JSON parse error, a non-object JSON value, or no JSON at all.
+    """
+    import gzip
+    import json
+    import zlib
+
+    try:
+        text = gzip.decompress(raw_bytes).decode("utf-8", errors="replace")
+    except (EOFError, zlib.error):
+        # The bytes carry a gzip header but the stream is truncated (EOFError) or its
+        # deflate data is damaged (zlib.error). Neither is an OSError, so without this
+        # clause they would escape; a cut-off stream is untrustworthy, so report None.
+        return None
+    except (gzip.BadGzipFile, OSError):
+        text = raw_bytes.decode("utf-8", errors="replace")
+
+    result = set()
+    real_root = os.path.realpath(root_dir)
+    parsed_any = False
+
+    # Parse potentially concatenated JSON objects (one per .gcno file).
+    decoder = json.JSONDecoder()
+    pos = 0
+    while pos < len(text):
+        while pos < len(text) and text[pos] in " \t\n\r":
+            pos += 1
+        if pos >= len(text):
+            break
+        try:
+            data, end_pos = decoder.raw_decode(text, pos)
+            pos = end_pos
+            parsed_any = True
+        except json.JSONDecodeError:
+            remaining = len(text) - pos
+            if remaining > 0:
+                cons.print(f"[yellow]Warning: gcov JSON parse error at offset {pos} ({remaining} bytes remaining) — coverage for this test is untrustworthy, omitting from map.[/yellow]")
+            # A mid-stream parse error means the JSON stream was truncated or
+            # corrupted. A partial coverage set is untrustworthy: a .fpp that
+            # would have been recorded in the missing portion would be silently
+            # skipped by select_tests on future runs. Return None so the caller
+            # omits this test from the map entirely (conservatively included).
+            return None
+
+        if not isinstance(data, dict):
+            # A bare list/number/string has no "files" key to read; calling .get on it
+            # would raise, so treat the stream as untrustworthy instead.
+            return None
+
+        for file_entry in data.get("files", []):
+            file_path = file_entry.get("file", "")
+            if not file_path.endswith(".fpp"):
+                continue
+            if any(line.get("count", 0) > 0 for line in file_entry.get("lines", [])):
+                try:
+                    rel_path = os.path.relpath(os.path.realpath(file_path), real_root)
+                except ValueError:
+                    rel_path = file_path
+                # Only keep src/ paths — build/staging/ artifacts from
+                # case-optimized builds are auto-generated and never
+                # appear in PR diffs. 
+                if rel_path.startswith("src/"):
+                    result.add(rel_path)
+
+    # If no JSON was parsed at all (complete garbage input), return None
+    # so the caller omits this test from the map (conservatively included
+    # on future runs). An empty set after successful parsing means the test
+    # genuinely covers no .fpp files.
+    if not parsed_any:
+        return None
+
+    return result
+
+
+def _compute_gcov_prefix_strip(root_dir: str) -> str:
+    """
+    Compute GCOV_PREFIX_STRIP so .gcda files preserve the build/ tree.
+
+    GCOV_PREFIX_STRIP removes N leading path components from the compile-time
+    absolute .gcda path. We strip all components of the MFC root directory
+    so the prefix tree starts with ``build/staging/...``.
+    """
+    real_root = os.path.realpath(root_dir)
+    return str(len(Path(real_root).parts) - 1)  # -1 excludes root '/'
+
+
+def _collect_single_test_coverage(
+    uuid: str,
+    test_gcda: str,
+    root_dir: str,
+    gcov_bin: str,
+) -> tuple:
+    """
+    Collect file-level coverage for a single test, fully self-contained.
+
+    Copies .gcno files from the real build tree into the test's isolated
+    .gcda directory (alongside the .gcda files), runs a batched gcov call,
+    then removes the .gcno copies. Each test has its own directory, so
+    this is safe to call concurrently without touching the shared build tree.
+    """
+    build_subdir = os.path.join(test_gcda, "build")
+    if not os.path.isdir(build_subdir):
+        # No .gcda files produced — test may not have run or GCOV_PREFIX
+        # was misconfigured. Return None so the test is omitted from the
+        # map (conservatively included on future runs). The sanity check
+        # at the end of build_coverage_map will catch systemic failures. 
+        cons.print(f"[yellow]Warning: No .gcda directory for {uuid} — GCOV_PREFIX may be misconfigured.[/yellow]")
+        return uuid, None
+
+    gcno_copies = []
+
+    for dirpath, _, filenames in os.walk(build_subdir):
+        for fname in filenames:
+            if not fname.endswith(".gcda"):
+                continue
+            # Derive matching .gcno path in the real build tree
+            gcda_path = os.path.join(dirpath, fname)
+            rel = os.path.relpath(gcda_path, test_gcda)
+            gcno_rel = rel[:-5] + ".gcno"
+            gcno_src = os.path.join(root_dir, gcno_rel)
+            if os.path.isfile(gcno_src):
+                # Copy .gcno alongside .gcda in the test's isolated dir.
+                # The copy can still fail after isfile() succeeded (e.g. the file
+                # vanishes on a networked filesystem).
+                gcno_dst = os.path.join(dirpath, fname[:-5] + ".gcno")
+                try:
+                    shutil.copy2(gcno_src, gcno_dst)
+                except OSError as exc:
+                    # Skipping only this .gcno would run gcov on an incomplete set and
+                    # record partial coverage, which could later justify skipping this
+                    # test. Abandon the whole test instead: None means "omit from the
+                    # map", so it is conservatively included on future runs.
+                    cons.print(f"[yellow]Warning: could not copy {gcno_src} for {uuid}: {exc} — omitting test from map.[/yellow]")
+                    for stale in gcno_copies + [gcno_dst]:
+                        try:
+                            os.remove(stale)
+                        except OSError:
+                            pass  # best-effort cleanup of copies made so far
+                    return uuid, None
+                gcno_copies.append(gcno_dst)
+
+    if not gcno_copies:
+        # Genuinely no matching .gcno files — return empty list (not None).
+        # None means "collection failed, conservatively include"; empty list
+        # means "test produced .gcda but no .gcno matched", which is a real
+        # (if unusual) result that should be cached as-is.
+        return uuid, []
+
+    # Batch: single gcov call for all .gcno files in this test.
+    # Run from root_dir so source path resolution works correctly. 
+    cmd = [gcov_bin, "--json-format", "--stdout"] + gcno_copies
+    try:
+        proc = subprocess.run(cmd, capture_output=True, cwd=root_dir, timeout=120, check=False)
+    except (subprocess.TimeoutExpired, subprocess.SubprocessError, OSError) as exc:
+        cons.print(f"[yellow]Warning: gcov failed for {uuid}: {exc}[/yellow]")
+        return uuid, None
+    finally:
+        # Always remove the temporary .gcno copies, whether gcov succeeded or not.
+        for g in gcno_copies:
+            try:
+                os.remove(g)
+            except OSError:
+                pass
+
+    if proc.returncode != 0 or not proc.stdout:
+        if proc.returncode != 0:
+            cons.print(f"[yellow]Warning: gcov exited {proc.returncode} for {uuid}[/yellow]")
+        return uuid, None
+
+    coverage = _parse_gcov_json_output(proc.stdout, root_dir)
+    if coverage is None:
+        # Decode failure — return None so the caller omits this test from
+        # the map (absent entries are conservatively included).
+        return uuid, None
+    return uuid, sorted(coverage)
+
+
+def _run_single_test_direct(test_info: dict, gcda_dir: str, strip: str) -> tuple:
+    """
+    Run a single test by invoking Fortran executables directly.
+
+    Bypasses ``./mfc.sh run`` entirely (no Python startup, no Mako template
+    rendering, no shell script generation). Input files and binary paths are
+    pre-computed by the caller.
+
+    Returns (uuid, test_gcda_path, failures).
+    """
+    uuid = test_info["uuid"]
+    test_dir = test_info["dir"]
+    binaries = test_info["binaries"]  # ordered list of (target_name, bin_path)
+    ppn = test_info["ppn"]
+
+    test_gcda = os.path.join(gcda_dir, uuid)
+    os.makedirs(test_gcda, exist_ok=True)
+
+    env = {**os.environ, "GCOV_PREFIX": test_gcda, "GCOV_PREFIX_STRIP": strip}
+
+    # MPI-compiled binaries must be launched via an MPI launcher (even ppn=1).
+    # Use --bind-to none to avoid binding issues with concurrent launches. 
+    if shutil.which("mpirun"):
+        mpi_cmd = ["mpirun", "--bind-to", "none", "-np", str(ppn)]
+    elif shutil.which("mpiexec"):
+        mpi_cmd = ["mpiexec", "-n", str(ppn)]
+    elif shutil.which("srun"):
+        mpi_cmd = ["srun", "--ntasks", str(ppn)]
+    else:
+        raise MFCException("No MPI launcher found (mpirun, mpiexec, or srun). MFC binaries require an MPI launcher.\n On Ubuntu: sudo apt install openmpi-bin\n On macOS: brew install open-mpi")
+
+    failures = []  # (target_name, code-or-tag, detail) tuples; at most one, since every failure breaks the loop
+    for target_name, bin_path in binaries:
+        if not os.path.isfile(bin_path):
+            # Record missing binary as a failure and stop: downstream targets
+            # depend on outputs from earlier ones (e.g. simulation needs the
+            # grid from pre_process), so running them without a predecessor
+            # produces misleading init-only gcda files.
+            failures.append((target_name, "missing-binary", f"binary not found: {bin_path}"))
+            break
+
+        # Verify .inp file exists before running (diagnostic for transient
+        # filesystem issues where the file goes missing between phases).
+        inp_file = os.path.join(test_dir, f"{target_name}.inp")
+        if not os.path.isfile(inp_file):
+            failures.append((target_name, "missing-inp", f"{inp_file} not found before launch"))
+            break
+
+        cmd = mpi_cmd + [bin_path]
+        try:
+            result = subprocess.run(cmd, check=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env, cwd=test_dir, timeout=600)  # stderr folded into stdout; 600 s cap per executable
+            if result.returncode != 0:
+                # Save last lines of output for debugging. Stop here: a
+                # failed pre_process/simulation leaves no valid outputs for
+                # the next target, and running it produces spurious coverage. 
+                tail = "\n".join(result.stdout.strip().splitlines()[-15:])  # last 15 lines of the combined output
+                failures.append((target_name, result.returncode, tail))
+                break
+        except subprocess.TimeoutExpired:
+            failures.append((target_name, "timeout", ""))
+            break
+        except (subprocess.SubprocessError, OSError) as exc:
+            failures.append((target_name, str(exc), ""))
+            break
+
+    return uuid, test_gcda, failures
+
+
+def _prepare_test(case) -> dict:
+    """
+    Prepare a test for direct execution: create directory, generate .inp
+    files, and resolve binary paths. All Python/toolchain overhead happens
+    here (single-threaded) so the parallel phase is pure subprocess calls.
+
+    Temporarily sets modified params on the case object (needed by
+    get_dirpath/to_input_file/get_inp), then restores the original
+    params in a finally block so callers can safely reuse the case list.
+    """
+    try:
+        case.delete_output()
+        case.create_directory()
+    except OSError as exc:
+        cons.print(f"[yellow]Warning: Failed to prepare test directory for {case.get_uuid()}: {exc}[/yellow]")
+        raise  # warn, then propagate to the caller
+
+    # Lagrange bubble tests need input files generated before running.
+    if case.params.get("bubbles_lagrange", "F") == "T":
+        try:
+            input_bubbles_lagrange(case)
+        except Exception as exc:
+            cons.print(f"[yellow]Warning: Failed to generate Lagrange bubble input for {case.get_uuid()}: {exc}[/yellow]")
+            raise  # warn, then propagate to the caller
+
+    # Work on a copy so we don't permanently mutate the case object.
+    params = dict(case.params)
+
+    # Apply post_process output params so simulation writes data files that
+    # post_process reads. Mirrors the generated case.py logic that normally
+    # runs via ./mfc.sh run (see POST_PROCESS_OUTPUT_PARAMS in case.py).
+    params.update(get_post_process_mods(params))
+
+    # Run only one timestep: we only need to know which source files are
+    # *touched*, not verify correctness. A single step exercises the key
+    # code paths across all three executables while preventing heavy 3D tests
+    # from timing out under gcov instrumentation (~10x slowdown). 
+ params["t_step_stop"] = 1 + + # Adaptive-dt tests: post_process computes n_save = int(t_stop/t_save)+1 + # and iterates over that many save indices. But with small t_step_stop + # the simulation produces far fewer saves. Clamp t_stop so post_process + # only reads saves that actually exist. + if params.get("cfl_adap_dt", "F") == "T": + t_save = float(params.get("t_save", 1.0)) + params["t_stop"] = t_save # n_save = 2: indices 0 and 1 + + # Heavy 3D tests: remove vorticity output (omega_wrt + fd_order) for + # 3D QBMM tests. The regular test suite runs post_process only under + # --test-all (which CI sets); heavy 3D QBMM configs are known to crash + # post_process (exit code 2) when vorticity FD is enabled on large grids + # with many QBMM variables. Strip those params here so the coverage + # build does not fail on those tests. + if int(params.get("p", 0)) > 0 and params.get("qbmm", "F") == "T": + for key in POST_PROCESS_3D_PARAMS: + params.pop(key, None) + + # Temporarily set mutated params on the case object for get_dirpath(), + # to_input_file(), and get_inp(). Always restore the original params + # so build_coverage_map callers can safely reuse the case list. + orig_params = case.params + case.params = params + try: + test_dir = case.get_dirpath() + input_file = case.to_input_file() + + # Write .inp files directly (no subprocess, no Mako templates). + # Suppress console output from get_inp() to avoid one message per (test, target) pair. + # Run all three executables to capture coverage across the full pipeline + # (pre_process: grid/IC generation; simulation: RHS/time-stepper; post_process: field I/O). + targets = [PRE_PROCESS, SIMULATION, POST_PROCESS] + binaries = [] + # NOTE: not thread-safe — Phase 1 must remain single-threaded. 
def build_coverage_map(
    root_dir: str,
    cases: list,
    n_jobs: Optional[int] = None,
) -> None:
    """
    Build the file-level coverage map by running tests in parallel.

    Phase 1 — Prepare all tests: generate .inp files and resolve binary paths.
    This happens single-threaded so the parallel phase has zero Python overhead.

    Phase 2 — Run all tests concurrently. Each worker invokes Fortran binaries
    directly (no ``./mfc.sh run``, no shell scripts). Each test's GCOV_PREFIX
    points to an isolated directory so .gcda files don't collide.

    Phase 3 — For each test, temporarily copy .gcno files from the real build tree
    into the test's isolated .gcda directory, run gcov to collect which .fpp files
    had coverage, then remove the .gcno copies.

    Requires a prior ``--gcov`` build: ``./mfc.sh build --gcov -j 8``

    Args:
        root_dir: Repository root, used to locate .gcno files and as the
            working directory for ``git rev-parse HEAD``.
        cases: Test cases to run. Each is prepared, executed, mapped to its
            ``coverage_key()`` and finally has its output directory removed.
        n_jobs: Number of Phase 3 gcov workers (Phase 2 is capped at 16).
            Defaults to ``os.cpu_count()``; values below 1 are raised to 1.

    Raises:
        MFCException: If no test produced a non-empty coverage entry.
    """
    gcov_bin = find_gcov_binary()
    gcno_files = find_gcno_files(root_dir)
    strip = _compute_gcov_prefix_strip(root_dir)

    if n_jobs is None:
        # Caller should pass n_jobs explicitly on SLURM systems;
        # os.cpu_count() may exceed the SLURM allocation.
        n_jobs = os.cpu_count() or 1
    # ThreadPoolExecutor raises ValueError for max_workers <= 0.
    n_jobs = max(n_jobs, 1)
    # Cap Phase 2 test parallelism: each test spawns gcov-instrumented MPI
    # processes (~2-5 GB each under gcov). Too many concurrent tests cause OOM.
    # Phase 3 gcov workers run at full n_jobs (gcov is lightweight by comparison).
    phase2_jobs = min(n_jobs, 16)
    cons.print(f"[bold]Building coverage map for {len(cases)} tests ({phase2_jobs} test workers, {n_jobs} gcov workers)...[/bold]")
    cons.print(f"[dim]Using gcov binary: {gcov_bin}[/dim]")
    cons.print(f"[dim]Found {len(gcno_files)} .gcno files[/dim]")
    cons.print(f"[dim]GCOV_PREFIX_STRIP={strip}[/dim]")
    cons.print()

    # Phase 1: Prepare all tests (single-threaded; scales linearly with test count).
    cons.print("[bold]Phase 1/3: Preparing tests...[/bold]")
    test_infos = []
    for done, case in enumerate(cases, start=1):
        try:
            test_infos.append(_prepare_test(case))
        except Exception as exc:
            cons.print(f" [yellow]Warning: skipping {case.get_uuid()} — prep failed: {exc}[/yellow]")
        if done % 100 == 0 or done == len(cases):
            cons.print(f" [{done:3d}/{len(cases):3d}] prepared")
    cons.print()

    gcda_dir = tempfile.mkdtemp(prefix="mfc_gcov_")
    try:
        # Phase 2: Run all tests in parallel via direct binary invocation.
        cons.print("[bold]Phase 2/3: Running tests...[/bold]")
        test_results: dict = {}
        all_failures: dict = {}
        with ThreadPoolExecutor(max_workers=phase2_jobs) as pool:
            futures = {pool.submit(_run_single_test_direct, info, gcda_dir, strip): info for info in test_infos}
            # `done` counts every finished future, including failed ones, so
            # the final "N/N" progress line is always emitted.
            for done, future in enumerate(as_completed(futures), start=1):
                try:
                    uuid, test_gcda, failures = future.result()
                except Exception as exc:
                    info = futures[future]
                    cons.print(f" [yellow]Warning: {info['uuid']} failed to run: {exc}[/yellow]")
                else:
                    if failures:
                        # A test that crashed mid-pipeline produced only partial
                        # .gcda files (e.g. simulation failed after pre_process
                        # ran). That truncated coverage is untrustworthy: a later
                        # .fpp change that ran only in the missing stage would be
                        # incorrectly skipped. Record the failure for the warning
                        # summary but do NOT add this test to test_results —
                        # absent entries are conservatively included by
                        # select_tests (rung 5), never skipped.
                        all_failures[uuid] = failures
                    else:
                        test_results[uuid] = test_gcda
                if done % 50 == 0 or done == len(test_infos):
                    cons.print(f" [{done:3d}/{len(test_infos):3d}] tests completed")

        if all_failures:
            cons.print()
            cons.print(f"[bold yellow]Warning: {len(all_failures)} tests had target failures:[/bold yellow]")
            for uuid, fails in sorted(all_failures.items()):
                fail_str = ", ".join(f"{t}={rc}" for t, rc, _ in fails)
                cons.print(f" [yellow]{uuid}[/yellow]: {fail_str}")
                for target_name, _rc, tail in fails:
                    if tail:
                        cons.print(f" {target_name} output (last 15 lines):")
                        for line in tail.splitlines():
                            cons.print(f" {line}")

        # Diagnostic: verify .gcda files exist for at least one test.
        sample_uuid = next(iter(test_results), None)
        if sample_uuid:
            sample_gcda = test_results[sample_uuid]
            sample_build = os.path.join(sample_gcda, "build")
            if os.path.isdir(sample_build):
                gcda_count = sum(1 for _, _, fns in os.walk(sample_build) for f in fns if f.endswith(".gcda"))
                cons.print(f"[dim]Sample test {sample_uuid}: {gcda_count} .gcda files in {sample_build}[/dim]")
            else:
                cons.print(f"[yellow]Sample test {sample_uuid}: no build/ dir in {sample_gcda}[/yellow]")

        # Phase 3: Collect gcov coverage from each test's isolated .gcda directory.
        # .gcno files are temporarily copied alongside .gcda files, then removed.
        cons.print()
        cons.print("[bold]Phase 3/3: Collecting coverage...[/bold]")
        # Internal collection keyed by uuid (gcov machinery references uuids).
        cache: dict = {}
        with ThreadPoolExecutor(max_workers=n_jobs) as pool:
            futures = {
                pool.submit(
                    _collect_single_test_coverage,
                    uuid,
                    test_gcda,
                    root_dir,
                    gcov_bin,
                ): uuid
                for uuid, test_gcda in test_results.items()
            }
            # As in Phase 2, progress counts every finished future so the last
            # line is printed even when some collections fail.
            for done, future in enumerate(as_completed(futures), start=1):
                try:
                    uuid, coverage = future.result()
                except Exception as exc:
                    cons.print(f" [yellow]Warning: {futures[future]} coverage failed: {exc}[/yellow]")
                    # Do NOT store entry — absent entries are conservatively
                    # included by select_tests, while [] means "covers no files"
                    # and would permanently skip the test.
                else:
                    # None signals a decode or collection failure — omit from
                    # the map so the test is conservatively included later.
                    if coverage is not None:
                        cache[uuid] = coverage
                if done % 50 == 0 or done == len(test_results):
                    cons.print(f" [{done:3d}/{len(test_results):3d}] tests processed")
    finally:
        try:
            shutil.rmtree(gcda_dir)
        except OSError as exc:
            cons.print(f"[yellow]Warning: Failed to clean up temp directory {gcda_dir}: {exc}[/yellow]")

    # Translate internal uuid keys -> stable param_hash keys for the committed map.
    from .coverage import COVERAGE_MAP_PATH, save_map

    key_by_uuid = {c.get_uuid(): c.coverage_key() for c in cases}
    out: dict = {}
    for uuid, cov in cache.items():
        if uuid == "_meta" or cov is None:
            continue
        key = key_by_uuid.get(uuid, uuid)
        if key in out:
            # Several cases can share one coverage key. `cache` is filled in
            # completion order, so plain assignment would keep an arbitrary
            # entry; take the union instead, which is deterministic and can
            # only select more tests, never fewer.
            out[key] = sorted(set(out[key]) | set(cov))
        else:
            out[key] = cov
    n_tests = sum(1 for v in out.values() if v)
    if n_tests == 0:
        raise MFCException("Coverage build produced zero coverage. Check the --gcov build and gcov binary.")
    try:
        git_sha = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, cwd=root_dir, check=False).stdout.strip()
    except OSError:
        # git not installed / not executable: record an empty SHA (the same
        # value a failing `git rev-parse` yields) rather than discarding the
        # coverage data that was just collected.
        git_sha = ""
    save_map(COVERAGE_MAP_PATH, out, n_tests=n_tests, git_sha=git_sha, gfortran_version=_get_gcov_version(gcov_bin))
    cons.print(f"[bold green]Coverage map written to {COVERAGE_MAP_PATH}[/bold green] ({n_tests} tests)")

    # Clean up test output directories from Phase 1/2 (grid files, restart files,
    # silo output, etc.). These live on NFS scratch and can total several GB for
    # the full test suite. Leaving them behind creates I/O pressure for subsequent
    # test jobs that share the same scratch filesystem.
    cons.print("[dim]Cleaning up test output directories...[/dim]")
    for case in cases:
        try:
            case.delete_output()
        except OSError:
            pass  # Best-effort; NFS errors are non-fatal here
import common + from .coverage import COVERAGE_MAP_PATH, format_summary, get_changed_files, load_map, select_tests + + entries, meta = load_map(COVERAGE_MAP_PATH) + if entries is None: + cons.print("[yellow]Coverage selection: map missing/corrupt — running full suite.[/yellow]") + else: + changed = get_changed_files(common.MFC_ROOT_DIR, ARG("changes_branch"), explicit=ARG("changed_files")) + to_run, to_skip, reason = select_tests(cases, entries, changed) + cons.print(format_summary(ran=len(to_run), total=len(cases), reason=reason, meta=meta, now=datetime.datetime.now(datetime.timezone.utc).isoformat())) + if ARG("select_enforce"): + skipped_cases += to_skip + cases = to_run + else: + cons.print("[dim](shadow mode: running full suite; pass --select-enforce to actually skip)[/dim]") + if ARG("percent") == 100: return cases, skipped_cases @@ -208,6 +227,19 @@ def test(): return + if ARG("build_coverage_map"): + from .coverage_build import build_coverage_map + + all_cases = [b.to_case() for b in cases] + unique = set() + for case, code in itertools.product(all_cases, [PRE_PROCESS, SIMULATION, POST_PROCESS]): + slug = code.get_slug(case.to_input_file()) + if slug not in unique: + build(code, case.to_input_file()) + unique.add(slug) + build_coverage_map(common.MFC_ROOT_DIR, all_cases, n_jobs=int(ARG("jobs"))) + return + cases, skipped_cases = __filter(cases) cases = [_.to_case() for _ in cases] total_test_count = len(cases) diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py new file mode 100644 index 0000000000..0b585e144a --- /dev/null +++ b/toolchain/mfc/test/test_coverage_unit.py @@ -0,0 +1,344 @@ +import tempfile +import types as _types +from pathlib import Path +from unittest.mock import patch + +from mfc.test.coverage import format_summary, get_changed_files, is_always_run_all, load_map, map_health, param_hash, save_map, select_tests + + +def test_param_hash_is_order_independent(): + a = param_hash({"m": 100, 
def test_param_hash_is_order_independent():
    """Top-level key order must not affect the hash."""
    forward = param_hash({"m": 100, "weno_order": 5, "bubbles_euler": "T"})
    shuffled = param_hash({"weno_order": 5, "bubbles_euler": "T", "m": 100})
    assert forward == shuffled


def test_param_hash_changes_with_value():
    """Different values for the same key give different hashes."""
    assert param_hash({"weno_order": 5}) != param_hash({"weno_order": 3})


def test_param_hash_is_hex_and_short():
    """The hash is 16 lowercase hex characters."""
    digest = param_hash({"m": 1})
    assert len(digest) == 16
    assert all(ch in "0123456789abcdef" for ch in digest)


def test_param_hash_nested_order_independent():
    """Key order inside nested dicts must not affect the hash either."""
    forward = param_hash({"patch": {"x": 1, "y": 2}})
    shuffled = param_hash({"patch": {"y": 2, "x": 1}})
    assert forward == shuffled


def test_save_then_load_roundtrip():
    """A saved map loads back with identical entries and the supplied metadata."""
    expected = {"abc": ["src/simulation/m_rhs.fpp"]}
    with tempfile.TemporaryDirectory() as workdir:
        map_path = Path(workdir) / "m.json.gz"
        save_map(map_path, {"abc": ["src/simulation/m_rhs.fpp"]}, n_tests=1, git_sha="deadbee", gfortran_version="13")
        entries, meta = load_map(map_path)
        assert entries == expected
        assert meta["n_tests"] == 1
        assert meta["git_sha"] == "deadbee"
        assert "built_at" in meta


def test_load_missing_returns_none():
    """A path that does not exist yields (None, None)."""
    result = load_map(Path("/nonexistent/m.json.gz"))
    assert result == (None, None)


def test_load_corrupt_returns_none():
    """A file that is not valid gzip yields (None, None)."""
    with tempfile.TemporaryDirectory() as workdir:
        map_path = Path(workdir) / "m.json.gz"
        map_path.write_bytes(b"not gzip")
        assert load_map(map_path) == (None, None)


def test_macro_file_forces_all():
    """Changing the shared macro include forces the full suite."""
    changed = {"src/common/include/parallel_macros.fpp"}
    assert is_always_run_all(changed)


def test_cmake_forces_all():
    """Any CMake file change forces the full suite."""
    for path in ("CMakeLists.txt", "toolchain/cmake/foo.cmake"):
        assert is_always_run_all({path})


def test_param_codegen_forces_all():
    """Changing the parameter definitions forces the full suite."""
    changed = {"toolchain/mfc/params/definitions.py"}
    assert is_always_run_all(changed)


def test_ordinary_common_module_does_not_force_all():
    """A regular common-module source file is not an always-run-all trigger."""
    changed = {"src/common/m_helper.fpp"}
    assert not is_always_run_all(changed)


def test_ordinary_sim_module_does_not_force_all():
    """A regular simulation source file is not an always-run-all trigger."""
    changed = {"src/simulation/m_rhs.fpp"}
    assert not is_always_run_all(changed)


class _Case:
    """Minimal test-case stand-in exposing ``params`` and ``coverage_key()``."""

    def __init__(self, ph, params=None):
        self._ph = ph
        # Falsy params (None or an empty dict) become a fresh empty dict.
        self.params = params if params else {}

    def coverage_key(self):
        """Return the key this stand-in was constructed with."""
        return self._ph
def _cases(*phs):
    """Build one ``_Case`` per coverage key, in the order given."""
    return list(map(_Case, phs))


def test_rung1_no_changed_files_runs_all():
    """With no changed-file list (None) every test runs and nothing is skipped."""
    suite = _cases("a", "b")
    run, skip, reason = select_tests(suite, {"a": ["src/x.fpp"]}, None)
    assert len(run) == 2
    assert skip == []
    assert reason.startswith("rung1")


def test_rung2_always_run_all():
    """An always-run-all file (here CMakeLists.txt) selects every test."""
    suite = _cases("a", "b")
    run, _skip, reason = select_tests(suite, {"a": [], "b": []}, {"CMakeLists.txt"})
    assert len(run) == 2
    assert reason.startswith("rung2")


def test_rung3_f90_change_runs_all():
    """A changed .f90 file selects every test."""
    suite = _cases("a")
    run, _skip, reason = select_tests(suite, {"a": []}, {"src/common/m_precision_select.f90"})
    assert len(run) == 1
    assert reason.startswith("rung3")


def test_rung4_changed_fpp_with_zero_coverage_runs_all():
    """A changed .fpp that no mapped test covers selects every test."""
    suite = _cases("a")
    coverage = {"a": ["src/simulation/m_rhs.fpp"]}
    # m_gpu_only.fpp is covered by no test in the map
    run, _skip, reason = select_tests(suite, coverage, {"src/simulation/m_gpu_only.fpp"})
    assert len(run) == 1
    assert reason.startswith("rung4")


def test_rung5_unmapped_test_is_included():
    """A test with no map entry is selected alongside the overlapping one."""
    suite = _cases("a", "new")  # 'new' not in map
    coverage = {"a": ["src/simulation/m_rhs.fpp"]}
    run, _skip, _reason = select_tests(suite, coverage, {"src/simulation/m_rhs.fpp"})
    selected = {case.coverage_key() for case in run}
    assert selected == {"a", "new"}


def test_rung6_and_7_overlap_selects_subset():
    """Only the test whose coverage overlaps the change runs; the other is skipped."""
    suite = _cases("hit", "miss")
    coverage = {"hit": ["src/simulation/m_bubbles_EE.fpp"], "miss": ["src/simulation/m_rhs.fpp"]}
    run, skip, _reason = select_tests(suite, coverage, {"src/simulation/m_bubbles_EE.fpp"})
    run_keys = [case.coverage_key() for case in run]
    skip_keys = [case.coverage_key() for case in skip]
    assert run_keys == ["hit"]
    assert skip_keys == ["miss"]


def test_case_coverage_key_uses_full_params():
    """TestCase.coverage_key() equals param_hash of the case's params."""
    from mfc.test.case import TestCase

    case = TestCase("1D -> Foo", {"m": 100, "weno_order": 5})
    assert case.coverage_key() == param_hash(case.params)


def test_case_coverage_key_changes_with_params():
    """Same trace with different params gives different coverage keys."""
    from mfc.test.case import TestCase

    fifth_order = TestCase("1D -> Foo", {"weno_order": 5})
    third_order = TestCase("1D -> Foo", {"weno_order": 3})
    assert fifth_order.coverage_key() != third_order.coverage_key()
def test_case_coverage_key_ignores_trace():
    """Same params with different traces give the same coverage key."""
    from mfc.test.case import TestCase

    original = TestCase("1D -> Foo", {"m": 100})
    renamed = TestCase("totally -> different -> trace", {"m": 100})
    assert original.coverage_key() == renamed.coverage_key()


def test_changed_files_prefers_explicit_list():
    """A newline-separated explicit list is returned as a set of paths."""
    changed = get_changed_files("/repo", "master", explicit="src/a.fpp\nsrc/b.fpp\n")
    assert changed == {"src/a.fpp", "src/b.fpp"}


def test_changed_files_deepens_then_recovers():
    """merge-base fails until a fetch has happened; the diff result is then returned."""
    state = {"deepened": False}

    def fake_run(cmd, **kw):
        # Stand-in for subprocess.run, keyed on the git subcommand in cmd[1].
        sub = cmd[1] if len(cmd) > 1 else ""
        code, out = 0, ""
        if sub == "fetch":
            state["deepened"] = True
        elif sub == "merge-base":
            code = 0 if state["deepened"] else 1
            out = "base\n"
        elif sub == "diff":
            out = "src/x.fpp\n"
        return _types.SimpleNamespace(returncode=code, stdout=out, stderr="")

    with patch("subprocess.run", fake_run):
        changed = get_changed_files("/repo", "master")
        assert changed == {"src/x.fpp"}


def test_changed_files_returns_none_when_unrecoverable():
    """If merge-base always fails, the result is None."""

    def fake_run(cmd, **kw):
        # Only merge-base fails; every other command reports success.
        is_merge_base = len(cmd) > 1 and cmd[1] == "merge-base"
        return _types.SimpleNamespace(returncode=1 if is_merge_base else 0, stdout="", stderr="boom")

    with patch("subprocess.run", fake_run):
        assert get_changed_files("/repo", "master") is None


def test_summary_mentions_counts_age_reason():
    """The summary contains the ran/total counts, the map age in days and the reason."""
    summary = format_summary(
        ran=47,
        total=610,
        reason="selected 47/610 by coverage overlap",
        meta={"built_at": "2026-05-20T00:00:00+00:00"},
        now="2026-05-29T00:00:00+00:00",
    )
    for fragment in ("47/610", "9d", "coverage overlap"):
        assert fragment in summary


def test_summary_handles_missing_meta():
    """With meta=None the summary reports the map age as unknown."""
    summary = format_summary(
        ran=610,
        total=610,
        reason="rung1: changed-file list unavailable",
        meta=None,
        now="2026-05-29T00:00:00+00:00",
    )
    assert "610/610" in summary
    assert "map age unknown" in summary
def test_health_ok():
    """A one-day-old map covering 580 of 600 current keys passes the health check."""
    ok, msg = map_health(
        meta={"built_at": "2026-05-28T00:00:00+00:00", "n_tests": 600},
        current_keys={str(i) for i in range(600)},
        mapped_keys={str(i) for i in range(580)},
        now="2026-05-29T00:00:00+00:00",
        max_age_days=10,
        min_fraction=0.8,
    )
    assert ok, msg


def test_health_stale_fails():
    """A map built 28 days before `now` fails with max_age_days=10 and mentions staleness."""
    ok, msg = map_health(
        meta={"built_at": "2026-05-01T00:00:00+00:00", "n_tests": 600},
        current_keys={"a"},
        mapped_keys={"a"},
        now="2026-05-29T00:00:00+00:00",
        max_age_days=10,
        min_fraction=0.8,
    )
    assert not ok and "stale" in msg.lower()


def test_health_undercoverage_fails():
    """A fresh map covering 50 of 100 current keys fails with min_fraction=0.8."""
    ok, msg = map_health(
        meta={"built_at": "2026-05-28T00:00:00+00:00", "n_tests": 10},
        current_keys={str(i) for i in range(100)},
        mapped_keys={str(i) for i in range(50)},
        now="2026-05-29T00:00:00+00:00",
        max_age_days=10,
        min_fraction=0.8,
    )
    assert not ok and "coverage" in msg.lower()


def test_builder_has_coverage_key_matching_case():
    """A builder's coverage key equals the key of the case it builds."""
    from mfc.test.case import TestCaseBuilder

    builder = TestCaseBuilder(trace="1D -> Foo", mods={"m": 100, "weno_order": 5}, path="", args=[], ppn=1, functor=None)
    assert builder.coverage_key() == builder.to_case().coverage_key()
def test_rung5_empty_coverage_is_included():
    """A test whose map entry is [] must be run, not skipped, on a .fpp change."""
    # "anchor" covers the changed .fpp so rung4 passes and we reach the per-test rungs.
    cases = _cases("hasempty", "anchor")
    cov_map = {
        "hasempty": [],
        "anchor": ["src/simulation/m_rhs.fpp"],
    }
    run, skip, _ = select_tests(cases, cov_map, {"src/simulation/m_rhs.fpp"})
    run_keys = {c.coverage_key() for c in run}
    assert "hasempty" in run_keys and skip == []


def test_changed_files_explicit_space_and_comma_separated():
    """Explicit lists may be separated by spaces or commas."""
    # Uses the module-level get_changed_files import; no local re-import needed.
    expected = {"src/a.fpp", "src/b.fpp"}
    assert get_changed_files("/r", "master", explicit="src/a.fpp src/b.fpp") == expected
    assert get_changed_files("/r", "master", explicit="src/a.fpp,src/b.fpp") == expected


def test_sim_include_fpp_forces_all():
    """Any change to a src include file forces the full suite."""
    # gcov can't reliably attribute #:include'd files; any src include change runs all.
    assert is_always_run_all({"src/simulation/include/inline_riemann.fpp"})
    assert is_always_run_all({"src/pre_process/include/2dHardcodedIC.fpp"})


def test_empty_explicit_is_uncertainty_not_skipall():
    """Empty or separator-only explicit input falls through to git detection."""
    # An empty/whitespace --changed-files must NOT become an empty set (skip-all under
    # enforce). It falls through to git detection -> None when that fails -> run all.

    def fail_git(cmd, **kw):
        # Only merge-base fails; every other command reports success.
        rc = 1 if (len(cmd) > 1 and cmd[1] == "merge-base") else 0
        return _types.SimpleNamespace(returncode=rc, stdout="", stderr="x")

    with patch("subprocess.run", fail_git):
        assert get_changed_files("/r", "master", explicit="") is None
        assert get_changed_files("/r", "master", explicit=" , ") is None


def test_run_and_test_infra_force_all():
    """Changes to the run/test infrastructure modules force the full suite."""
    assert is_always_run_all({"toolchain/mfc/run/input.py"})
    assert is_always_run_all({"toolchain/mfc/test/case.py"})
    assert is_always_run_all({"toolchain/mfc/test/test.py"})


def test_cases_py_is_not_always_run():
    """cases.py is deliberately not an always-run-all trigger."""
    assert not is_always_run_all({"toolchain/mfc/test/cases.py"})
def test_cases_py_change_runs_new_tests_not_skipall():
    """A cases.py-only change runs the unmapped test and skips the mapped one."""
    # cases.py-only change must run the NEW/modified tests (rung 5), not skip everything.
    cases = _cases("mapped", "newtest")  # "newtest" absent from map
    run, skip, _ = select_tests(cases, {"mapped": ["src/simulation/m_rhs.fpp"]}, {"toolchain/mfc/test/cases.py"})
    assert [c.coverage_key() for c in run] == ["newtest"]
    assert [c.coverage_key() for c in skip] == ["mapped"]


def test_unattributable_nonsource_change_runs_all():
    """Non-source files that cannot be attributed to tests select the full suite."""
    cases = _cases("a")
    for f in ("mfc.sh", "toolchain/pyproject.toml", "tests/ABC12345/golden.txt", ".github/workflows/test.yml"):
        run, _, reason = select_tests(cases, {"a": ["src/x.fpp"]}, {f})
        assert len(run) == 1 and "run all" in reason, f


def test_docs_only_still_skips_all():
    """Documentation-only changes select no tests (rung7)."""
    cases = _cases("a")
    for f in ("README.md", "docs/foo.rst", "LICENSE", ".claude/x.md"):
        run, _, reason = select_tests(cases, {"a": ["src/x.fpp"]}, {f})
        assert run == [] and "rung7" in reason, f


def test_load_map_rejects_malformed_entry(tmp_path):
    """A map containing a non-list entry is rejected as a whole."""
    import gzip
    import json

    # Uses the module-level load_map import; tmp_path / name is already a Path.
    p = tmp_path / "m.json.gz"
    with gzip.open(p, "wt") as fh:
        json.dump({"_meta": {"built_at": "x"}, "good": ["a.fpp"], "bad": "not-a-list"}, fh)
    assert load_map(p) == (None, None)


def test_uppercase_fortran_extension_forces_all():
    """An upper-case .F90 extension is treated like .f90 (rung3)."""
    cases = _cases("a")
    run, _, reason = select_tests(cases, {"a": []}, {"src/common/m_x.F90"})
    assert len(run) == 1 and reason.startswith("rung3")


def test_toolchain_py_change_forces_all_except_cases():
    """Toolchain Python modules force the full suite; test/cases.py does not."""
    assert is_always_run_all({"toolchain/mfc/case.py"})
    assert is_always_run_all({"toolchain/mfc/build.py"})
    assert is_always_run_all({"toolchain/mfc/common.py"})
    assert not is_always_run_all({"toolchain/mfc/test/cases.py"})


def test_empty_map_with_fpp_change_runs_all_rung4():
    """With an empty map, a changed .fpp selects every test (rung4)."""
    cases = _cases("a", "b")
    run, _, reason = select_tests(cases, {}, {"src/simulation/m_rhs.fpp"})
    assert len(run) == 2 and reason.startswith("rung4")