diff --git a/src/dayamlchecker/code_formatter.py b/src/dayamlchecker/code_formatter.py index 437fb09..665c51f 100644 --- a/src/dayamlchecker/code_formatter.py +++ b/src/dayamlchecker/code_formatter.py @@ -17,6 +17,7 @@ import re import sys +import os from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -369,19 +370,41 @@ def format_yaml_file( return formatted, changed -def _collect_yaml_files(paths: list[Path]) -> list[Path]: +def _collect_yaml_files( + paths: list[Path], check_all: bool = False +) -> list[Path]: """ Expand paths to a list of YAML files. - Files are included if they have .yml or .yaml extension - Directories are recursively searched for YAML files """ + def _is_default_ignored_dir(dirname: str) -> bool: + return ( + dirname.startswith(".git") + or dirname.startswith(".github") + or dirname.startswith(".venv") + or dirname == "sources" + ) + yaml_files = [] for path in paths: if path.is_dir(): - # Recursively find all YAML files - yaml_files.extend(path.rglob("*.yml")) - yaml_files.extend(path.rglob("*.yaml")) + # Recursively find all YAML files, pruning ignored directories + for root, dirnames, filenames in os.walk(path, topdown=True): + if not check_all and _is_default_ignored_dir(Path(root).name): + dirnames[:] = [] + continue + if not check_all: + dirnames[:] = [ + dirname + for dirname in dirnames + if not _is_default_ignored_dir(dirname) + ] + root_path = Path(root) + for filename in filenames: + if filename.lower().endswith((".yml", ".yaml")): + yaml_files.append(root_path / filename) elif path.suffix.lower() in (".yml", ".yaml"): yaml_files.append(path) seen = set() @@ -438,6 +461,14 @@ def main() -> int: action="store_true", help="Only output errors", ) + parser.add_argument( + "--check-all", + action="store_true", + help=( + "Do not ignore default directories during recursive search " + "(.git*, .github*, sources)" + ), + ) args = parser.parse_args() @@ -447,7 +478,9 @@ def main() -> int: ) # Collect all YAML files from paths (handles directories recursively) - yaml_files = _collect_yaml_files(args.files) + yaml_files = _collect_yaml_files( + args.files, check_all=args.check_all + ) if not yaml_files: print("No YAML files found.", file=sys.stderr) diff --git a/src/dayamlchecker/yaml_structure.py b/src/dayamlchecker/yaml_structure.py index 99729dd..1c1ca71 100644 --- a/src/dayamlchecker/yaml_structure.py +++ b/src/dayamlchecker/yaml_structure.py @@ -1,9 +1,11 @@ # Each doc, apply this to each block import ast +import argparse +import os +from pathlib import Path import re import sys -# os and Path imports were used in a short-lived implementation; the CLI filtering approach is preferred from typing import Optional import yaml from yaml.loader import SafeLoader @@ -1161,10 +1163,79 @@ def process_file(input_file): print(f"{err}") -def main(): - for input_file in sys.argv[1:]: - process_file(input_file) +def _collect_yaml_files( + paths: list[Path], check_all: bool = False +) -> list[Path]: + """Expand files/directories into a unique, sorted list of YAML files.""" + def _is_default_ignored_dir(dirname: str) -> bool: + return ( + dirname.startswith(".git") + or dirname.startswith(".github") + or dirname.startswith(".venv") + or dirname == "sources" + ) + + yaml_files = [] + for path in paths: + if path.is_dir(): + for root, dirnames, filenames in os.walk(path, topdown=True): + if not check_all and _is_default_ignored_dir(Path(root).name): + dirnames[:] = [] + continue + if not check_all: + dirnames[:] = [ + dirname + for dirname in dirnames + if not _is_default_ignored_dir(dirname) + ] + root_path = Path(root) + for filename in filenames: + if filename.lower().endswith((".yml", ".yaml")): + yaml_files.append(root_path / filename) + elif path.suffix.lower() in (".yml", ".yaml"): + yaml_files.append(path) + + seen = set() + result = [] + for file_path in yaml_files: + resolved = file_path.resolve() + if resolved not in seen: + seen.add(resolved) + result.append(file_path) + return sorted(result) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Validate Docassemble YAML files", + ) + parser.add_argument( + "files", + nargs="+", + type=Path, + help="YAML files or directories to validate (directories are searched recursively)", + ) + parser.add_argument( + "--check-all", + action="store_true", + help=( + "Do not ignore default directories during recursive search " + "(.git*, .github*, sources)" + ), + ) + args = parser.parse_args() + + yaml_files = _collect_yaml_files( + args.files, check_all=args.check_all + ) + if not yaml_files: + print("No YAML files found.", file=sys.stderr) + return 1 + + for input_file in yaml_files: + process_file(str(input_file)) + return 0 if __name__ == "__main__": - main() + sys.exit(main())