Skip to content
Open
43 changes: 38 additions & 5 deletions src/dayamlchecker/code_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import re
import sys
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -369,19 +370,41 @@ def format_yaml_file(
return formatted, changed


def _collect_yaml_files(paths: list[Path]) -> list[Path]:
def _collect_yaml_files(
paths: list[Path], check_all: bool = False
) -> list[Path]:
"""
Expand paths to a list of YAML files.

- Files are included if they have .yml or .yaml extension
- Directories are recursively searched for YAML files
"""
def _is_default_ignored_dir(dirname: str) -> bool:
return (
dirname.startswith(".git")
or dirname.startswith(".github")
or dirname.startswith(".venv")
or dirname == "sources"
)

yaml_files = []
for path in paths:
if path.is_dir():
# Recursively find all YAML files
yaml_files.extend(path.rglob("*.yml"))
yaml_files.extend(path.rglob("*.yaml"))
# Recursively find all YAML files, pruning ignored directories
for root, dirnames, filenames in os.walk(path, topdown=True):
if not check_all and _is_default_ignored_dir(Path(root).name):
dirnames[:] = []
continue
if not check_all:
dirnames[:] = [
dirname
for dirname in dirnames
if not _is_default_ignored_dir(dirname)
]
root_path = Path(root)
for filename in filenames:
if filename.lower().endswith((".yml", ".yaml")):
yaml_files.append(root_path / filename)
elif path.suffix.lower() in (".yml", ".yaml"):
yaml_files.append(path)
seen = set()
Expand Down Expand Up @@ -438,6 +461,14 @@ def main() -> int:
action="store_true",
help="Only output errors",
)
parser.add_argument(
"--check-all",
action="store_true",
help=(
"Do not ignore default directories during recursive search "
"(.git*, .github*, sources)"
),
)

args = parser.parse_args()

Expand All @@ -447,7 +478,9 @@ def main() -> int:
)

# Collect all YAML files from paths (handles directories recursively)
yaml_files = _collect_yaml_files(args.files)
yaml_files = _collect_yaml_files(
args.files, check_all=args.check_all
)

if not yaml_files:
print("No YAML files found.", file=sys.stderr)
Expand Down
81 changes: 76 additions & 5 deletions src/dayamlchecker/yaml_structure.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Each doc, apply this to each block
import ast
import argparse
import os
from pathlib import Path
import re
import sys

# os and Path imports were used in a short-lived implementation; the CLI filtering approach is preferred
from typing import Optional
import yaml
from yaml.loader import SafeLoader
Expand Down Expand Up @@ -1161,10 +1163,79 @@ def process_file(input_file):
print(f"{err}")


def main():
for input_file in sys.argv[1:]:
process_file(input_file)
def _collect_yaml_files(
paths: list[Path], check_all: bool = False
) -> list[Path]:
"""Expand files/directories into a unique, sorted list of YAML files."""
def _is_default_ignored_dir(dirname: str) -> bool:
return (
dirname.startswith(".git")
or dirname.startswith(".github")
or dirname.startswith(".venv")
or dirname == "sources"
)

yaml_files = []
for path in paths:
if path.is_dir():
for root, dirnames, filenames in os.walk(path, topdown=True):
if not check_all and _is_default_ignored_dir(Path(root).name):
dirnames[:] = []
continue
if not check_all:
dirnames[:] = [
dirname
for dirname in dirnames
if not _is_default_ignored_dir(dirname)
]
root_path = Path(root)
for filename in filenames:
if filename.lower().endswith((".yml", ".yaml")):
yaml_files.append(root_path / filename)
elif path.suffix.lower() in (".yml", ".yaml"):
yaml_files.append(path)

seen = set()
result = []
for file_path in yaml_files:
resolved = file_path.resolve()
if resolved not in seen:
seen.add(resolved)
result.append(file_path)
return sorted(result)


def main() -> int:
parser = argparse.ArgumentParser(
description="Validate Docassemble YAML files",
)
parser.add_argument(
"files",
nargs="+",
type=Path,
help="YAML files or directories to validate (directories are searched recursively)",
)
parser.add_argument(
"--check-all",
action="store_true",
help=(
"Do not ignore default directories during recursive search "
"(.git*, .github*, sources)"
),
)
args = parser.parse_args()

yaml_files = _collect_yaml_files(
args.files, check_all=args.check_all
)
if not yaml_files:
print("No YAML files found.", file=sys.stderr)
return 1

for input_file in yaml_files:
process_file(str(input_file))
return 0


if __name__ == "__main__":
main()
sys.exit(main())