diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000..85c0040
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,64 @@
+name: Pre-commit Checks
+
+on: [pull_request]
+
+# Least privilege: every step below only reads repository contents.
+permissions:
+  contents: read
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          # Full history is required so the commit-message check can walk
+          # origin/<base>..HEAD.
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Cache pre-commit environments
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pre-commit
+          key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: "0.9.18"
+
+      - name: Run pre-commit hooks
+        run: |
+          pip install pre-commit
+          # generate-cli-docs requires the full areal package (language: system),
+          # which is not installed in CI. Skip it here; it runs locally instead.
+          SKIP=generate-cli-docs pre-commit run --all-files
+
+      - name: Validate commit messages
+        env:
+          # Pass the branch name through the environment instead of expanding
+          # the expression inside the script, so it can never be parsed as shell.
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          pip install conventional-pre-commit
+          # Resolve the commit list up front: under `bash -e` a failing
+          # assignment aborts the step, whereas a failing `$(...)` in a `for`
+          # header would yield an empty loop and a false pass.
+          COMMITS=$(git log --no-merges --format=%H "origin/${BASE_REF}..HEAD")
+          FAILED=0
+          for SHA in $COMMITS; do
+            git log --format=%B -1 "$SHA" > /tmp/commit_msg
+            # Keep this type list in sync with the conventional-pre-commit
+            # hook args in .pre-commit-config.yaml.
+            if ! conventional-pre-commit feat fix docs gov style refactor perf test build ci chore revert /tmp/commit_msg; then
+              echo "❌ Bad commit: $(git log --format=%s -1 "$SHA")"
+              FAILED=1
+            fi
+          done
+          exit $FAILED
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..32fbaeb
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,143 @@
+default_install_hook_types:
+  - pre-commit
+  - commit-msg
+
+repos:
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v22.1.5
+    hooks:
+      - id: clang-format
+        files: \.(c|cc|cxx|cpp|h|hpp|hxx|cu|cuh)$
+        args:
+          - --style=file
+          - --fallback-style=Google
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: check-yaml
+        files: \.(yaml|yml)$
+        exclude: \.github/workflows/
+      - id: end-of-file-fixer
+        files: \.(py|md|yaml|yml|c|cc|cxx|cpp|h|hpp|hxx|cu|cuh|ipynb)$
+      - id: trailing-whitespace
+        files: \.(py|md|yaml|yml|c|cc|cxx|cpp|h|hpp|hxx|cu|cuh|ipynb)$
+      - id: check-added-large-files
+        name: Check for large files
+        args: ["--maxkb=1000"]
+        exclude: uv.lock
+      - id: check-json
+        name: Check JSON files
+
+      - id: detect-private-key
+        name: Detect private keys
+
+  - repo: https://github.com/executablebooks/mdformat
+    rev: 0.7.22
+    hooks:
+      - id: mdformat
+        args:
+          - --wrap=88
+        additional_dependencies:
+          - mdformat-gfm
+          - mdformat-tables
+          - mdformat-frontmatter
+        exclude: ^(docs/(en|zh)/algorithms/.*\.md|docs/(en|zh)/best_practices/algo_perf\.md)$
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.15.15
+    hooks:
+      - id: ruff  # Run the linter.
+        name: Run Linter Check (Ruff)
+        types_or: [python, pyi, jupyter]
+        args: [--fix]
+      - id: ruff-format  # Run the formatter.
+        name: Run Formatter (Ruff)
+        types_or: [python, pyi, jupyter]
+
+  # Clean notebook outputs and metadata
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.9.1
+    hooks:
+      - id: nbstripout
+        name: nbstripout - Strip notebook output
+        description: Strip output from Jupyter notebooks
+
+  # Check consistency between pyproject.toml variants (sglang vs vllm)
+  - repo: local
+    hooks:
+      - id: check-pyproject-consistency
+        name: Check pyproject.toml consistency
+        entry: python3 areal/tools/check_pyproject_consistency.py
+        language: system
+        files: ^pyproject(\.vllm)?\.toml$
+        pass_filenames: false
+        always_run: false
+        require_serial: true
+
+  # Regenerate uv lockfiles when pyproject files change
+  - repo: local
+    hooks:
+      - id: uv-lock
+        name: Regenerate uv lockfiles
+        entry: bash scripts/uv_lock.sh
+        language: system
+        files: ^pyproject(\.vllm)?\.toml$
+        pass_filenames: false
+        always_run: false
+        require_serial: true
+
+  # Generate CLI documentation
+  - repo: local
+    hooks:
+      - id: generate-cli-docs
+        name: Generate CLI documentation
+        entry: python docs/generate_cli_docs.py
+        language: system
+        files: ^(areal/api/cli_args\.py|docs/generate_cli_docs\.py)$
+        pass_filenames: false
+        always_run: false
+        require_serial: true
+
+  # Ensure SPDX license header in Python source files
+  - repo: local
+    hooks:
+      - id: check-license-header
+        name: Check SPDX license header
+        entry: python3 areal/tools/check_license_header.py
+        language: system
+        files: ^areal/.*\.py$
+        types: [python]
+
+  # Format and lint .github/CODEOWNERS
+  - repo: local
+    hooks:
+      - id: format-codeowners
+        name: Format CODEOWNERS
+        entry: python3 areal/tools/format_codeowners.py
+        language: system
+        files: ^\.github/CODEOWNERS$
+        pass_filenames: false
+        always_run: false
+        require_serial: true
+
+  # Conventional Commits message check
+  - repo: https://github.com/compilerla/conventional-pre-commit
+    rev: v4.4.0
+    hooks:
+      - id: conventional-pre-commit
+        stages: [commit-msg]
+        args:
+          - feat
+          - fix
+          - docs
+          - gov
+          - style
+          - refactor
+          - perf
+          - test
+          - build
+          - ci
+          - chore
+          - revert
diff --git a/README.md b/README.md
index 5b2c4be..22c2475 100644
--- a/README.md
+++ b/README.md
@@ -26,8 +26,8 @@ There are many ways to participate, regardless of your background:
 - **Report issues** or propose features in the
   [main issue tracker](https://github.com/areal-project/AReaL/issues).
 - **Improve documentation**, translations, or examples in the main repository.
-- **Join community meetings.** AReaL holds regular community calls; meeting notes and recordings are linked from the main project's
-  [community materials](./meeting).
+- **Join community meetings.** AReaL holds regular community calls; meeting notes and
+  recordings are linked from the main project's [community materials](./meeting).
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for more details.
 
@@ -39,5 +39,5 @@ are all documented there.
 
 ## License
 
-All content in this repository is licensed under the
-[Apache License 2.0](LICENSE), the same license used by the main AReaL project.
+All content in this repository is licensed under the [Apache License 2.0](LICENSE), the
+same license used by the main AReaL project.
diff --git a/REPOSITORIES.md b/REPOSITORIES.md
index bdfe8fa..80b6e2a 100644
--- a/REPOSITORIES.md
+++ b/REPOSITORIES.md
@@ -2,8 +2,8 @@
 
 This document indexes the repositories that make up the **AReaL** ecosystem.
 
-Anything not listed here is not an official AReaL repository. If you maintain a
-project that integrates with AReaL and would like it referenced from the
+Anything not listed here is not an official AReaL repository. If you maintain a project
+that integrates with AReaL and would like it referenced from the
 [Community Projects](#community-projects) section, please open a pull request.
 
## Core repositories diff --git a/ROADMAP.md b/ROADMAP.md index a7a4ab5..7175511 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,8 +1,8 @@ # Roadmap The authoritative AReaL roadmap is maintained in the **main project repository** at -[areal-project/AReaL](https://github.com/areal-project/AReaL). This file is a pointer -so contributors browsing the community repository can find it easily. +[areal-project/AReaL](https://github.com/areal-project/AReaL). This file is a pointer so +contributors browsing the community repository can find it easily. ## Where to look @@ -20,10 +20,9 @@ so contributors browsing the community repository can find it easily. We welcome community input. The recommended flow is: -1. **Search** existing - [issues](https://github.com/areal-project/AReaL/issues) and - [discussions](https://github.com/areal-project/AReaL/discussions) for prior work - on your idea. +1. **Search** existing [issues](https://github.com/areal-project/AReaL/issues) and + [discussions](https://github.com/areal-project/AReaL/discussions) for prior work on + your idea. 1. **Open a discussion** in the [Ideas category](https://github.com/areal-project/AReaL/discussions/categories/ideas) to gather feedback. @@ -36,8 +35,8 @@ guide. ## Long-term vision -Our vision for AReaL is to be the go-to framework for training reasoning and agentic -AI systems that is: +Our vision for AReaL is to be the go-to framework for training reasoning and agentic AI +systems that is: 1. **Accessible** — easy to get started for both researchers and practitioners. 1. **Scalable** — runs from a single workstation to multi-thousand-GPU clusters. diff --git a/SECURITY.md b/SECURITY.md index 6bdf637..35a0d06 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -26,8 +26,8 @@ When reporting, please include: - Suggested mitigations, if you have them. You can expect an acknowledgement within **3 business days**. 
If you do not hear back -within that window, please follow up via the same channel or contact another -maintainer listed in [GOVERNANCE.md](GOVERNANCE.md). +within that window, please follow up via the same channel or contact another maintainer +listed in [GOVERNANCE.md](GOVERNANCE.md). ## Disclosure process @@ -37,8 +37,8 @@ After a report is received, the maintainers will: 1. Reproduce and assess the impact of the issue. 1. Develop and validate a fix in a private branch or draft advisory. 1. Coordinate a release and a public advisory with the reporter. -1. Publicly disclose the issue after a fix is available, crediting the reporter - unless they request otherwise. +1. Publicly disclose the issue after a fix is available, crediting the reporter unless + they request otherwise. We aim to resolve confirmed vulnerabilities and publish an advisory within **90 days** of the initial report. Embargo periods may be adjusted based on severity, complexity,