diff --git a/.github/workflows/agentic-ci-authorized-checks.yml b/.github/workflows/agentic-ci-authorized-checks.yml
new file mode 100644
index 000000000..0db32cf6f
--- /dev/null
+++ b/.github/workflows/agentic-ci-authorized-checks.yml
@@ -0,0 +1,148 @@
+# Dispatched by authorize-agentic-ci.yml once a maintainer has authorized a
+# specific head SHA of an Agentic CI PR. The pr job re-validates the PR, and
+# the remaining jobs fail unless it succeeded and reported trusted=true.
+name: "Agentic CI Authorization Checks"
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: "Agentic CI PR number"
+        required: true
+        type: string
+      expected_head_sha:
+        description: "PR head SHA authorized by the maintainer"
+        required: true
+        type: string
+
+permissions:
+  contents: read
+  pull-requests: read
+
+concurrency:
+  group: agentic-ci-authorized-checks-${{ inputs.pr_number }}
+  cancel-in-progress: true
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  pr:
+    timeout-minutes: 5
+    runs-on: ubuntu-latest
+    outputs:
+      title_b64: ${{ steps.metadata.outputs.title_b64 }}
+      trusted: ${{ steps.metadata.outputs.trusted }}
+    steps:
+      - name: Load PR metadata
+        id: metadata
+        env:
+          EXPECTED_HEAD_SHA: ${{ inputs.expected_head_sha }}
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ inputs.pr_number }}
+          REPO: ${{ github.repository }}
+        run: |
+          if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
+            echo "::error::Invalid PR number: ${PR_NUMBER}"
+            exit 1
+          fi
+
+          PR_JSON=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}")
+          PR_AUTHOR=$(printf '%s' "$PR_JSON" | jq -r '.user.login')
+          HEAD_REPO=$(printf '%s' "$PR_JSON" | jq -r '.head.repo.full_name')
+          HEAD_REF=$(printf '%s' "$PR_JSON" | jq -r '.head.ref')
+          HEAD_SHA=$(printf '%s' "$PR_JSON" | jq -r '.head.sha')
+          TITLE=$(printf '%s' "$PR_JSON" | jq -r '.title')
+          PR_BODY=$(printf '%s' "$PR_JSON" | jq -r '.body // ""')
+
+          if [ "$HEAD_SHA" != "$EXPECTED_HEAD_SHA" ]; then
+            echo "::error::PR head moved from ${EXPECTED_HEAD_SHA} to ${HEAD_SHA}."
+            exit 1
+          fi
+
+          # Refuse to run when this workflow's commit differs from the PR head.
+          if [ "$GITHUB_SHA" != "$HEAD_SHA" ]; then
+            echo "::error::Workflow SHA ${GITHUB_SHA} does not match PR head ${HEAD_SHA}."
+            exit 1
+          fi
+
+          TRUSTED=false
+          printf '%s' "$PR_BODY" > /tmp/pr-body-raw.txt
+          # Commit authors can be spoofed; trust only PR metadata GitHub controls.
+          # NOTE(review): the grep pattern below is empty as shown here, so it
+          # matches any non-empty body; confirm the intended body marker.
+          if [ "$PR_AUTHOR" = "github-actions[bot]" ] && \
+             [ "$HEAD_REPO" = "$REPO" ] && \
+             [[ "$HEAD_REF" == agentic-ci/* ]] && \
+             grep -Eq '' /tmp/pr-body-raw.txt; then
+            TRUSTED=true
+          fi
+
+          echo "trusted=${TRUSTED}" >> "$GITHUB_OUTPUT"
+          echo "title_b64=$(printf '%s' "$TITLE" | base64 -w0)" >> "$GITHUB_OUTPUT"
+
+  DCOAssistant:
+    needs: pr
+    if: always()
+    timeout-minutes: 5
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate authorization
+        env:
+          PR_RESULT: ${{ needs.pr.result }}
+          TRUSTED: ${{ needs.pr.outputs.trusted }}
+        run: |
+          if [ "$PR_RESULT" != "success" ] || [ "$TRUSTED" != "true" ]; then
+            echo "::error::This PR is not an authorized Agentic CI PR."
+            exit 1
+          fi
+          echo "Trusted Agentic CI PR authorized by a maintainer."
+
+  semantic-pull-request:
+    name: semantic-pull-request / semantic-pull-request
+    needs: pr
+    if: always()
+    timeout-minutes: 5
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate PR title
+        env:
+          PR_RESULT: ${{ needs.pr.result }}
+          TITLE_B64: ${{ needs.pr.outputs.title_b64 }}
+          TRUSTED: ${{ needs.pr.outputs.trusted }}
+        run: |
+          if [ "$PR_RESULT" != "success" ] || [ "$TRUSTED" != "true" ]; then
+            echo "::error::This PR is not an authorized Agentic CI PR."
+            exit 1
+          fi
+
+          TITLE=$(printf '%s' "$TITLE_B64" | base64 -d)
+          TYPES='feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert|cp'
+          REGEX="^(${TYPES})(\\([^)]+\\))?!?: .+"
+          if ! [[ "$TITLE" =~ $REGEX ]]; then
+            echo "::error::PR title is not semantic: ${TITLE}"
+            exit 1
+          fi
+
+          if [ "${#TITLE}" -gt 80 ]; then
+            echo "::error::PR title is longer than 80 characters: ${#TITLE}"
+            exit 1
+          fi
+
+  check:
+    needs: pr
+    if: always()
+    timeout-minutes: 5
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate linked issue authorization
+        env:
+          PR_RESULT: ${{ needs.pr.result }}
+          TRUSTED: ${{ needs.pr.outputs.trusted }}
+        run: |
+          if [ "$PR_RESULT" != "success" ] || [ "$TRUSTED" != "true" ]; then
+            echo "::error::This PR is not an authorized Agentic CI PR."
+            exit 1
+          fi
+          echo "Trusted Agentic CI PRs do not require a linked issue."
diff --git a/.github/workflows/authorize-agentic-ci.yml b/.github/workflows/authorize-agentic-ci.yml
new file mode 100644
index 000000000..6fa69648f
--- /dev/null
+++ b/.github/workflows/authorize-agentic-ci.yml
@@ -0,0 +1,195 @@
+# Lets a collaborator with admin, maintain, or write permission authorize checks
+# for an Agentic CI PR by commenting exactly "/authorize-agentic-ci". After the
+# validations below pass, it dispatches ci.yml and agentic-ci-authorized-checks.yml.
+name: "Authorize Agentic CI"
+
+on:
+  issue_comment:
+    types: [created]
+
+permissions:
+  actions: write
+  contents: read
+  issues: write
+  pull-requests: read
+
+defaults:
+  run:
+    shell: bash
+
+concurrency:
+  group: authorize-agentic-ci-${{ github.event.issue.number }}
+  cancel-in-progress: false
+
+jobs:
+  authorize:
+    if: >-
+      github.repository_owner == 'NVIDIA-NeMo'
+      && github.event.issue.pull_request != null
+      && github.event.comment.body == '/authorize-agentic-ci'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check commenter permission
+        env:
+          GH_TOKEN: ${{ github.token }}
+          COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
+          PR_NUMBER: ${{ github.event.issue.number }}
+          REPO: ${{ github.repository }}
+        run: |
+          # A failed lookup falls back to "none", which the case below rejects.
+          PERMISSION=$(gh api "repos/${REPO}/collaborators/${COMMENT_AUTHOR}/permission" \
+            --jq '.permission' 2>/dev/null || echo "none")
+          echo "Comment author ${COMMENT_AUTHOR} has ${PERMISSION} permission."
+
+          case "$PERMISSION" in
+            admin|maintain|write)
+              ;;
+            *)
+              gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+                "Only maintainers with write access can authorize Agentic CI checks."
+              exit 1
+              ;;
+          esac
+
+      - name: Load PR metadata
+        id: pr
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ github.event.issue.number }}
+          REPO: ${{ github.repository }}
+        run: |
+          PR_JSON=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}")
+
+          PR_AUTHOR=$(printf '%s' "$PR_JSON" | jq -r '.user.login')
+          HEAD_REPO=$(printf '%s' "$PR_JSON" | jq -r '.head.repo.full_name')
+          HEAD_REF=$(printf '%s' "$PR_JSON" | jq -r '.head.ref')
+          HEAD_SHA=$(printf '%s' "$PR_JSON" | jq -r '.head.sha')
+          STATE=$(printf '%s' "$PR_JSON" | jq -r '.state')
+          PR_BODY=$(printf '%s' "$PR_JSON" | jq -r '.body // ""')
+
+          TRUSTED=false
+          printf '%s' "$PR_BODY" > /tmp/pr-body-raw.txt
+          # Commit authors can be spoofed; trust only PR metadata GitHub controls.
+          # NOTE(review): the grep pattern below is empty as shown here, so it
+          # matches any non-empty body; confirm the intended body marker.
+          if [ "$PR_AUTHOR" = "github-actions[bot]" ] && \
+             [ "$HEAD_REPO" = "$REPO" ] && \
+             [[ "$HEAD_REF" == agentic-ci/* ]] && \
+             grep -Eq '' /tmp/pr-body-raw.txt; then
+            TRUSTED=true
+          fi
+
+          echo "author=${PR_AUTHOR}" >> "$GITHUB_OUTPUT"
+          echo "head_ref=${HEAD_REF}" >> "$GITHUB_OUTPUT"
+          echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
+          echo "state=${STATE}" >> "$GITHUB_OUTPUT"
+          echo "trusted=${TRUSTED}" >> "$GITHUB_OUTPUT"
+
+      - name: Validate Agentic CI PR
+        env:
+          COMMENT_ID: ${{ github.event.comment.id }}
+          GH_TOKEN: ${{ github.token }}
+          HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+          PR_NUMBER: ${{ github.event.issue.number }}
+          REPO: ${{ github.repository }}
+          STATE: ${{ steps.pr.outputs.state }}
+          TRUSTED: ${{ steps.pr.outputs.trusted }}
+        run: |
+          if [ "$STATE" != "open" ]; then
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+              "Agentic CI checks were not authorized because this PR is not open."
+            exit 1
+          fi
+
+          if [ "$TRUSTED" != "true" ]; then
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+              "Agentic CI checks were not authorized because this PR does not match the trusted Agentic CI metadata."
+            exit 1
+          fi
+
+          if [ -z "$COMMENT_ID" ]; then
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+              "Agentic CI checks were not authorized because the authorization comment ID was missing."
+            exit 1
+          fi
+
+          # Poll the timeline (3 attempts) until the authorization comment shows up.
+          COMMENT_FOUND=false
+          for ATTEMPT in 1 2 3; do
+            gh api --paginate "repos/${REPO}/issues/${PR_NUMBER}/timeline?per_page=100" \
+              -H "Accept: application/vnd.github+json" \
+              --jq '.[] | [.event, ((.id // .sha // "") | tostring)] | @tsv' > /tmp/pr-timeline.tsv
+            if awk -F '\t' -v comment_id="$COMMENT_ID" '
+              $1 == "commented" && $2 == comment_id { found = 1 }
+              END { exit found ? 0 : 1 }
+            ' /tmp/pr-timeline.tsv; then
+              COMMENT_FOUND=true
+              break
+            fi
+            sleep 2
+          done
+          if [ "$COMMENT_FOUND" != "true" ]; then
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+              "Agentic CI checks were not authorized because the authorization comment was not found in the PR timeline."
+            exit 1
+          fi
+
+          # Reject if a head-changing event is listed after the authorization comment.
+          HEAD_EVENT_AFTER_COMMENT=$(awk -F '\t' -v comment_id="$COMMENT_ID" '
+            $1 == "commented" && $2 == comment_id { seen_comment = 1; next }
+            seen_comment && ($1 == "committed" || $1 == "head_ref_force_pushed" || $1 == "head_ref_deleted" || $1 == "head_ref_restored") {
+              print $1 " " $2
+              exit
+            }
+          ' /tmp/pr-timeline.tsv)
+          if [ -n "$HEAD_EVENT_AFTER_COMMENT" ]; then
+            {
+              echo "Agentic CI checks were not authorized because the PR head changed after the authorization comment."
+              echo
+              echo "Latest PR head: \`${HEAD_SHA}\`"
+              echo "Detected update: \`${HEAD_EVENT_AFTER_COMMENT}\`"
+              echo
+              echo "Please review the latest commit and comment \`/authorize-agentic-ci\` again."
+            } > /tmp/agentic-ci-auth-stale.md
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body-file /tmp/agentic-ci-auth-stale.md
+            exit 1
+          fi
+
+          # Fail closed: load the file list on its own so a gh failure aborts
+          # here instead of being masked by "|| true" as an empty match.
+          if ! CHANGED_FILES=$(gh pr diff "$PR_NUMBER" --repo "$REPO" --name-only); then
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+              "Agentic CI checks were not authorized because the list of changed files could not be loaded."
+            exit 1
+          fi
+          # grep exits 1 when nothing matches; that is the expected clean case.
+          BLOCKED=$(printf '%s\n' "$CHANGED_FILES" | grep -E '^\.github/' || true)
+          if [ -n "$BLOCKED" ]; then
+            {
+              echo "Agentic CI checks were not authorized because this PR changes privileged repository files:"
+              echo
+              printf '%s\n' "$BLOCKED" | sed 's/^/- `/' | sed 's/$/`/'
+            } > /tmp/agentic-ci-auth-failed.md
+            gh issue comment "$PR_NUMBER" --repo "$REPO" --body-file /tmp/agentic-ci-auth-failed.md
+            exit 1
+          fi
+
+          echo "Authorizing checks for ${HEAD_SHA}."
+
+      - name: Dispatch checks
+        env:
+          GH_TOKEN: ${{ github.token }}
+          HEAD_REF: ${{ steps.pr.outputs.head_ref }}
+          HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+          PR_NUMBER: ${{ github.event.issue.number }}
+          REPO: ${{ github.repository }}
+        run: |
+          # Both runs target the PR branch and receive the authorized SHA as input.
+          gh workflow run ci.yml --repo "$REPO" --ref "$HEAD_REF" \
+            -f expected_head_sha="$HEAD_SHA"
+          gh workflow run agentic-ci-authorized-checks.yml --repo "$REPO" --ref "$HEAD_REF" \
+            -f pr_number="$PR_NUMBER" \
+            -f expected_head_sha="$HEAD_SHA"
+
+          gh issue comment "$PR_NUMBER" --repo "$REPO" --body \
+            "Authorized Agentic CI checks for \`${HEAD_SHA}\`. Launched CI and authorization checks."
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 312bd1a32..428484e50 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,10 +6,29 @@ on: pull_request: branches: [ main ] workflow_dispatch: + inputs: + expected_head_sha: + description: "Optional head SHA that this dispatched run must execute" + required: false + type: string permissions: {} jobs: + validate-dispatch: + name: Validate dispatched SHA + runs-on: ubuntu-latest + steps: + - name: Check expected SHA + env: + EXPECTED_HEAD_SHA: ${{ inputs.expected_head_sha }} + run: | + if [ -n "$EXPECTED_HEAD_SHA" ] && [ "$GITHUB_SHA" != "$EXPECTED_HEAD_SHA" ]; then + echo "::error::Workflow SHA ${GITHUB_SHA} does not match expected ${EXPECTED_HEAD_SHA}." + exit 1 + fi + echo "Dispatch target SHA validated." + # =========================================================================== # Independent Package Tests # Each package is tested in isolation to ensure proper dependency boundaries @@ -17,6 +36,7 @@ jobs: test-config: name: Test Config (Python ${{ matrix.python-version }} on ${{ matrix.os }}) + needs: validate-dispatch runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -48,6 +68,7 @@ jobs: test-engine: name: Test Engine (Python ${{ matrix.python-version }} on ${{ matrix.os }}) + needs: validate-dispatch runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -79,6 +100,7 @@ jobs: test-interface: name: Test Interface (Python ${{ matrix.python-version }} on ${{ matrix.os }}) + needs: validate-dispatch runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -119,6 +141,7 @@ jobs: coverage: name: Coverage Check (Python ${{ matrix.python-version }}) + needs: validate-dispatch runs-on: ubuntu-latest strategy: fail-fast: false @@ -156,6 +179,7 @@ jobs: test-e2e: name: End to end test (Python ${{ matrix.python-version }} on ${{ matrix.os }}) + needs: validate-dispatch runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -183,6 +207,7 @@ jobs: lint: name: Lint and 
Format Check + needs: validate-dispatch runs-on: ubuntu-latest steps: @@ -207,6 +232,7 @@ jobs: license-headers: name: Check License Headers + needs: validate-dispatch runs-on: ubuntu-latest steps: @@ -237,7 +263,7 @@ jobs: test-summary: name: Test (Python ${{ matrix.python-version }} on ${{ matrix.os }}) runs-on: ubuntu-latest - needs: [test-config, test-engine, test-interface] + needs: [validate-dispatch, test-config, test-engine, test-interface] if: always() strategy: matrix: @@ -247,10 +273,12 @@ jobs: steps: - name: Check all tests passed run: | - if [[ "${{ needs.test-config.result }}" != "success" ]] || \ + if [[ "${{ needs.validate-dispatch.result }}" != "success" ]] || \ + [[ "${{ needs.test-config.result }}" != "success" ]] || \ [[ "${{ needs.test-engine.result }}" != "success" ]] || \ [[ "${{ needs.test-interface.result }}" != "success" ]]; then echo "One or more test jobs failed" + echo "validate-dispatch: ${{ needs.validate-dispatch.result }}" echo "test-config: ${{ needs.test-config.result }}" echo "test-engine: ${{ needs.test-engine.result }}" echo "test-interface: ${{ needs.test-interface.result }}" diff --git a/.github/workflows/dco-assistant.yml b/.github/workflows/dco-assistant.yml index dddb19bec..7c8cbb45c 100644 --- a/.github/workflows/dco-assistant.yml +++ b/.github/workflows/dco-assistant.yml @@ -25,8 +25,45 @@ jobs: if: github.repository_owner == 'NVIDIA-NeMo' runs-on: ubuntu-latest steps: + - name: Check trusted Agentic CI PR + id: trusted-agentic-ci + env: + GH_TOKEN: ${{ github.token }} + EVENT_NAME: ${{ github.event_name }} + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }} + HEAD_REF: ${{ github.event.pull_request.head.ref }} + PR_BODY: ${{ github.event.pull_request.body }} + ISSUE_NUMBER: ${{ github.event.issue.number }} + REPO: ${{ github.repository }} + run: | + TRUSTED=false + + if [ "$EVENT_NAME" = "issue_comment" ] && [ -n "$ISSUE_NUMBER" ]; then + 
PR_JSON=$(gh api "repos/${REPO}/pulls/${ISSUE_NUMBER}" 2>/dev/null || true) + if [ -n "$PR_JSON" ]; then + PR_AUTHOR=$(printf '%s' "$PR_JSON" | jq -r '.user.login') + HEAD_REPO=$(printf '%s' "$PR_JSON" | jq -r '.head.repo.full_name') + HEAD_REF=$(printf '%s' "$PR_JSON" | jq -r '.head.ref') + PR_BODY=$(printf '%s' "$PR_JSON" | jq -r '.body // ""') + fi + fi + + printf '%s' "$PR_BODY" > /tmp/pr-body-raw.txt + # Commit authors can be spoofed; trust only PR metadata GitHub controls. + if [ "$PR_AUTHOR" = "github-actions[bot]" ] && \ + [ "$HEAD_REPO" = "$REPO" ] && \ + [[ "$HEAD_REF" == agentic-ci/* ]] && \ + grep -Eq '' /tmp/pr-body-raw.txt; then + TRUSTED=true + fi + + echo "trusted=${TRUSTED}" >> "$GITHUB_OUTPUT" + - name: "DCO Assistant" - if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the DCO document and I hereby sign the DCO.') || github.event_name == 'pull_request_target' + if: >- + steps.trusted-agentic-ci.outputs.trusted != 'true' + && ((github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the DCO document and I hereby sign the DCO.') || github.event_name == 'pull_request_target') uses: contributor-assistant/github-action@ca4a40a7d1004f18d9960b404b97e5f30a505a08 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/pr-linked-issue.yml b/.github/workflows/pr-linked-issue.yml index 4842a0464..5cf7833df 100644 --- a/.github/workflows/pr-linked-issue.yml +++ b/.github/workflows/pr-linked-issue.yml @@ -36,9 +36,23 @@ jobs: env: GH_TOKEN: ${{ github.token }} PR_AUTHOR: ${{ github.event.pull_request.user.login }} + HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }} + HEAD_REF: ${{ github.event.pull_request.head.ref }} + PR_BODY: ${{ github.event.pull_request.body }} + REPO: ${{ github.repository }} run: | USER="$PR_AUTHOR" + printf '%s' "$PR_BODY" > /tmp/pr-body-raw.txt + # Commit authors can be spoofed; trust only PR metadata GitHub controls. 
+ if [ "$USER" = "github-actions[bot]" ] && \ + [ "$HEAD_REPO" = "$REPO" ] && \ + [[ "$HEAD_REF" == agentic-ci/* ]] && \ + grep -Eq '' /tmp/pr-body-raw.txt; then + echo "is_collaborator=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + # Bots that are always allowed (match DCO allowlist pattern). if [ "$USER" = "dependabot[bot]" ]; then echo "is_collaborator=true" >> "$GITHUB_OUTPUT"