From 618cea239406944bcd0968a766ba8e1274d37829 Mon Sep 17 00:00:00 2001 From: Hector Flores Date: Mon, 22 Jun 2026 14:13:25 -0500 Subject: [PATCH] =?UTF-8?q?feat:=20add=202=20new=20error=20entries=20(trig?= =?UTF-8?q?gers,=20runner-environment)=20=E2=80=94=202026-06-22?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...o-update-disabled-30-day-rolling-floor.yml | 141 ++++++++++++++++++ ...-event-ruleset-silently-blocks-trigger.yml | 123 +++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 errors/runner-environment/self-hosted-runner-stops-receiving-jobs-auto-update-disabled-30-day-rolling-floor.yml create mode 100644 errors/triggers/workflow-execution-protections-actor-event-ruleset-silently-blocks-trigger.yml diff --git a/errors/runner-environment/self-hosted-runner-stops-receiving-jobs-auto-update-disabled-30-day-rolling-floor.yml b/errors/runner-environment/self-hosted-runner-stops-receiving-jobs-auto-update-disabled-30-day-rolling-floor.yml new file mode 100644 index 0000000..bd0b664 --- /dev/null +++ b/errors/runner-environment/self-hosted-runner-stops-receiving-jobs-auto-update-disabled-30-day-rolling-floor.yml @@ -0,0 +1,141 @@ +id: re-515 +title: 'Self-Hosted Runner Silently Stops Receiving Workflow Jobs — Auto-Update Disabled, Runner Falls Behind 30-Day Version Floor' +category: runner-environment +severity: silent-failure +tags: + - self-hosted + - runner + - auto-update + - version-enforcement + - 30-day-window + - disableupdate + - arc + - silent-failure +patterns: + - regex: 'Runner version.*is deprecated and cannot receive messages' + flags: 'i' + - regex: 'Your runner.*outdated.*stop.*receiving workflow jobs' + flags: 'i' + - regex: 'runner.*version.*too old.*no longer.*receive.*jobs|no longer.*receive.*jobs.*runner.*version' + flags: 'i' + - regex: 'job.*queued.*no runners.*matching.*label|waiting for runner.*never assigned' + flags: 'i' +error_messages: + - 'Runner version 2.329.0 is 
deprecated and cannot receive messages from the GitHub Actions service' + - 'Your runner is outdated and will stop receiving workflow jobs. Please upgrade to the latest runner version.' + - 'Job queued but no available runner with the label ''self-hosted'' picked it up within the timeout' + - 'This job was skipped because no runner is available' +root_cause: | + GitHub Actions requires that already-registered self-hosted runners stay current within + a rolling 30-day update window. This is a separate requirement from the one-time minimum + registration version (v2.329.0, documented in re-284). A runner that successfully + registered at v2.329.0 will begin silently losing jobs if it is never updated again. + + Enforcement mechanism: + - Each time a new runner release ships, a 30-day clock starts for each runner. + - If the runner has not installed the update within 30 days, GitHub stops dispatching + workflow jobs to that runner. + - When a critical security update is published, GitHub immediately pauses job queuing + until the update is applied (regardless of the 30-day window). + + This failure mode is "silent" because: + 1. The workflow run IS created — it shows "Queued" in the Actions tab. + 2. The run may stay in Queued indefinitely (no automatic failure). + 3. No error appears in the workflow YAML or the job log. + 4. Maintainers typically blame concurrency, branch protection, or network issues before + checking runner version currency. + + The annotation "Your runner is outdated and will stop receiving workflow jobs" appears + in the "Set up job" log section BEFORE the runner stops receiving jobs — it is a warning, + not the final failure message. 
+ + Who is most affected: + - Self-hosted runners with --disableupdate flag in runner startup scripts + - ARC (Actions Runner Controller) deployments with disableUpdate: true in RunnerDeployment + - VM/container images that bake in a specific runner binary version and are rebuilt + infrequently + - Air-gapped or isolated runners that cannot reach the GitHub runner update service + + Enforcement brownout schedule (Config+Runtime phases): + GitHub Enterprise Cloud with Data Residency: + July 13, July 15, July 17 (first Config+Runtime days) + July 20, July 22, July 24 (full Config+Runtime week) + Full enforcement: July 31, 2026 + GitHub Enterprise Cloud: + September 9, September 11 (first Config+Runtime days) + September 14, September 16, September 18 (full Config+Runtime week) + Full enforcement: September 25, 2026 +fix: | + 1. Enable auto-update on all self-hosted runners: + - Remove --disableupdate from runner startup scripts + - Ensure the runner process can reach https://api.github.com and + https://objects.githubusercontent.com (for binary downloads) + + 2. For ARC (Actions Runner Controller): + - Remove or set disableUpdate: false in RunnerDeployment/AutoscalingRunnerSet spec + - Or update the runner container image to a version shipping the latest runner release + + 3. For pinned VM/container images: + - Add a dynamic runner download step that fetches the latest release from + https://github.com/actions/runner/releases/latest at provisioning time (see fix_code) + - Set up Renovate/Dependabot to keep runner image tags within 30 days of the latest + + 4. For air-gapped runners: + - Mirror runner releases to an internal artifact store + - Implement a scheduled job that checks for new runner releases and updates the store + - Configure the runner with --updatechannel pointing to your internal mirror + + 5. 
Audit current runner versions BEFORE brownout windows (July 13 for Data Residency, + September 9 for GH Enterprise Cloud): + - Use the REST API to check versions (see fix_code below) + - Check the "Set up job" log for the "Runner Image" and version lines +fix_code: + - language: bash + label: 'Audit runner versions via GitHub API before brownout windows' + code: | + # Check self-hosted runner versions for a repository + gh api repos/{owner}/{repo}/actions/runners \ + --jq '.runners[] | {name: .name, version: .version, status: .status, busy: .busy}' + + # Org-level audit — list every runner version so stale ones can be compared against the latest release + gh api orgs/{org}/actions/runners \ + --jq '.runners[] | select(.version != null) | {name: .name, version: .version}' + - language: bash + label: 'Fix: always download latest runner at provisioning time (never pin a version URL)' + code: | + #!/bin/bash + # Fetch the latest runner version dynamically — do NOT hardcode a version URL + RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest \ + | jq -r '.tag_name' | sed 's/^v//') + curl -L -o actions-runner-linux-x64.tar.gz \ + "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" + tar xzf ./actions-runner-linux-x64.tar.gz + # Configure without --disableupdate so the runner can auto-update + ./config.sh --url https://github.com/{org} --token ${RUNNER_TOKEN} + - language: yaml + label: 'Fix for ARC: remove disableUpdate from RunnerDeployment spec' + code: | + apiVersion: actions.summerwind.dev/v1alpha1 + kind: RunnerDeployment + metadata: + name: my-runners + spec: + template: + spec: + image: ghcr.io/actions/actions-runner:latest # always use a recent tag + # REMOVE this line if present — it prevents auto-update: + # disableUpdate: true + # With auto-update enabled (default), the runner pulls updates automatically +prevention: + - 'Never start a self-hosted runner with the --disableupdate flag — this is
the most common cause of runners silently losing jobs after 30 days' + - 'For ARC deployments, do not set disableUpdate: true; let the runner auto-update within its running container' + - 'Set up a weekly automated check that queries the runners REST API and alerts if any runner version is more than 3 weeks behind the latest release' + - 'Audit runner versions before known brownout dates — the Config+Runtime brownouts start July 13, 2026 (Data Residency) and September 9, 2026 (GH Enterprise Cloud)' + - 'When the "Set up job" log shows "Your runner is outdated" annotation, treat it as an urgent alert, not a warning — update before the 30-day window closes' +docs: + - url: 'https://github.blog/changelog/2026-06-12-github-actions-minimum-version-enforcement-timeline-for-self-hosted-runners/' + label: 'GitHub Changelog June 12, 2026: Minimum version enforcement timeline with brownout calendar' + - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-version-requirements' + label: 'GitHub Docs: Self-hosted runner version requirements' + - url: 'https://github.com/actions/runner/releases' + label: 'actions/runner releases — check latest version' diff --git a/errors/triggers/workflow-execution-protections-actor-event-ruleset-silently-blocks-trigger.yml b/errors/triggers/workflow-execution-protections-actor-event-ruleset-silently-blocks-trigger.yml new file mode 100644 index 0000000..7854d5d --- /dev/null +++ b/errors/triggers/workflow-execution-protections-actor-event-ruleset-silently-blocks-trigger.yml @@ -0,0 +1,123 @@ +id: tr-123 +title: 'Workflow Execution Protection Actor/Event Ruleset Silently Blocks Workflow from Triggering' +category: triggers +severity: silent-failure +tags: + - workflow-execution-protections + - actor-rules + - event-rules + - rulesets + - enterprise + - organization + - silent-block + - policies +patterns: + - regex: 'Workflow run was blocked by workflow 
execution protection policy' + flags: 'i' + - regex: 'Actor is not in the actor allow list|actor.*not.*allowed.*trigger' + flags: 'i' + - regex: 'Event.*blocked by workflow execution protection|event.*not permitted.*policy' + flags: 'i' + - regex: 'workflow execution protection.*blocked|execution protection.*prevent' + flags: 'i' +error_messages: + - 'Workflow run was blocked by workflow execution protection policy' + - 'Actor is not in the actor allow list for this workflow' + - 'Event ''push'' is blocked by the workflow execution protection event rule' + - 'This workflow was blocked by your organization''s workflow execution protections policy' +root_cause: | + GitHub announced Workflow Execution Protections in public preview on June 18, 2026 + (GitHub Enterprise, organizations, and repositories). When an enterprise admin or + organization owner configures actor rules or event rules under + Settings → Actions → Policies (a new section separate from General), workflows that + don't satisfy the allow-list rules are silently blocked. + + Two rule types exist: + - Actor rules: control who can trigger workflows. Targets include individual users, + repository roles (Read, Maintain, Admin), GitHub Apps, Copilot, and Dependabot. + - Event rules: control which events are permitted (push, pull_request, + pull_request_target, workflow_dispatch, etc.). + + The failure mode is completely silent from the contributor's perspective: + - No workflow run is created in the repository's Actions tab. + - No email notification is sent to the actor. + - No PR check status is posted (PR checks never appear). + - The workflow YAML is never evaluated; the block happens before execution. + + Evidence is only visible in the organization audit log (event: + "workflow_run.blocked_by_policy") or via the organization's Policies settings page. + + Common misconfiguration scenarios: + 1. 
Admin restricts workflow_dispatch to Maintain/Admin roles — Write-access contributors + and GitHub Apps (including Dependabot) can no longer trigger manual workflows. + 2. Admin blocks pull_request_target organization-wide — fork PR workflows that use it stop. + 3. Dependabot or a GitHub App is excluded from the actor allow-list — automated + security/update workflows silently stop running. + 4. After a team member's role is downgraded from Maintain to Write, their workflow + triggers suddenly stop working with no error. + + Evaluate mode: Admins can enable "evaluate mode" to shadow-run rules without enforcing + them, which makes blocked runs visible before the policy is activated. +fix: | + 1. Identify whether a protection policy is active: + - Navigate to Organization Settings → Actions → Policies (new section) + - Or Repository Settings → Actions → Policies + - Check for any active rulesets with actor or event rules + + 2. Find the blocking rule: + - Review the organization audit log for events matching the blocked workflow trigger + - Look for entries containing "workflow_run.blocked_by_policy" + - Query via API: GET /orgs/{org}/audit-log?phrase=workflow_run.blocked_by_policy + + 3. Fix the policy: + - Add the blocked actor (user/app/Dependabot/Copilot) to the actor allow-list + - Or ensure the triggered event is permitted in the event rules + - Or switch the ruleset to "evaluate" mode to observe without enforcing + + 4. If you need the policy active, adjust the actor allow-list to include required roles, + specific GitHub Apps, or Dependabot/Copilot.
+fix_code: + - language: bash + label: 'Search the org audit log for blocked workflow runs via GitHub CLI' + code: | + # Query audit log for blocked workflow runs + gh api 'orgs/{org}/audit-log?phrase=workflow_run.blocked_by_policy&per_page=30' \ + --jq '.[] | {actor: .actor, repo: .repo, event: .action, created_at: .created_at}' + + # Or look for the specific event type + gh api 'orgs/{org}/audit-log?phrase=action%3Aworkflow_run.blocked_by_policy' + - language: yaml + label: 'Example: allow Dependabot and GitHub Apps in actor ruleset' + code: | + # In your org/repo Settings → Actions → Policies, the ruleset might look like: + # (conceptual — configured via GitHub UI or Rulesets API) + # + # Actor ruleset — allow list: + # ✅ Dependabot + # ✅ GitHub Apps (your specific apps) + # ✅ Copilot + # ✅ Repository role: Write (and above) + # + # If you restrict to Admin-only, Dependabot PRs and bot workflows will be blocked. + # Always include the GitHub Apps and service accounts that run your automation. + - language: yaml + label: 'Use evaluate mode to test policy without breaking workflows' + code: | + # In Settings → Actions → Policies: + # 1. Create your ruleset with actor/event rules + # 2. Set mode to "Evaluate" (not "Active") + # 3. Run workflows and check which would have been blocked + # 4.
Adjust the allow-list before switching to "Active" enforcement +prevention: + - 'Before activating a workflow execution protection policy, switch to "evaluate" mode and run all workflow types to identify what would be blocked' + - 'Always include Dependabot, Copilot, and critical GitHub Apps in your actor allow-list when configuring actor rules' + - 'Document which events each workflow requires — restricting pull_request_target organization-wide will break all fork PR workflows that depend on it' + - 'When changing team member roles (e.g., Maintain → Write), check whether any workflow execution protection policies restrict triggers by role' + - 'Monitor the organization audit log after enabling policies; "workflow_run.blocked_by_policy" events indicate triggers being silently dropped' +docs: + - url: 'https://github.blog/changelog/2026-06-18-control-who-and-what-triggers-github-actions-workflows/' + label: 'GitHub Changelog June 18, 2026: Control who and what triggers GitHub Actions workflows' + - url: 'https://docs.github.com/en/organizations/managing-organization-settings/actions-policies/workflow-execution-protections' + label: 'GitHub Docs: Workflow execution protections' + - url: 'https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-rulesets/about-rulesets' + label: 'GitHub Docs: About rulesets (the framework powering execution protections)'