From de7a41eef798f22f2487883936e1356863607992 Mon Sep 17 00:00:00 2001
From: Dave North <6616703+dnorth98@users.noreply.github.com>
Date: Wed, 4 Feb 2026 16:03:13 -0500
Subject: [PATCH 1/3] Add verbose rate limit handling and improve API error
 handling

When the GitHub API rate limits requests, the agithub library silently
sleeps with no user feedback. This adds a monkey-patch to print a
message to stderr showing how long the script will sleep and when it
will resume.

Also adds rate limit detection for API responses, output file support,
and better error handling for unexpected API responses.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 get-deployment-metrics.py | 149 +++++++++++++++++++++++++++++++++-----
 1 file changed, 129 insertions(+), 20 deletions(-)

diff --git a/get-deployment-metrics.py b/get-deployment-metrics.py
index bc1d62d..9dfdeab 100755
--- a/get-deployment-metrics.py
+++ b/get-deployment-metrics.py
@@ -1,14 +1,35 @@
 #!/usr/bin/env python3
 
 import os
+import sys
 import logging
 import logging.handlers
 import argparse
 import fnmatch
+import time
 from agithub.GitHub import GitHub
 from dotenv import load_dotenv
 
 
+def make_verbose_rate_limit_handler(client):
+    """Patch a GitHub client to print rate limit messages."""
+    original_sleep = client.sleep_until_more_ratelimit
+
+    def verbose_sleep():
+        seconds = client.ratelimit_seconds_remaining()
+        reset_time = time.strftime(
+            "%H:%M:%S", time.localtime(time.time() + seconds)
+        )
+        print(
+            f"Rate limited by GitHub API. Sleeping for {seconds} seconds until {reset_time}...",
+            file=sys.stderr,
+            flush=True,
+        )
+        time.sleep(seconds)
+
+    client.sleep_until_more_ratelimit = verbose_sleep
+
+
 def get_mins_secs_str(duration_in_ms):
     duration_secs, duration_in_ms = divmod(duration_in_ms, 1000)
     duration_mins, duration_secs = divmod(duration_secs, 60)
@@ -25,6 +46,25 @@ def format_number(float_val):
     return str(return_val)
 
 
+def is_rate_limited(status, response):
+    """Check if a GitHub API response indicates rate limiting."""
+    if status == 403:
+        message = response.get("message", "") if isinstance(response, dict) else ""
+        if "rate limit" in message.lower():
+            return True
+    return False
+
+
+# Global list to collect output for file writing
+output_lines = []
+
+
+def output(message=""):
+    """Print to stdout and collect for file output."""
+    print(message)
+    output_lines.append(message)
+
+
 def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
     # Pagination does not work on this call
     # https://github.com/mozilla/agithub/issues/76
@@ -42,6 +82,34 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
             .runs.get(created=date_filter, page=page_to_get)
         )
 
+        # Check for rate limiting
+        if is_rate_limited(gh_status, workflow_runs):
+            print(
+                "WARNING: GitHub API rate limit exceeded while fetching workflow runs for {}/{}. Results may be incomplete.".format(
+                    org_name, repo_name
+                )
+            )
+            logger.warning("Rate limit exceeded - stopping pagination")
+            break
+
+        # Check for API errors (non-2xx status codes)
+        if gh_status < 200 or gh_status >= 300:
+            logger.warning(
+                "GitHub API returned status {} for workflow {} in {}/{}: {}".format(
+                    gh_status, workflow_id, org_name, repo_name, workflow_runs
+                )
+            )
+            break
+
+        # Handle unexpected response format
+        if "workflow_runs" not in workflow_runs:
+            logger.warning(
+                "Unexpected API response for workflow {} in {}/{}: {}".format(
+                    workflow_id, org_name, repo_name, workflow_runs
+                )
+            )
+            break
+
         runs = runs + workflow_runs["workflow_runs"]
 
         total_runs = workflow_runs["total_count"]
@@ -102,6 +170,12 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
     parser.add_argument(
         "--verbose", help="Turn on DEBUG logging", action="store_true", required=False
     )
+    parser.add_argument(
+        "--output-file",
+        help="Write results to the specified file",
+        dest="output_file",
+        required=False,
+    )
 
     args = parser.parse_args()
 
@@ -129,13 +203,20 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
         logger.error("Missing GITHUB_PAT environment variable - unable to continue")
         exit(1)
 
-    # Initialize connection to Github API
+    # Initialize connection to Github API with verbose rate limit handling
     github_handle = GitHub(token=github_pat, paginate=True)
+    make_verbose_rate_limit_handler(github_handle.client)
 
     # Get all the repos in the org
     # /orgs/{org}/repos
     gh_status, repo_data = github_handle.orgs[args.org_name].repos.get()
 
+    if is_rate_limited(gh_status, repo_data):
+        print(
+            "ERROR: GitHub API rate limit exceeded while fetching repos. Please wait and try again."
+        )
+        exit(1)
+
     for repo in repo_data:
         repo_name = repo["name"]
         repo_printed = False
@@ -152,6 +233,22 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
             repo_name
         ].actions.workflows.get()
 
+        if is_rate_limited(gh_status, workflow_data):
+            print(
+                "WARNING: GitHub API rate limit exceeded while fetching workflows for {}. Results may be incomplete.".format(
+                    repo_name
+                )
+            )
+            continue
+
+        if "workflows" not in workflow_data:
+            logger.warning(
+                "Unexpected API response for workflows in {}: {}".format(
+                    repo_name, workflow_data
+                )
+            )
+            continue
+
         for workflow in workflow_data["workflows"]:
             # Possible states: success, failure, cancelled, skipped, timed_out, action_required, neutral
 
@@ -201,7 +298,7 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
                 # Were there any runs for this workflow in this time period?
                 if total_workflow_runs > 0:
                     if args.detailed and not repo_printed:
-                        print("{}".format(repo_name))
+                        output("{}".format(repo_name))
                         repo_printed = True
 
                     # Initialize our summary stats dict
@@ -262,6 +359,12 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
                             .timing.get()
                         )
 
+                        if is_rate_limited(gh_status, workflow_durations):
+                            print(
+                                "WARNING: GitHub API rate limit exceeded while fetching timing data. Results may be incomplete."
+                            )
+                            break
+
                         # Some jobs may not have run at all
                         if "run_duration_ms" in workflow_durations:
                             job_duration = workflow_durations["run_duration_ms"]
@@ -306,23 +409,23 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
                     summary_stats[repo_name][workflow_summary_name] = stat
 
                     if args.detailed:
-                        print("\t{}:".format(workflow_name))
-                        print("\t\tRuns: {}".format(total_workflow_runs))
-                        print("\t\tSuccessful: {}".format(workflow_success_count))
-                        print("\t\tFailed: {}".format(workflow_fail_count))
-                        print("\t\tSuccess Rate: {}%".format(workflow_success_rate))
-                        print(
+                        output("\t{}:".format(workflow_name))
+                        output("\t\tRuns: {}".format(total_workflow_runs))
+                        output("\t\tSuccessful: {}".format(workflow_success_count))
+                        output("\t\tFailed: {}".format(workflow_fail_count))
+                        output("\t\tSuccess Rate: {}%".format(workflow_success_rate))
+                        output(
                             "\t\tAvg Duration:: {:.0f} ms ({})".format(
                                 workflow_avg_duration,
                                 get_mins_secs_str(workflow_avg_duration),
                             )
                         )
-                        print("\t\tDeployers:")
+                        output("\t\tDeployers:")
                         sorted_deployers = sorted(
                             deployers.items(), key=lambda item: item[1], reverse=True
                         )
                         for deploy_user, deploy_count in sorted_deployers:
-                            print("\t\t\t{}:{}".format(deploy_user, deploy_count))
+                            output("\t\t\t{}:{}".format(deploy_user, deploy_count))
 
     # now we can process the stats we have gathered and get the overall averages
     workflow_count = 0
@@ -362,9 +465,9 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
                     else:
                         overall_deployers[deploy_user] = deploy_count
 
-    print("\n")
-    print("-------- SUMMARY ---------")
-    print(
+    output("\n")
+    output("-------- SUMMARY ---------")
+    output(
         "For the period {} with workflows matching {}".format(
             args.date_filter, args.workflow_pattern
         )
@@ -380,20 +483,26 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
         )
         overall_average_duration_ms = overall_duration_ms_sum / workflow_count
 
-        print("Total Runs: {}".format(overall_run_count))
-        print("Avg Success Rate: {}%".format(overall_average_success_rate))
-        print("Avg Failure Rate: {}%".format(overall_average_failure_rate))
-        print(
+        output("Total Runs: {}".format(overall_run_count))
+        output("Avg Success Rate: {}%".format(overall_average_success_rate))
+        output("Avg Failure Rate: {}%".format(overall_average_failure_rate))
+        output(
             "Avg Duration:: {:.0f} ms ({})".format(
                 overall_average_duration_ms,
                 get_mins_secs_str(overall_average_duration_ms),
             )
         )
-        print("Non 'Branch Deploy' Deployers:")
+        output("Non 'Branch Deploy' Deployers:")
         sorted_overall_deployers = sorted(
             overall_deployers.items(), key=lambda item: item[1], reverse=True
         )
         for deploy_user, deploy_count in sorted_overall_deployers:
-            print("\t{}:{}".format(deploy_user, deploy_count))
+            output("\t{}:{}".format(deploy_user, deploy_count))
     else:
-        print("No workflows found matching the filter and/or date critiera")
+        output("No workflows found matching the filter and/or date critiera")
+
+    # Write results to file if requested
+    if args.output_file:
+        with open(args.output_file, "w") as f:
+            f.write("\n".join(output_lines))
+        print("\nResults written to {}".format(args.output_file))

From 5d5e3371b395111526ff6f7cd345e431508a916f Mon Sep 17 00:00:00 2001
From: Dave North <6616703+dnorth98@users.noreply.github.com>
Date: Wed, 4 Feb 2026 16:06:07 -0500
Subject: [PATCH 2/3] Fix black formatting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 get-deployment-metrics.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/get-deployment-metrics.py b/get-deployment-metrics.py
index 9dfdeab..7b144de 100755
--- a/get-deployment-metrics.py
+++ b/get-deployment-metrics.py
@@ -17,9 +17,7 @@ def make_verbose_rate_limit_handler(client):
 
     def verbose_sleep():
         seconds = client.ratelimit_seconds_remaining()
-        reset_time = time.strftime(
-            "%H:%M:%S", time.localtime(time.time() + seconds)
-        )
+        reset_time = time.strftime("%H:%M:%S", time.localtime(time.time() + seconds))
         print(
             f"Rate limited by GitHub API. Sleeping for {seconds} seconds until {reset_time}...",
             file=sys.stderr,

From cc4d54542cd9f012caaa58158ee81ab505e36c15 Mon Sep 17 00:00:00 2001
From: Dave North <6616703+dnorth98@users.noreply.github.com>
Date: Wed, 4 Feb 2026 16:10:50 -0500
Subject: [PATCH 3/3] Address code review feedback

- Remove unused original_sleep variable
- Add 429 status code check for secondary rate limits
- Fix typo: critiera -> criteria

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 get-deployment-metrics.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/get-deployment-metrics.py b/get-deployment-metrics.py
index 7b144de..8e794c5 100755
--- a/get-deployment-metrics.py
+++ b/get-deployment-metrics.py
@@ -13,7 +13,6 @@
 
 def make_verbose_rate_limit_handler(client):
     """Patch a GitHub client to print rate limit messages."""
-    original_sleep = client.sleep_until_more_ratelimit
 
     def verbose_sleep():
         seconds = client.ratelimit_seconds_remaining()
@@ -46,6 +45,8 @@ def format_number(float_val):
 
 def is_rate_limited(status, response):
     """Check if a GitHub API response indicates rate limiting."""
+    if status == 429:
+        return True
     if status == 403:
         message = response.get("message", "") if isinstance(response, dict) else ""
         if "rate limit" in message.lower():
@@ -497,7 +498,7 @@ def get_workflow_runs(org_name, repo_name, workflow_id, date_filter):
         for deploy_user, deploy_count in sorted_overall_deployers:
             output("\t{}:{}".format(deploy_user, deploy_count))
     else:
-        output("No workflows found matching the filter and/or date critiera")
+        output("No workflows found matching the filter and/or date criteria")
 
     # Write results to file if requested
     if args.output_file: