From 90b67b8c81bbee9ab5da0192217d95b749da66e1 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Sat, 11 Apr 2026 17:22:17 -0700 Subject: [PATCH] Surface data email send failures as ERROR instead of silent drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _finalize's email send block had two bugs that combined into a silent-fail: 1. `send_step_email` is documented as \"Never raises\" and returns True/False. The caller wrapped it in `try: ... except Exception`, which is dead code — no exception can ever propagate out of the function — and then discarded the False return value. 2. When both Gmail SMTP and SES fallback failed inside `send_step_email`, the function logged at WARNING and returned False. The caller never checked the return, so the run continued as if the email had been sent. Net effect: a successful data collection with a broken emailer would record in S3 correctly but emit no notification, and monitoring had no way to tell the difference between \"ran and emailed\" and \"ran and silently dropped the notification\". ## Fix - Remove the dead try/except (send_step_email never raises) - Check the return value - On False, log ERROR with enough context to diagnose (which env vars/config to check) Not raising on email failure because the data is still valid in S3 and downstream Step Function steps can still consume it — email is a monitoring layer, not a data dependency. If CloudWatch alarms are wired to ERROR-level log events on this Lambda, the missed email will still surface. Audit of the rest of weekly_collector.py for other silent-fail patterns: - Lines 130, 363, 369: S3→Wikipedia fallback for constituents — intentional, downstream `if not tickers: return failed` catches the case where both sources fail - Lines 443, 483: duration formatting — cosmetic silent-fail, acceptable - run_daily status: ok/failed only, no partial; main() hard-fails on non-ok — already correct Tests: 61 passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- weekly_collector.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/weekly_collector.py b/weekly_collector.py index 148e2aa..5c66f6c 100644 --- a/weekly_collector.py +++ b/weekly_collector.py @@ -492,14 +492,29 @@ def _finalize( duration, ) - # Send completion email (non-blocking) + # Send completion email. + # send_step_email never raises (see emailer.py docstring) — it returns + # True/False. The old try/except was dead code, AND the False return + # was being silently dropped. If Gmail SMTP AND SES both fail, the + # caller needs to know so monitoring isn't blind to a successful run + # that silently had no notification. if not dry_run and only is None: - try: - from emailer import send_step_email - step_name = f"Data Phase {phase}" if phase else "Data Collection" - send_step_email(step_name, results, run_date) - except Exception as exc: - logger.warning("Step email failed (non-fatal): %s", exc) + from emailer import send_step_email + step_name = f"Data Phase {phase}" if phase else "Data Collection" + sent = send_step_email(step_name, results, run_date) + if not sent: + # Log at ERROR so CloudWatch alarms (if wired to ERROR-level) + # surface the missed email. Not raising because the data + # collection itself succeeded — only monitoring is affected. + # Downstream Step Function steps can still consume the S3 output. + logger.error( + "Step email '%s' failed to send — both Gmail SMTP and SES " + "fallback returned failure. Monitoring will be blind to " + "this run's result summary. Check EMAIL_SENDER, " + "EMAIL_RECIPIENTS, GMAIL_APP_PASSWORD env vars and SES " + "identity verification.", + step_name, + ) def _write_manifest(bucket: str, s3_prefix: str, run_date: str, results: dict) -> None: