From 87b9c464784fc9db037cf317ba4888fad5af6789 Mon Sep 17 00:00:00 2001 From: Ben Corlett Date: Fri, 13 Jun 2025 13:16:46 +0100 Subject: [PATCH] New OTEL spike --- Makefile | 4 + app/__init__.py | 5 + app/celery/nightly_tasks.py | 6 + app/celery/process_ses_receipts_tasks.py | 16 ++ .../process_sms_client_response_tasks.py | 23 +++ app/celery/scheduled_tasks.py | 16 ++ app/clients/email/aws_ses.py | 28 ++++ app/clients/sms/__init__.py | 19 +-- app/clients/sms/firetext.py | 9 ++ app/clients/sms/mmg.py | 9 ++ app/commands.py | 4 + app/config.py | 10 ++ app/otel/decorators.py | 70 ++++++++ app/otel/metrics.py | 114 +++++++++++++ app/otel/traces.py | 100 ++++++++++++ requirements.in | 12 ++ requirements.txt | 127 +++++++++++++++ requirements_for_test.txt | 152 ++++++++++++++++++ 18 files changed, 712 insertions(+), 12 deletions(-) create mode 100644 app/otel/decorators.py create mode 100644 app/otel/metrics.py create mode 100644 app/otel/traces.py diff --git a/Makefile b/Makefile index d52e63d7a4..17f354d86f 100644 --- a/Makefile +++ b/Makefile @@ -91,6 +91,10 @@ test: ## Run tests ruff format --check . 
pytest -n auto --maxfail=10 -v +.PHONY: test2 +test2: + pytest -n auto --maxfail=10 -v + .PHONY: watch-tests watch-tests: ## Watch tests and run on change ptw --runner "pytest --testmon -n auto" diff --git a/app/__init__.py b/app/__init__.py index 9175c2b737..ab01c2c128 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -42,6 +42,8 @@ from app.clients.letter.dvla import DVLAClient from app.clients.sms.firetext import FiretextClient from app.clients.sms.mmg import MMGClient +from app.otel.metrics import otel_metrics +from app.otel.traces import otel_traces db = SQLAlchemy() migrate = Migrate() @@ -185,6 +187,9 @@ def create_app(application): cbc_proxy_client.init_app(application) + otel_metrics.init_app(application) + otel_traces.init_app(application) + register_blueprint(application) register_v2_blueprints(application) diff --git a/app/celery/nightly_tasks.py b/app/celery/nightly_tasks.py index 951992216d..460ede4e51 100644 --- a/app/celery/nightly_tasks.py +++ b/app/celery/nightly_tasks.py @@ -52,6 +52,7 @@ from app.notifications.notifications_ses_callback import ( check_and_queue_callback_task, ) +from app.otel.metrics import otel_metrics from app.utils import get_london_midnight_in_utc @@ -250,6 +251,11 @@ def timeout_notifications(): for notification in notifications: statsd_client.incr(f"timeout-sending.{notification.sent_by}") + otel_metrics.provider_timeout_sending_counter.add( + amount=1, + attributes={"sent_by": notification.sent_by}, + ) + check_and_queue_callback_task(notification) current_app.logger.info( diff --git a/app/celery/process_ses_receipts_tasks.py b/app/celery/process_ses_receipts_tasks.py index 42aa439d39..7547336b47 100644 --- a/app/celery/process_ses_receipts_tasks.py +++ b/app/celery/process_ses_receipts_tasks.py @@ -17,6 +17,7 @@ determine_notification_bounce_type, handle_complaint, ) +from app.otel.metrics import otel_metrics @notify_celery.task( @@ -79,10 +80,25 @@ def process_ses_results(self, response):
statsd_client.incr(f"callback.ses.{notification_status}") + otel_metrics.provider_status_counter.add( + amount=1, + attributes={ + "provider": "ses", + "status": notification_status, + }, + ) + if notification.sent_at: statsd_client.timing_with_dates( f"callback.ses.{notification_status}.elapsed-time", datetime.utcnow(), notification.sent_at ) + otel_metrics.provider_request_time_histogram.record( + amount=(datetime.utcnow() - notification.sent_at).total_seconds(), + attributes={ + "provider": "ses", + "status": notification_status, + }, + ) check_and_queue_callback_task(notification) diff --git a/app/celery/process_sms_client_response_tasks.py b/app/celery/process_sms_client_response_tasks.py index c184a43dee..b08f325abc 100644 --- a/app/celery/process_sms_client_response_tasks.py +++ b/app/celery/process_sms_client_response_tasks.py @@ -15,6 +15,7 @@ from app.notifications.notifications_ses_callback import ( check_and_queue_callback_task, ) +from app.otel.metrics import otel_metrics sms_response_mapper = { "MMG": get_mmg_responses, @@ -74,12 +75,27 @@ def _process_for_status(notification_status, client_name, provider_reference, de statsd_client.incr(f"callback.{client_name.lower()}.{notification_status}") + otel_metrics.provider_callback_counter.add( + amount=1, + attributes={ + "provider_name": client_name.lower(), + "notification_status": notification_status, + }, + ) + if notification.sent_at: statsd_client.timing_with_dates( f"callback.{client_name.lower()}.{notification_status}.elapsed-time", datetime.utcnow(), notification.sent_at, ) + otel_metrics.provider_request_time_histogram.record( + amount=(datetime.utcnow() - notification.sent_at).total_seconds(), + attributes={ + "provider_name": client_name.lower(), + "notification_status": notification_status, + }, + ) if notification.billable_units == 0: service = notification.service @@ -98,3 +114,10 @@ def _process_for_status(notification_status, client_name, provider_reference, de 
check_and_queue_callback_task(notification) if notification.international: statsd_client.incr(f"international-sms.{notification_status}.{notification.phone_prefix}") + otel_metrics.international_sms_counter.add( + amount=1, + attributes={ + "notification_status": notification_status, + "phone_prefix": notification.phone_prefix, + }, + ) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index 6cf1edb3ea..ad9f5853e9 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -87,6 +87,7 @@ User, ) from app.notifications.process_notifications import persist_notification, send_notification_to_queue +from app.otel.metrics import otel_metrics from app.utils import get_london_midnight_in_utc @@ -216,6 +217,14 @@ def generate_sms_delivery_stats(): f"slow-delivery.{report.provider}.delivered-within-minutes.{delivery_interval}.ratio", report.slow_ratio ) + otel_metrics.slow_delivery_ratio_gauge.set( + amount=report.slow_ratio, + attributes={ + "provider": report.provider, + "delivery_interval": delivery_interval, + }, + ) + total_notifications = sum(report.total_notifications for report in providers_slow_delivery_reports) slow_notifications = sum(report.slow_notifications for report in providers_slow_delivery_reports) ratio_slow_notifications = slow_notifications / total_notifications @@ -224,6 +233,13 @@ def generate_sms_delivery_stats(): f"slow-delivery.sms.delivered-within-minutes.{delivery_interval}.ratio", ratio_slow_notifications ) + otel_metrics.slow_sms_delivery_ratio_gauge.set( + amount=ratio_slow_notifications, + attributes={ + "delivery_interval": delivery_interval, + }, + ) + # For the 5-minute delivery interval, let's check the percentage of all text messages sent that were slow. # TODO: delete this when we have a way to raise these alerts from eg grafana, prometheus, something else.
if delivery_interval == 5 and current_app.should_check_slow_text_message_delivery: diff --git a/app/clients/email/aws_ses.py b/app/clients/email/aws_ses.py index 5ac6e6262f..9f5f2d6af0 100644 --- a/app/clients/email/aws_ses.py +++ b/app/clients/email/aws_ses.py @@ -10,6 +10,7 @@ EmailClientException, EmailClientNonRetryableException, ) +from app.otel.metrics import otel_metrics ses_response_map = { "Permanent": { @@ -102,6 +103,13 @@ def send_email( ) except botocore.exceptions.ClientError as e: self.statsd_client.incr("clients.ses.error") + otel_metrics.provider_status_counter.add( + amount=1, + attributes={ + "provider": "ses", + "status": "error", + }, + ) # https://docs.aws.amazon.com/ses/latest/APIReference-V2/API_SendEmail.html#API_SendEmail_Errors if e.response["Error"]["Code"] == "InvalidParameterValue": @@ -110,6 +118,13 @@ def send_email( raise AwsSesClientThrottlingSendRateException(str(e)) from e else: self.statsd_client.incr("clients.ses.error") + otel_metrics.provider_status_counter.add( + amount=1, + attributes={ + "provider": "ses", + "status": "error", + }, + ) raise AwsSesClientException(str(e) + e.response["Error"]["Code"]) from e except Exception as e: self.statsd_client.incr("clients.ses.error") @@ -124,7 +139,20 @@ def send_email( }, ) self.statsd_client.timing("clients.ses.request-time", elapsed_time) + otel_metrics.provider_request_time_histogram.record( + amount=elapsed_time, + attributes={ + "provider_name": "ses", + }, + ) self.statsd_client.incr("clients.ses.success") + otel_metrics.provider_status_counter.add( + amount=1, + attributes={ + "provider": "ses", + "status": "success", + }, + ) return response["MessageId"] diff --git a/app/clients/sms/__init__.py b/app/clients/sms/__init__.py index f06cf3ecad..9a2c76fede 100644 --- a/app/clients/sms/__init__.py +++ b/app/clients/sms/__init__.py @@ -45,24 +45,19 @@ def __init__(self, current_app, statsd_client): **adapter.poolmanager.connection_pool_kw, } - def record_outcome(self, success): - 
if success: - self.current_app.logger.info("Provider request for %s %s", self.name, "succeeded" if success else "failed") - self.statsd_client.incr(f"clients.{self.name}.success") - else: - self.statsd_client.incr(f"clients.{self.name}.error") - self.current_app.logger.warning( - "Provider request for %s %s", self.name, "succeeded" if success else "failed" - ) - def send_sms(self, to, content, reference, international, sender): start_time = monotonic() try: response = self.try_send_sms(to, content, reference, international, sender) - self.record_outcome(True) + self.current_app.logger.info("Provider request for %s succeeded", self.name) + self.statsd_client.incr(f"clients.{self.name}.success") except SmsClientResponseException as e: - self.record_outcome(False) + self.statsd_client.incr(f"clients.{self.name}.error") + self.current_app.logger.warning( + "Provider request for %s failed", + self.name, + ) raise e finally: elapsed_time = monotonic() - start_time diff --git a/app/clients/sms/firetext.py b/app/clients/sms/firetext.py index 0820acea5b..22966ec87b 100644 --- a/app/clients/sms/firetext.py +++ b/app/clients/sms/firetext.py @@ -4,6 +4,7 @@ import requests from app.clients.sms import SmsClient, SmsClientResponseException +from app.otel.decorators import otel logger = logging.getLogger(__name__) @@ -57,6 +58,14 @@ def __init__(self, *args, **kwargs): self.url = self.current_app.config.get("FIRETEXT_URL") self.receipt_url = self.current_app.config.get("FIRETEXT_RECEIPT_URL") + @otel( + "provider_status_counter", + "provider_request_time_histogram", + attributes={"provider": "firetext"}, + ) + def send_sms(self, *args, **kwargs): + return super().send_sms(*args, **kwargs) + def try_send_sms(self, to, content, reference, international, sender): data = { "apiKey": self.international_api_key if international else self.api_key, diff --git a/app/clients/sms/mmg.py b/app/clients/sms/mmg.py index 8099e9724d..ec8e87eb30 100644 --- a/app/clients/sms/mmg.py +++ 
b/app/clients/sms/mmg.py @@ -3,6 +3,7 @@ import requests from app.clients.sms import SmsClient, SmsClientResponseException +from app.otel.decorators import otel # For some extra context, see google drive: GOV.UK Notify -> SMS suppliers -> Detailed failure statuses mmg_response_map = { @@ -88,6 +89,14 @@ def __init__(self, *args, **kwargs): self.mmg_url = self.current_app.config.get("MMG_URL") self.receipt_url = self.current_app.config.get("MMG_RECEIPT_URL") + @otel( + "provider_status_counter", + "provider_request_time_histogram", + attributes={"provider": "mmg"}, + ) + def send_sms(self, *args, **kwargs): + return super().send_sms(*args, **kwargs) + def try_send_sms(self, to, content, reference, international, sender): data = {"reqType": "BULK", "MSISDN": to, "msg": content, "sender": sender, "cid": reference, "multi": True} diff --git a/app/commands.py b/app/commands.py index d68e0eb531..4a214b2e69 100644 --- a/app/commands.py +++ b/app/commands.py @@ -74,6 +74,7 @@ Template, User, ) +from app.otel.decorators import otel @click.group(name="command", help="Additional commands") @@ -397,6 +398,7 @@ def bulk_invite_user_to_service(file_name, service_id, user_id, auth_type, permi "-s", "--start_date", default=datetime(2017, 2, 1), help="start date inclusive", type=click_dt(format="%Y-%m-%d") ) @statsd(namespace="tasks") +@otel() def populate_notification_postage(start_date): current_app.logger.info("populating historical notification postage") @@ -442,6 +444,7 @@ def populate_notification_postage(start_date): @click.option("-s", "--start_date", required=True, help="start date inclusive", type=click_dt(format="%Y-%m-%d")) @click.option("-e", "--end_date", required=True, help="end date inclusive", type=click_dt(format="%Y-%m-%d")) @statsd(namespace="tasks") +@otel() def update_jobs_archived_flag(start_date, end_date): current_app.logger.info("Archiving jobs created between %s to %s", start_date, end_date) @@ -475,6 +478,7 @@ def update_jobs_archived_flag(start_date, 
end_date): @notify_command(name="update-emails-to-remove-gsi") @click.option("-s", "--service_id", required=True, help="service id. Update all user.email_address to remove .gsi") @statsd(namespace="tasks") +@otel() def update_emails_to_remove_gsi(service_id): users_to_update = """SELECT u.id user_id, u.name, email_address, s.id, s.name FROM users u diff --git a/app/config.py b/app/config.py index c9a35d09ec..f825574a65 100644 --- a/app/config.py +++ b/app/config.py @@ -100,6 +100,12 @@ class Config: CELERY_WORKER_LOG_LEVEL = os.getenv("CELERY_WORKER_LOG_LEVEL", "CRITICAL").upper() CELERY_BEAT_LOG_LEVEL = os.getenv("CELERY_BEAT_LOG_LEVEL", "INFO").upper() + OTEL_EXPORT_TYPE = os.getenv("OTEL_EXPORT_TYPE", "otlp") + OTEL_COLLECTOR_ENDPOINT = os.getenv("OTEL_COLLECTOR_ENDPOINT", "localhost:4317") + OTEL_INSTRUMENTATIONS = os.getenv( + "OTEL_INSTRUMENTATIONS", "wsgi,celery,flask,redis,sqlalchemy,requests,psycopg2,boto3sqs" + ) + # secrets that internal apps, such as the admin app or document download, must use to authenticate with the API ADMIN_CLIENT_ID = "notify-admin" FUNCTIONAL_TESTS_CLIENT_ID = "notify-functional-tests" @@ -526,6 +532,8 @@ class Development(Config): CELERY_WORKER_LOG_LEVEL = "INFO" + OTEL_EXPORT_TYPE = os.getenv("OTEL_EXPORT_TYPE", "none") + CELERY = { **Config.CELERY, "broker_transport_options": { @@ -586,6 +594,8 @@ class Test(Development): CELERY_WORKER_LOG_LEVEL = "INFO" + OTEL_EXPORT_TYPE = os.getenv("OTEL_EXPORT_TYPE", "none") + S3_BUCKET_CSV_UPLOAD = "test-notifications-csv-upload" S3_BUCKET_CONTACT_LIST = "test-contact-list" S3_BUCKET_TEST_LETTERS = "test-test-letters" diff --git a/app/otel/decorators.py b/app/otel/decorators.py new file mode 100644 index 0000000000..dd10ca09ae --- /dev/null +++ b/app/otel/decorators.py @@ -0,0 +1,70 @@ +import functools +import time + +from app.otel.metrics import otel_metrics + + +def otel(counter_name=None, histogram_name=None, attributes=None): + if attributes is None: + attributes = {} + + def 
time_function(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + start_time = time.monotonic() + c_name = counter_name or func.__name__ + h_name = histogram_name or f"{func.__name__}_time" + + # Create counter if it doesn't exist + if not hasattr(otel_metrics, c_name): + setattr( + otel_metrics, + c_name, + otel_metrics.meter.create_counter(c_name, description=f"Calls to the {func.__name__} task"), + ) + counter = getattr(otel_metrics, c_name) + + # Create histogram if it doesn't exist + if not hasattr(otel_metrics, h_name): + setattr( + otel_metrics, + h_name, + otel_metrics.meter.create_histogram( + h_name, + description=f"time taken to execute {func.__name__} function", + explicit_bucket_boundaries_advisory=getattr(otel_metrics, "default_histogram_bucket", None), + ), + ) + histogram = getattr(otel_metrics, h_name) + + try: + result = func(*args, **kwargs) + elapsed_time = time.monotonic() - start_time + + counter.add( + amount=1, + attributes={**attributes, "function_name": func.__name__, "status": "success"}, + ) + + histogram.record( + amount=elapsed_time, + attributes={**attributes, "function_name": func.__name__, "status": "success"}, + ) + + except Exception as e: + elapsed_time = time.monotonic() - start_time + counter.add( + amount=1, + attributes={**attributes, "function_name": func.__name__, "status": "error"}, + ) + histogram.record( + amount=elapsed_time, + attributes={**attributes, "function_name": func.__name__, "status": "error"}, + ) + raise e + else: + return result + + return wrapper + + return time_function diff --git a/app/otel/metrics.py b/app/otel/metrics.py new file mode 100644 index 0000000000..8947c1ef6c --- /dev/null +++ b/app/otel/metrics.py @@ -0,0 +1,114 @@ +from opentelemetry import metrics +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import ( + ConsoleMetricExporter, + 
PeriodicExportingMetricReader, +) +from opentelemetry.sdk.resources import Resource + + +class Metrics: + def __init__(self): + self.meter = None + self.default_histogram_bucket = [ + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + float("inf"), + ] + + def init_app(self, app): + export_mode = app.config.get("OTEL_EXPORT_TYPE", "none").lower() + metric_readers = [] + + if export_mode == "console": + app.logger.info("OpenTelemetry metrics will be exported to console") + metric_readers.append(PeriodicExportingMetricReader(ConsoleMetricExporter())) + elif export_mode == "otlp": + endpoint = app.config.get("OTEL_COLLECTOR_ENDPOINT", "localhost:4317") + app.logger.info("OpenTelemetry metrics will be exported to OTLP collector at %s", endpoint) + otlp_exporter = OTLPMetricExporter(endpoint=endpoint, insecure=True) + # Metrics will be exported every 60 seconds with a 30-second timeout by default. + # The following environment variables can be used to change this: + # OTEL_METRIC_EXPORT_INTERVAL + # OTEL_METRIC_EXPORT_TIMEOUT + metric_readers.append(PeriodicExportingMetricReader(otlp_exporter)) + + resource = Resource.create({"service.name": "notifications-api"}) + provider = MeterProvider(metric_readers=metric_readers, resource=resource) + metrics.set_meter_provider(provider) + self.meter = metrics.get_meter(__name__) + + self.create_counters() + self.create_histograms() + self.create_gauges() + + def create_counters(self): + self.provider_status_counter = self.meter.create_counter( + "provider_status", + description="Count of successful requests to provider", + ) + + self.provider_timeout_sending_counter = self.meter.create_counter( + "provider_timeout_sending", + description="Count of notifications that have timed out while sending", + ) + + self.provider_callback_counter = self.meter.create_counter( + "provider_callback", + description="Count of provider callbacks", + ) + + self.international_sms_counter =
self.meter.create_counter( + "international_sms", + description="Count of international SMS", + ) + + def create_histograms(self): + self.provider_request_time_histogram = self.meter.create_histogram( + "provider_request_time", + description="Time taken for requests to providers in seconds", + unit="seconds", + explicit_bucket_boundaries_advisory=self.default_histogram_bucket, + ) + + self.test_key_sending_time_histogram = self.meter.create_histogram( + "test_key_sending_time", + description="time taken to send a test SMS", + explicit_bucket_boundaries_advisory=self.default_histogram_bucket, + ) + + self.live_key_sending_time_histogram = self.meter.create_histogram( + "live_key_sending_time", + description="time taken to send a live SMS", + explicit_bucket_boundaries_advisory=self.default_histogram_bucket, + ) + + def create_gauges(self): + self.slow_delivery_ratio_gauge = self.meter.create_gauge( + "slow_delivery_ratio", + description="Provider slow delivery ratio", + unit="fraction", + ) + + self.slow_sms_delivery_ratio_gauge = self.meter.create_gauge( + "slow_sms_delivery_ratio", + description="SMS slow delivery ratio", + unit="fraction", + ) + + +# Initialize the metrics instance singleton +otel_metrics = Metrics() diff --git a/app/otel/traces.py b/app/otel/traces.py new file mode 100644 index 0000000000..28ee9d54b6 --- /dev/null +++ b/app/otel/traces.py @@ -0,0 +1,100 @@ +import os + +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.processor.baggage import ALLOW_ALL_BAGGAGE_KEYS, BaggageSpanProcessor +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter +from opentelemetry.trace import ( + Span, + get_tracer_provider, + set_tracer_provider, +) + + +class Traces: + def __init__(self): + self.tracer = None + + def init_app(self, app): + export_mode = 
app.config.get("OTEL_EXPORT_TYPE", "none").lower() + resource = Resource.create({"service.name": os.getenv("NOTIFY_APP_NAME", app.config.get("NOTIFY_APP_NAME"))}) + set_tracer_provider(TracerProvider(resource=resource)) + get_tracer_provider().get_tracer(app.config.get("NOTIFY_APP_NAME")) + + span_processor = None + + if export_mode == "console": + span_processor = BatchSpanProcessor(ConsoleSpanExporter()) + elif export_mode == "otlp": + endpoint = app.config.get("OTEL_COLLECTOR_ENDPOINT", "localhost:4317") + span_processor = BatchSpanProcessor( + OTLPSpanExporter( + endpoint=endpoint, + insecure=True, + ) + ) + + if span_processor: + # Instead of adding all baggage to attributes, we could do something like + # regex_predicate = lambda baggage_key: baggage_key.startswith("^key.+") + # tracer_provider.add_span_processor(BaggageSpanProcessor(regex_predicate)) + get_tracer_provider().add_span_processor(BaggageSpanProcessor(ALLOW_ALL_BAGGAGE_KEYS)) + get_tracer_provider().add_span_processor(span_processor) + + # not sure I like the instrumentation here as it adds both traces and metrics + + self.instrument_app(app) + + def instrument_app(self, app): + instrumentation = app.config.get("OTEL_INSTRUMENTATIONS", "").lower().split(",") + + if "wsgi" in instrumentation: + self.instrument_app_wsgi(app) + + if "celery" in instrumentation: + from opentelemetry.instrumentation.celery import CeleryInstrumentor + + CeleryInstrumentor().instrument() + if "flask" in instrumentation: + from opentelemetry.instrumentation.flask import FlaskInstrumentor + + FlaskInstrumentor().instrument_app(app) + if "redis" in instrumentation: + from opentelemetry.instrumentation.redis import RedisInstrumentor + + RedisInstrumentor().instrument() + if "requests" in instrumentation: + from opentelemetry.instrumentation.requests import RequestsInstrumentor + + RequestsInstrumentor().instrument() + if "sqlalchemy" in instrumentation: + from opentelemetry.instrumentation.sqlalchemy import
SQLAlchemyInstrumentor + + SQLAlchemyInstrumentor().instrument(enable_commenter=True, commenter_options={}) + if "psycopg2" in instrumentation: + from opentelemetry.instrumentation.psycopg2 import Psycopg2Instrumentor + + Psycopg2Instrumentor().instrument(enable_commenter=True, commenter_options={}) + if "boto3sqs" in instrumentation: + from opentelemetry.instrumentation.boto3sqs import Boto3SQSInstrumentor + + Boto3SQSInstrumentor().instrument() + + def instrument_app_wsgi(self, app): + from wsgiref.types import WSGIEnvironment + + from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware + + def request_hook(span: Span, environ: WSGIEnvironment): + if span and span.is_recording(): + span.set_attribute("custom_user_attribute_from_request_hook", "some-value") + + def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_headers: list[tuple[str, str]]): + if span and span.is_recording(): + span.set_attribute("custom_user_attribute_from_response_hook", "some-value") + + app.wsgi_app = OpenTelemetryMiddleware(app.wsgi_app, request_hook=request_hook, response_hook=response_hook) + + +otel_traces = Traces() diff --git a/requirements.in b/requirements.in index 933c4760b4..7f861511b8 100644 --- a/requirements.in +++ b/requirements.in @@ -32,3 +32,15 @@ prometheus-client==0.14.1 git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 sentry-sdk[flask,celery,sqlalchemy]==1.45.1 + +opentelemetry-distro==0.54b1 +opentelemetry-exporter-otlp==1.33.1 +opentelemetry-instrumentation-celery==0.54b1 +opentelemetry-instrumentation-flask==0.54b1 +opentelemetry-instrumentation-requests==0.54b1 +opentelemetry-instrumentation-redis==0.54b1 +opentelemetry-instrumentation-sqlalchemy==0.54b1 +opentelemetry-instrumentation-wsgi==0.54b1 +opentelemetry-instrumentation-psycopg2==0.54b1 +opentelemetry-instrumentation-boto3sqs==0.54b1 +opentelemetry-processor-baggage==0.54b1 \ No newline at end of file diff --git 
a/requirements.txt b/requirements.txt index 6bcd9b2312..549937ca2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,6 +60,12 @@ click-plugins==1.1.1 # via celery click-repl==0.2.0 # via celery +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via eventlet docopt==0.6.2 @@ -94,10 +100,16 @@ fqdn==1.5.1 # via jsonschema gds-metrics @ git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 # via -r requirements.in +googleapis-common-protos==1.70.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http govuk-bank-holidays==0.15 # via notifications-utils greenlet==3.0.3 # via eventlet +grpcio==1.73.0 + # via opentelemetry-exporter-otlp-proto-grpc gunicorn==23.0.0 # via # -r requirements.in @@ -106,6 +118,8 @@ idna==3.7 # via # jsonschema # requests +importlib-metadata==8.6.1 + # via opentelemetry-api iso8601==2.1.0 # via -r requirements.in isoduration==20.11.0 @@ -152,6 +166,98 @@ notifications-python-client==10.0.1 # via -r requirements.in notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@a97b36f6a32e7bb917152c8cd716fe65fa15ac9f # via -r requirements.in +opentelemetry-api==1.33.1 + # via + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-celery + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-psycopg2 + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-instrumentation-wsgi + # opentelemetry-processor-baggage + # opentelemetry-sdk + # opentelemetry-semantic-conventions 
+opentelemetry-distro==0.54b1 + # via -r requirements.in +opentelemetry-exporter-otlp==1.33.1 + # via -r requirements.in +opentelemetry-exporter-otlp-proto-common==1.33.1 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.33.1 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.33.1 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.54b1 + # via + # opentelemetry-distro + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-celery + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-psycopg2 + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-instrumentation-wsgi +opentelemetry-instrumentation-boto3sqs==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-celery==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-dbapi==0.54b1 + # via opentelemetry-instrumentation-psycopg2 +opentelemetry-instrumentation-flask==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-psycopg2==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-redis==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-requests==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-sqlalchemy==0.54b1 + # via -r requirements.in +opentelemetry-instrumentation-wsgi==0.54b1 + # via + # -r requirements.in + # opentelemetry-instrumentation-flask +opentelemetry-processor-baggage==0.54b1 + # via -r requirements.in +opentelemetry-proto==1.33.1 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.33.1 + # via + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # 
opentelemetry-processor-baggage +opentelemetry-semantic-conventions==0.54b1 + # via + # opentelemetry-instrumentation + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-celery + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-instrumentation-wsgi + # opentelemetry-sdk +opentelemetry-util-http==0.54b1 + # via + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-wsgi ordered-set==4.1.0 # via notifications-utils packaging==23.2 @@ -159,6 +265,9 @@ packaging==23.2 # gunicorn # marshmallow # marshmallow-sqlalchemy + # opentelemetry-instrumentation + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-sqlalchemy phonenumbers==8.13.52 # via notifications-utils prometheus-client==0.14.1 @@ -167,6 +276,10 @@ prometheus-client==0.14.1 # gds-metrics prompt-toolkit==3.0.31 # via click-repl +protobuf==5.29.5 + # via + # googleapis-common-protos + # opentelemetry-proto psutil==6.1.1 # via -r requirements.in psycopg2-binary==2.9.10 @@ -204,6 +317,7 @@ requests==2.32.2 # govuk-bank-holidays # notifications-python-client # notifications-utils + # opentelemetry-exporter-otlp-proto-http rfc3339-validator==0.1.4 # via jsonschema rfc3987==1.3.8 @@ -234,6 +348,8 @@ sqlalchemy==1.4.41 # sentry-sdk statsd==4.0.1 # via notifications-utils +typing-extensions==4.14.0 + # via opentelemetry-sdk tzdata==2024.1 # via celery uri-template==1.2.0 @@ -256,3 +372,14 @@ webcolors==1.12 # via jsonschema werkzeug==3.1.3 # via flask +wrapt==1.17.2 + # via + # deprecated + # opentelemetry-instrumentation + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-processor-baggage +zipp==3.23.0 + # 
via importlib-metadata diff --git a/requirements_for_test.txt b/requirements_for_test.txt index fd5bf5877b..09fc8485f7 100644 --- a/requirements_for_test.txt +++ b/requirements_for_test.txt @@ -97,6 +97,13 @@ cryptography==44.0.1 # via # moto # trustme +deprecated==1.2.18 + # via + # -r requirements.txt + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via # -r requirements.txt @@ -143,6 +150,11 @@ freezegun==1.5.1 # via -r requirements_for_test_common.in gds-metrics @ git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 # via -r requirements.txt +googleapis-common-protos==1.70.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http govuk-bank-holidays==0.15 # via # -r requirements.txt @@ -151,6 +163,10 @@ greenlet==3.0.3 # via # -r requirements.txt # eventlet +grpcio==1.73.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-grpc gunicorn==23.0.0 # via # -r requirements.txt @@ -160,6 +176,10 @@ idna==3.7 # -r requirements.txt # requests # trustme +importlib-metadata==8.6.1 + # via + # -r requirements.txt + # opentelemetry-api iniconfig==2.0.0 # via pytest iso8601==2.1.0 @@ -222,6 +242,111 @@ notifications-python-client==10.0.1 # via -r requirements.txt notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@a97b36f6a32e7bb917152c8cd716fe65fa15ac9f # via -r requirements.txt +opentelemetry-api==1.33.1 + # via + # -r requirements.txt + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-celery + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-psycopg2 + # 
opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-instrumentation-wsgi + # opentelemetry-processor-baggage + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-distro==0.54b1 + # via -r requirements.txt +opentelemetry-exporter-otlp==1.33.1 + # via -r requirements.txt +opentelemetry-exporter-otlp-proto-common==1.33.1 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.33.1 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.33.1 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.54b1 + # via + # -r requirements.txt + # opentelemetry-distro + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-celery + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-psycopg2 + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-instrumentation-wsgi +opentelemetry-instrumentation-boto3sqs==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-celery==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-dbapi==0.54b1 + # via + # -r requirements.txt + # opentelemetry-instrumentation-psycopg2 +opentelemetry-instrumentation-flask==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-psycopg2==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-redis==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-requests==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-sqlalchemy==0.54b1 + # via -r requirements.txt +opentelemetry-instrumentation-wsgi==0.54b1 + # via + # -r requirements.txt + # 
opentelemetry-instrumentation-flask +opentelemetry-processor-baggage==0.54b1 + # via -r requirements.txt +opentelemetry-proto==1.33.1 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.33.1 + # via + # -r requirements.txt + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-processor-baggage +opentelemetry-semantic-conventions==0.54b1 + # via + # -r requirements.txt + # opentelemetry-instrumentation + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-celery + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-instrumentation-wsgi + # opentelemetry-sdk +opentelemetry-util-http==0.54b1 + # via + # -r requirements.txt + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-requests + # opentelemetry-instrumentation-wsgi ordered-set==4.1.0 # via # -r requirements.txt @@ -232,6 +357,9 @@ packaging==23.2 # gunicorn # marshmallow # marshmallow-sqlalchemy + # opentelemetry-instrumentation + # opentelemetry-instrumentation-flask + # opentelemetry-instrumentation-sqlalchemy # pytest phonenumbers==8.13.52 # via @@ -247,6 +375,11 @@ prompt-toolkit==3.0.31 # via # -r requirements.txt # click-repl +protobuf==5.29.5 + # via + # -r requirements.txt + # googleapis-common-protos + # opentelemetry-proto psutil==6.1.1 # via -r requirements.txt psycopg2-binary==2.9.10 @@ -317,6 +450,7 @@ requests==2.32.2 # moto # notifications-python-client # notifications-utils + # opentelemetry-exporter-otlp-proto-http # requests-mock # responses requests-mock==1.12.1 @@ -370,6 +504,10 @@ statsd==4.0.1 # notifications-utils trustme==0.9.0 # via -r requirements_for_test.in 
+typing-extensions==4.14.0 + # via + # -r requirements.txt + # opentelemetry-sdk tzdata==2024.1 # via # -r requirements.txt @@ -401,5 +539,19 @@ werkzeug==3.1.3 # flask # moto # pytest-httpserver +wrapt==1.17.2 + # via + # -r requirements.txt + # deprecated + # opentelemetry-instrumentation + # opentelemetry-instrumentation-boto3sqs + # opentelemetry-instrumentation-dbapi + # opentelemetry-instrumentation-redis + # opentelemetry-instrumentation-sqlalchemy + # opentelemetry-processor-baggage xmltodict==0.14.2 # via moto +zipp==3.23.0 + # via + # -r requirements.txt + # importlib-metadata