Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This file was automatically copied from notifications-utils@99.8.0
# This file was automatically copied from notifications-utils@100.1.0

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
18 changes: 12 additions & 6 deletions app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from gds_metrics.metrics import Gauge, Histogram
from notifications_utils import request_helper
from notifications_utils.celery import NotifyCelery
from notifications_utils.clients.otel.otel_client import OtelClient
from notifications_utils.clients.redis.redis_client import RedisClient
from notifications_utils.clients.signing.signing_client import Signing
from notifications_utils.clients.statsd.statsd_client import StatsdClient
Expand All @@ -49,6 +50,7 @@
notify_celery = NotifyCelery()
signing = Signing()
statsd_client = StatsdClient()
otel_client = OtelClient()
redis_store = RedisClient()
cbc_proxy_client = CBCProxyClient()
metrics = GDSMetrics()
Expand All @@ -70,7 +72,7 @@
_firetext_client_context_var: ContextVar[FiretextClient] = ContextVar("firetext_client")
get_firetext_client: LazyLocalGetter[FiretextClient] = LazyLocalGetter(
_firetext_client_context_var,
lambda: FiretextClient(current_app, statsd_client=statsd_client),
lambda: FiretextClient(current_app, statsd_client=statsd_client, otel_client=otel_client),
expected_type=FiretextClient,
)
memo_resetters.append(lambda: get_firetext_client.clear())
Expand All @@ -79,7 +81,7 @@
_mmg_client_context_var: ContextVar[MMGClient] = ContextVar("mmg_client")
get_mmg_client: LazyLocalGetter[MMGClient] = LazyLocalGetter(
_mmg_client_context_var,
lambda: MMGClient(current_app, statsd_client=statsd_client),
lambda: MMGClient(current_app, statsd_client=statsd_client, otel_client=otel_client),
expected_type=MMGClient,
)
memo_resetters.append(lambda: get_mmg_client.clear())
Expand All @@ -88,7 +90,7 @@
_aws_ses_client_context_var: ContextVar[AwsSesClient] = ContextVar("aws_ses_client")
get_aws_ses_client: LazyLocalGetter[AwsSesClient] = LazyLocalGetter(
_aws_ses_client_context_var,
lambda: AwsSesClient(current_app.config["AWS_REGION"], statsd_client=statsd_client),
lambda: AwsSesClient(current_app.config["AWS_REGION"], statsd_client=statsd_client, otel_client=otel_client),
expected_type=AwsSesClient,
)
memo_resetters.append(lambda: get_aws_ses_client.clear())
Expand All @@ -99,8 +101,9 @@
_aws_ses_stub_client_context_var,
lambda: AwsSesStubClient(
current_app.config["AWS_REGION"],
statsd_client=statsd_client,
stub_url=current_app.config["SES_STUB_URL"],
statsd_client=statsd_client,
otel_client=otel_client,
),
expected_type=AwsSesStubClient,
)
Expand Down Expand Up @@ -135,7 +138,7 @@
_dvla_client_context_var: ContextVar[DVLAClient] = ContextVar("dvla_client")
get_dvla_client: LazyLocalGetter[DVLAClient] = LazyLocalGetter(
_dvla_client_context_var,
lambda: DVLAClient(current_app, statsd_client=statsd_client),
lambda: DVLAClient(current_app, statsd_client=statsd_client, otel_client=otel_client),
)
memo_resetters.append(lambda: get_dvla_client.clear())
dvla_client = LocalProxy(get_dvla_client)
Expand All @@ -160,6 +163,8 @@
def create_app(application):
from app.config import Config, configs

global notify_otel_client

notify_environment = os.environ["NOTIFY_ENVIRONMENT"]

if notify_environment in configs:
Expand All @@ -177,7 +182,8 @@ def create_app(application):
migrate.init_app(application, db=db)
ma.init_app(application)
statsd_client.init_app(application)
utils_logging.init_app(application, statsd_client)
otel_client.init_app(application)
utils_logging.init_app(application, statsd_client, otel_client)

notify_celery.init_app(application)
signing.init_app(application)
Expand Down
7 changes: 6 additions & 1 deletion app/celery/nightly_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from sqlalchemy import func
from sqlalchemy.exc import SQLAlchemyError

from app import notify_celery, statsd_client, zendesk_client
from app import notify_celery, otel_client, statsd_client, zendesk_client
from app.aws import s3
from app.config import QueueNames
from app.constants import (
Expand Down Expand Up @@ -250,6 +250,11 @@ def timeout_notifications():

for notification in notifications:
statsd_client.incr(f"timeout-sending.{notification.sent_by}")
otel_client.incr(
"timeout_sending",
attributes={"notification_type": notification.notification_type},
description="Count of notifications that have timed out while sending",
)
check_and_queue_callback_task(notification)

current_app.logger.info(
Expand Down
15 changes: 14 additions & 1 deletion app/celery/process_ses_receipts_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from flask import current_app, json
from sqlalchemy.orm.exc import NoResultFound

from app import notify_celery, statsd_client
from app import notify_celery, otel_client, statsd_client
from app.clients.email.aws_ses import get_aws_responses
from app.config import QueueNames
from app.constants import NOTIFICATION_PENDING, NOTIFICATION_SENDING
Expand Down Expand Up @@ -79,11 +79,24 @@ def process_ses_results(self, response):

statsd_client.incr(f"callback.ses.{notification_status}")

otel_client.incr(
"callback_success",
attributes={"provider": "ses", "status": notification_status},
description="Count of successful SES callbacks with status",
)

if notification.sent_at:
statsd_client.timing_with_dates(
f"callback.ses.{notification_status}.elapsed-time", datetime.utcnow(), notification.sent_at
)

otel_client.record(
"callback_elapsed_time",
value=(datetime.utcnow() - notification.sent_at).total_seconds(),
attributes={"provider": "ses", "status": notification_status},
description="Elapsed time for SES callback with status",
)

check_and_queue_callback_task(notification)

return True
Expand Down
30 changes: 29 additions & 1 deletion app/celery/process_sms_client_response_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from flask import current_app
from notifications_utils.template import SMSMessageTemplate

from app import notify_celery, statsd_client
from app import notify_celery, otel_client, statsd_client
from app.clients import ClientException
from app.clients.sms.firetext import get_firetext_responses
from app.clients.sms.mmg import get_mmg_responses
Expand Down Expand Up @@ -74,13 +74,32 @@ def _process_for_status(notification_status, client_name, provider_reference, de

statsd_client.incr(f"callback.{client_name.lower()}.{notification_status}")

otel_client.incr(
"callback",
attributes={
"provider": client_name.lower(),
"status": notification_status,
},
description="Count of callbacks",
)

if notification.sent_at:
statsd_client.timing_with_dates(
f"callback.{client_name.lower()}.{notification_status}.elapsed-time",
datetime.utcnow(),
notification.sent_at,
)

otel_client.record(
"callback_elapsed_time",
value=(datetime.utcnow() - notification.sent_at).total_seconds(),
attributes={
"provider": client_name.lower(),
"status": notification_status,
},
description="Elapsed time for callbacks",
)

if notification.billable_units == 0:
service = notification.service
template_model = dao_get_template_by_id(notification.template_id, notification.template_version)
Expand All @@ -98,3 +117,12 @@ def _process_for_status(notification_status, client_name, provider_reference, de
check_and_queue_callback_task(notification)
if notification.international:
statsd_client.incr(f"international-sms.{notification_status}.{notification.phone_prefix}")

otel_client.incr(
"international_sms",
attributes={
"status": notification_status,
"phone_prefix": notification.phone_prefix,
},
description="Count of international SMS",
)
23 changes: 22 additions & 1 deletion app/celery/scheduled_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from sqlalchemy import and_, between
from sqlalchemy.exc import SQLAlchemyError

from app import db, dvla_client, notify_celery, redis_store, statsd_client, zendesk_client
from app import db, dvla_client, notify_celery, otel_client, redis_store, statsd_client, zendesk_client
from app.aws import s3
from app.celery.letters_pdf_tasks import get_pdf_for_templated_letter
from app.celery.tasks import (
Expand Down Expand Up @@ -216,6 +216,17 @@ def generate_sms_delivery_stats():
f"slow-delivery.{report.provider}.delivered-within-minutes.{delivery_interval}.ratio", report.slow_ratio
)

otel_client.record(
"slow_delivery_ratio",
value=report.slow_ratio,
attributes={
"provider": report.provider,
"delivery_interval": delivery_interval,
},
description="Ratio of slow message deliveries for in the last 15 minutes "
"for deliveries taking longer than delivery_interval minutes",
)

total_notifications = sum(report.total_notifications for report in providers_slow_delivery_reports)
slow_notifications = sum(report.slow_notifications for report in providers_slow_delivery_reports)
ratio_slow_notifications = slow_notifications / total_notifications
Expand All @@ -224,6 +235,16 @@ def generate_sms_delivery_stats():
f"slow-delivery.sms.delivered-within-minutes.{delivery_interval}.ratio", ratio_slow_notifications
)

otel_client.record(
"slow_sms_delivery_ratio",
value=ratio_slow_notifications,
attributes={
"delivery_interval": delivery_interval,
},
description="Ratio of slow message deliveries for in the last 15 minutes "
"for deliveries taking longer than delivery_interval minutes",
)

# For the 5-minute delivery interval, let's check the percentage of all text messages sent that were slow.
# TODO: delete this when we have a way to raise these alerts from eg grafana, prometheus, something else.
if delivery_interval == 5 and current_app.should_check_slow_text_message_delivery:
Expand Down
27 changes: 26 additions & 1 deletion app/clients/email/aws_ses.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ class AwsSesClient(EmailClient):

name = "ses"

def __init__(self, region, statsd_client):
def __init__(self, region, statsd_client, otel_client):
super().__init__()
self._client = boto3.client("sesv2", region_name=region)
self.statsd_client = statsd_client
self.otel_client = otel_client

def send_email(
self,
Expand Down Expand Up @@ -103,13 +104,25 @@ def send_email(
except botocore.exceptions.ClientError as e:
self.statsd_client.incr("clients.ses.error")

self.otel_client.incr(
"clients_error",
attributes={"provider": self.name},
description="Count of failed requests to provider",
)

# https://docs.aws.amazon.com/ses/latest/APIReference-V2/API_SendEmail.html#API_SendEmail_Errors
if e.response["Error"]["Code"] == "InvalidParameterValue":
raise EmailClientNonRetryableException(e.response["Error"]["Message"]) from e
elif e.response["Error"]["Code"] == "TooManyRequestsException":
raise AwsSesClientThrottlingSendRateException(str(e)) from e
else:
self.statsd_client.incr("clients.ses.error")

self.otel_client.incr(
"clients_error",
attributes={"provider": self.name},
description="Count of failed requests to provider",
)
raise AwsSesClientException(str(e) + e.response["Error"]["Code"]) from e
except Exception as e:
self.statsd_client.incr("clients.ses.error")
Expand All @@ -125,6 +138,18 @@ def send_email(
)
self.statsd_client.timing("clients.ses.request-time", elapsed_time)
self.statsd_client.incr("clients.ses.success")

self.otel_client.record(
"clients_request_time",
value=elapsed_time,
attributes={"provider": self.name},
description="Time taken for request to provider",
)
self.otel_client.incr(
"clients_success",
attributes={"provider": self.name},
description="Count of successful requests to provider",
)
return response["MessageId"]


Expand Down
19 changes: 18 additions & 1 deletion app/clients/email/aws_ses_stub.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ class AwsSesStubClient(EmailClient):

name = "ses"

def __init__(self, region, statsd_client, stub_url):
def __init__(self, region, stub_url, statsd_client, otel_client):
super().__init__()
self.statsd_client = statsd_client
self.otel_client = otel_client
self.url = stub_url
self.requests_session = requests.Session()

Expand All @@ -45,10 +46,26 @@ def send_email(

except Exception as e:
self.statsd_client.incr("clients.ses_stub.error")
self.otel_client.incr(
"clients_error",
attributes={"provider": self.name},
description="Count of failed requests to provider",
)
raise AwsSesStubClientException(str(e)) from e
else:
elapsed_time = monotonic() - start_time
current_app.logger.info("AWS SES stub request finished in %s", elapsed_time)
self.statsd_client.timing("clients.ses_stub.request-time", elapsed_time)
self.statsd_client.incr("clients.ses_stub.success")
self.otel_client.record(
"clients_request_time",
value=elapsed_time,
attributes={"provider": self.name},
description="Time taken for requests provider",
)
self.otel_client.incr(
"clients_success",
attributes={"provider": self.name},
description="Count of successful requests to provider",
)
return response_json["MessageId"]
4 changes: 3 additions & 1 deletion app/clients/letter/dvla.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,20 @@ class DVLAClient:
name = "dvla"

statsd_client = None
otel_client = None

_jwt_token = None
_jwt_expires_at = None

def __init__(self, application, *, statsd_client):
def __init__(self, application, *, statsd_client, otel_client):
self.base_url = application.config["DVLA_API_BASE_URL"]
self.ciphers = application.config["DVLA_API_TLS_CIPHERS"]
ssm_client = boto3.client("ssm", region_name=application.config["AWS_REGION"])
self.dvla_username = SSMParameter(key="/notify/api/dvla_username", ssm_client=ssm_client)
self.dvla_password = SSMParameter(key="/notify/api/dvla_password", ssm_client=ssm_client)
self.dvla_api_key = SSMParameter(key="/notify/api/dvla_api_key", ssm_client=ssm_client)
self.statsd_client = statsd_client
self.otel_client = otel_client

self.session = requests.Session()
self.session.mount(self.base_url, _SpecifiedCiphersAdapter(ciphers=self.ciphers))
Expand Down
Loading