From afb4f56d2551a1463a47c2645e113e8d8d758cae Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Thu, 14 May 2026 16:41:33 +0100 Subject: [PATCH 01/26] Add migration to create logical replication slot for notifications --- migrations/.current-alembic-head | 2 +- .../versions/0552_create_replacation_slot.py | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 migrations/versions/0552_create_replacation_slot.py diff --git a/migrations/.current-alembic-head b/migrations/.current-alembic-head index e306e24ed0..05377b4427 100644 --- a/migrations/.current-alembic-head +++ b/migrations/.current-alembic-head @@ -1 +1 @@ -0551_drop_ntfcns_failed_idx +0552_create_replacation_slot diff --git a/migrations/versions/0552_create_replacation_slot.py b/migrations/versions/0552_create_replacation_slot.py new file mode 100644 index 0000000000..f340ecacd9 --- /dev/null +++ b/migrations/versions/0552_create_replacation_slot.py @@ -0,0 +1,22 @@ +""" +Create Date: 2026-05-14T16:39:58 +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +revision = "0552_create_replacation_slot" +down_revision = "0551_drop_ntfcns_failed_idx" + + +def upgrade(): + op.execute( + "SELECT * FROM pg_create_logical_replication_slot('notify_dashboard_replication_slot', 'wal2json');" + ) + + +def downgrade(): + op.execute( + "SELECT * FROM pg_drop_replication_slot('notify_dashboard_replication_slot');" + ) From 81ea3e92b35df9f14f81b0622cd13ad15066bd16 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 15 May 2026 18:52:03 +0100 Subject: [PATCH 02/26] Add task to check replication slot changes and update configuration --- .../process_replication_slot_changes.py | 40 +++++++++++++++++++ app/config.py | 5 +++ .../test_process_replication_slot_changes.py | 38 ++++++++++++++++++ tests/app/test_config.py | 7 ++++ 4 files changed, 90 insertions(+) create mode 100644 app/celery/process_replication_slot_changes.py create mode 100644 tests/app/celery/test_process_replication_slot_changes.py diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py new file mode 100644 index 0000000000..34ae1baf95 --- /dev/null +++ b/app/celery/process_replication_slot_changes.py @@ -0,0 +1,40 @@ +from sqlalchemy import text # pyright: ignore[reportMissingImports] + +from app import current_app, db, notify_celery +from app.cronitor import cronitor + + +# @TODO: this is a temporary task to check the replication slot changes, +# we will need to implement the logic to process the changes and update the dashboard accordingly +@notify_celery.task(bind=True, name="check-replication-slot-changes") +@cronitor("check-replication-slot-changes") +def check_replication_slot_changes(self): + try: + result = db.session.execute( + text(""" + SELECT * FROM pg_logical_slot_peek_changes( + 'notify_dashboard_replication_slot', + NULL, + NULL + ); + """) + ) + changes = result.fetchall() + current_app.logger.info( + "Replication slot changes retrieved", + extra={ + "celery_task": "check-replication-slot-changes", + "changes_count": len(changes) if changes else 0, + }, + ) + except Exception as exc: + retry_count = self.request.retries + if retry_count < 3: + raise self.retry(exc=exc, countdown=2**retry_count) from exc + else: + current_app.logger.error( + "Replication slot query failed after 3 retries", + exc_info=True, + extra={"celery_task": "check-replication-slot-changes"}, + ) + raise diff --git a/app/config.py b/app/config.py index 6c84d483f0..7857630697 100644 --- a/app/config.py +++ b/app/config.py @@ -492,6 +492,11 @@ class Config: "schedule": crontab(hour=9, minute=0, day_of_week="wed", day_of_month="2-8"), "options": {"queue": QueueNames.PERIODIC}, }, + "check-replication-slot-changes": { + "task": "check-replication-slot-changes", + "schedule": timedelta(seconds=1), + "options": {"queue": QueueNames.PERIODIC}, + }, }, } diff --git a/tests/app/celery/test_process_replication_slot_changes.py b/tests/app/celery/test_process_replication_slot_changes.py new file mode 100644 index 0000000000..e8247c4ffe --- /dev/null +++ b/tests/app/celery/test_process_replication_slot_changes.py @@ -0,0 +1,38 @@ +from unittest.mock import Mock + +import pytest +from celery.exceptions import Retry + +from app.celery.process_replication_slot_changes import check_replication_slot_changes + + +def test_check_replication_slot_changes_logs_change_count(mocker, caplog): + mock_result = Mock() + mock_result.fetchall.return_value = [{"lsn": "one"}, {"lsn": "two"}] + mock_execute = mocker.patch( + "app.celery.process_replication_slot_changes.db.session.execute", return_value=mock_result + ) + + with caplog.at_level("INFO"): + check_replication_slot_changes() + + assert mock_execute.call_count == 1 + + query = mock_execute.call_args.args[0] + assert "pg_logical_slot_peek_changes" in str(query) + + assert "Replication slot changes retrieved" in caplog.messages + record = next(record for record in caplog.records if record.msg == "Replication slot changes retrieved") + assert record.changes_count == 2 + + +def test_check_replication_slot_changes_calls_retry_on_query_error(mocker): + mocker.patch("app.celery.process_replication_slot_changes.db.session.execute", side_effect=Exception("EXPECTED")) + mock_retry = mocker.patch.object(check_replication_slot_changes, "retry", side_effect=Retry("retry")) + + with pytest.raises(Retry): + check_replication_slot_changes() + + assert mock_retry.call_count == 1 + _, kwargs = mock_retry.call_args + assert kwargs["countdown"] == 1 diff --git a/tests/app/test_config.py b/tests/app/test_config.py index 9ba4fd87e4..b41c78ef0d 100644 --- a/tests/app/test_config.py +++ b/tests/app/test_config.py @@ -96,3 +96,10 @@ def test_celery_config_contains_archived_template_email_file_cleanup_task(): assert task_config["task"] == "remove-archived-template-email-files-from-s3" assert task_config["options"]["queue"] == QueueNames.PERIODIC assert task_config["schedule"] == crontab(hour=4, minute=40) + + +def test_celery_config_contains_replication_slot_change_check_task(): + task_config = Config.CELERY["beat_schedule"]["check-replication-slot-changes"] + assert task_config["task"] == "check-replication-slot-changes" + assert task_config["options"]["queue"] == QueueNames.PERIODIC + assert task_config["schedule"].total_seconds() == 1 From 0ed22f67ab4a395aa0b30c33fc9a201dc3ea7f52 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 15 May 2026 19:05:16 +0100 Subject: [PATCH 03/26] Add replication blueprint and endpoint to process slot changes --- app/__init__.py | 4 +++ app/replication/rest.py | 13 +++++++++ tests/app/replication/test_rest.py | 43 ++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) create mode 100644 app/replication/rest.py create mode 100644 tests/app/replication/test_rest.py diff --git a/app/__init__.py b/app/__init__.py index 0c1a19a58e..0f5558e156 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -240,6 +240,7 @@ def register_blueprint(application): ) from app.one_click_unsubscribe.rest import one_click_unsubscribe_blueprint from app.organisation.invite_rest import organisation_invite_blueprint + from app.replication.rest import replication_blueprint from app.organisation.rest import organisation_blueprint from app.performance_dashboard.rest import performance_dashboard_blueprint from app.platform_admin.rest import platform_admin_blueprint @@ -374,6 +375,9 @@ def ensure_user_id_attribute_before_request(): one_click_unsubscribe_blueprint.before_request(requires_no_auth) application.register_blueprint(one_click_unsubscribe_blueprint) + replication_blueprint.before_request(requires_admin_auth) + application.register_blueprint(replication_blueprint) + if application.config["REGISTER_FUNCTIONAL_TESTING_BLUEPRINT"]: test_blueprint.before_request(requires_functional_test_auth) application.register_blueprint(test_blueprint) diff --git a/app/replication/rest.py b/app/replication/rest.py new file mode 100644 index 0000000000..f19ba905ae --- /dev/null +++ b/app/replication/rest.py @@ -0,0 +1,13 @@ +from flask import Blueprint, jsonify + +from app.celery.process_replication_slot_changes import check_replication_slot_changes +from app.v2.errors import register_errors + +replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") +register_errors(replication_blueprint) + + +@replication_blueprint.route("/process-slot-changes", methods=["POST"]) +def trigger_process_replication_slot_changes(): + check_replication_slot_changes.apply_async() + return jsonify({"message": "check-replication-slot-changes task queued"}), 201 diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py new file mode 100644 index 0000000000..2754756937 --- /dev/null +++ b/tests/app/replication/test_rest.py @@ -0,0 +1,43 @@ +import pytest + +from tests import create_admin_authorization_header + + +@pytest.fixture +def replication_client(notify_api): + with notify_api.test_request_context(): + with notify_api.test_client() as client: + yield client + + +def test_trigger_process_replication_slot_changes_queues_task(replication_client, mocker): + mock_apply_async = mocker.patch( + "app.replication.rest.check_replication_slot_changes.apply_async" + ) + auth_header = create_admin_authorization_header() + + response = replication_client.post( + "/replication/process-slot-changes", + headers=[auth_header], + ) + + assert response.status_code == 201 + assert response.get_json() == {"message": "check-replication-slot-changes task queued"} + mock_apply_async.assert_called_once_with() + + +def test_trigger_process_replication_slot_changes_requires_auth(replication_client): + response = replication_client.post("/replication/process-slot-changes") + + assert response.status_code == 401 + + +def test_trigger_process_replication_slot_changes_rejects_get(replication_client): + auth_header = create_admin_authorization_header() + + response = replication_client.get( + "/replication/process-slot-changes", + headers=[auth_header], + ) + + assert response.status_code == 405 From 829d0fd31f146e6402954e2080b309bfc44712c6 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Mon, 18 May 2026 16:55:05 +0100 Subject: [PATCH 04/26] Add endpoint to check replication slot changes and corresponding tests --- app/__init__.py | 2 +- app/replication/rest.py | 21 +++++++++++++++ tests/app/replication/test_rest.py | 41 +++++++++++++++++++++++++++--- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 0f5558e156..e7636cb30e 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -240,7 +240,6 @@ def register_blueprint(application): ) from app.one_click_unsubscribe.rest import one_click_unsubscribe_blueprint from app.organisation.invite_rest import organisation_invite_blueprint - from app.replication.rest import replication_blueprint from app.organisation.rest import organisation_blueprint from app.performance_dashboard.rest import performance_dashboard_blueprint from app.platform_admin.rest import platform_admin_blueprint @@ -249,6 +248,7 @@ def register_blueprint(application): from app.provider_details.rest import ( provider_details as provider_details_blueprint, ) + from app.replication.rest import replication_blueprint from app.service.callback_rest import service_callback_blueprint from app.service.rest import service_blueprint from app.service_invite.rest import ( diff --git a/app/replication/rest.py b/app/replication/rest.py index f19ba905ae..2d9646aa5e 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,7 +1,10 @@ +from pydoc import text + from flask import Blueprint, jsonify from app.celery.process_replication_slot_changes import check_replication_slot_changes from app.v2.errors import register_errors +from tests.app import db replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") register_errors(replication_blueprint) @@ -11,3 +14,21 @@ def trigger_process_replication_slot_changes(): check_replication_slot_changes.apply_async() return jsonify({"message": "check-replication-slot-changes task queued"}), 201 + + +@replication_blueprint.route("/check-slot-changes", methods=["GET"]) +def trigger_check_replication_slot_changes(): + result = db.session.execute( + text(""" + SELECT * FROM pg_logical_slot_peek_changes( + 'notify_dashboard_replication_slot', + NULL, + NULL, + 'pretty-print', 'on', + 'add-tables', 'public.notifications' + ); + """) + ) + changes = result.fetchall() + + return jsonify({"data": changes}), 200 diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py index 2754756937..29d0dc0855 100644 --- a/tests/app/replication/test_rest.py +++ b/tests/app/replication/test_rest.py @@ -11,9 +11,7 @@ def replication_client(notify_api): def test_trigger_process_replication_slot_changes_queues_task(replication_client, mocker): - mock_apply_async = mocker.patch( - "app.replication.rest.check_replication_slot_changes.apply_async" - ) + mock_apply_async = mocker.patch("app.replication.rest.check_replication_slot_changes.apply_async") auth_header = create_admin_authorization_header() response = replication_client.post( @@ -41,3 +39,40 @@ def test_trigger_process_replication_slot_changes_rejects_get(replication_client ) assert response.status_code == 405 + + +def test_trigger_check_replication_slot_changes_returns_data(replication_client, mocker): + mock_result = mocker.Mock() + mock_result.fetchall.return_value = [{"lsn": "0/1", "xid": 1, "data": "mock-change"}] + mock_execute = mocker.patch( + "app.replication.rest.db.session.execute", + return_value=mock_result, + ) + auth_header = create_admin_authorization_header() + + response = replication_client.get( + "/replication/check-slot-changes", + headers=[auth_header], + ) + + assert response.status_code == 200 + assert response.get_json() == {"data": [{"lsn": "0/1", "xid": 1, "data": "mock-change"}]} + mock_execute.assert_called_once() + mock_result.fetchall.assert_called_once_with() + + +def test_trigger_check_replication_slot_changes_requires_auth(replication_client): + response = replication_client.get("/replication/check-slot-changes") + + assert response.status_code == 401 + + +def test_trigger_check_replication_slot_changes_rejects_post(replication_client): + auth_header = create_admin_authorization_header() + + response = replication_client.post( + "/replication/check-slot-changes", + headers=[auth_header], + ) + + assert response.status_code == 405 From 96ba58a3d1a9a5bffa58a70f75455cb9880d453d Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Mon, 18 May 2026 18:32:57 +0100 Subject: [PATCH 05/26] Refactor replication blueprint to allow unauthenticated access and enhance slot change data parsing --- app/__init__.py | 2 +- app/replication/rest.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index e7636cb30e..4223fd33e0 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -375,7 +375,7 @@ def ensure_user_id_attribute_before_request(): one_click_unsubscribe_blueprint.before_request(requires_no_auth) application.register_blueprint(one_click_unsubscribe_blueprint) - replication_blueprint.before_request(requires_admin_auth) + replication_blueprint.before_request(requires_no_auth) application.register_blueprint(replication_blueprint) if application.config["REGISTER_FUNCTIONAL_TESTING_BLUEPRINT"]: diff --git a/app/replication/rest.py b/app/replication/rest.py index 2d9646aa5e..78e48c51ab 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,10 +1,12 @@ -from pydoc import text +import json + +from sqlalchemy import text from flask import Blueprint, jsonify +from app import db from app.celery.process_replication_slot_changes import check_replication_slot_changes from app.v2.errors import register_errors -from tests.app import db replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") register_errors(replication_blueprint) @@ -29,6 +31,13 @@ def trigger_check_replication_slot_changes(): ); """) ) - changes = result.fetchall() + changes = [dict(change) for change in result.mappings().all()] + + parsed_data = [] + for change in changes: + parsed_change = json.loads(change["data"]) + if len(parsed_change.get("change", [])) == 0: + continue + parsed_data.append(parsed_change) - return jsonify({"data": changes}), 200 + return jsonify({"data": parsed_data}), 200 From c364ffa69b1c91b26ecaa50267f5a69703bc5795 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Mon, 18 May 2026 19:29:36 +0100 Subject: [PATCH 06/26] Implement replication change processing and corresponding tests --- .../process_replication_changes.py | 40 +++++++++ app/replication/rest.py | 15 +--- .../test_process_replication_changes.py | 86 +++++++++++++++++++ tests/app/replication/test_rest.py | 16 +++- 4 files changed, 143 insertions(+), 14 deletions(-) create mode 100644 app/replication/process_replication_changes.py create mode 100644 tests/app/replication/test_process_replication_changes.py diff --git a/app/replication/process_replication_changes.py b/app/replication/process_replication_changes.py new file mode 100644 index 0000000000..d945e93124 --- /dev/null +++ b/app/replication/process_replication_changes.py @@ -0,0 +1,40 @@ +import json + + +def process_replication_changes(changes): + """ + Process the replication changes and return a list of parsed changes. + """ + parsed_data = [row for change in changes for row in (parse_change_data(change) or [])] + + return parsed_data + + +def parse_change_data(change): + """ + Parse the change data and return a dictionary with the relevant information. + """ + change = json.loads(change["data"]).get("change", [{}]) + + if len(change) == 0: + return None + + return list(map(parse_row_data, change)) + + +def parse_row_data(row): + """ + Create a mapping of column names to their values for the given change. + """ + column_names = row.get("columnnames", []) + column_values = row.get("columnvalues", []) + + old_column_names = row.get("oldkeys", {}).get("keynames", []) + old_column_values = row.get("oldkeys", {}).get("keyvalues", []) + + return { + "type": row.get("kind", ""), + "table": row.get("table", ""), + "current_row_data": dict(zip(column_names, column_values, strict=False)), + "previous_row_data": dict(zip(old_column_names, old_column_values, strict=False)), + } diff --git a/app/replication/rest.py b/app/replication/rest.py index 78e48c51ab..47e833dd95 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,11 +1,9 @@ -import json - -from sqlalchemy import text - from flask import Blueprint, jsonify +from sqlalchemy import text from app import db from app.celery.process_replication_slot_changes import check_replication_slot_changes +from app.replication.process_replication_changes import process_replication_changes from app.v2.errors import register_errors replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") @@ -33,11 +31,6 @@ def trigger_check_replication_slot_changes(): ) changes = [dict(change) for change in result.mappings().all()] - parsed_data = [] - for change in changes: - parsed_change = json.loads(change["data"]) - if len(parsed_change.get("change", [])) == 0: - continue - parsed_data.append(parsed_change) + parsed_data = process_replication_changes(changes) - return jsonify({"data": parsed_data}), 200 + return jsonify({"changes": parsed_data}), 200 diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py new file mode 100644 index 0000000000..de19b3eb1e --- /dev/null +++ b/tests/app/replication/test_process_replication_changes.py @@ -0,0 +1,86 @@ +import json + +from app.replication.process_replication_changes import parse_change_data, parse_row_data, process_replication_changes + + +def test_process_replication_changes_flattens_rows_across_changes(): + first_change = { + "data": json.dumps( + { + "change": [ + { + "kind": "insert", + "table": "notifications", + "columnnames": ["id", "to"], + "columnvalues": ["111", "447700900001"], + } + ] + } + ) + } + second_change = { + "data": json.dumps( + { + "change": [ + { + "kind": "update", + "table": "notifications", + "columnnames": ["id", "status"], + "columnvalues": ["222", "sent"], + } + ] + } + ) + } + + result = process_replication_changes([first_change, second_change]) + + assert result == [ + { + "type": "insert", + "table": "notifications", + "current_row_data": {"id": "111", "to": "447700900001"}, + "previous_row_data": {}, + }, + { + "type": "update", + "table": "notifications", + "current_row_data": {"id": "222", "status": "sent"}, + "previous_row_data": {}, + }, + ] + + +def test_parse_change_data_returns_none_for_empty_change_list(): + result = parse_change_data({"data": json.dumps({"change": []})}) + + assert result is None + + +def test_parse_row_data_maps_current_and_previous_rows(): + row = { + "kind": "update", + "table": "notifications", + "columnnames": ["id", "status", "to"], + "columnvalues": ["abc", "delivered", "447700900002"], + "oldkeys": { + "keynames": ["id", "status"], + "keyvalues": ["abc", "sending"], + }, + } + + result = parse_row_data(row) + + assert result == { + "type": "update", + "table": "notifications", + "current_row_data": { + "id": "abc", + "status": "delivered", + "to": "447700900002", + }, + "previous_row_data": { + "id": "abc", + "status": "sending", + }, + } diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py index 29d0dc0855..3cc953ddf6 100644 --- a/tests/app/replication/test_rest.py +++ b/tests/app/replication/test_rest.py @@ -43,11 +43,19 @@ def test_trigger_process_replication_slot_changes_rejects_get(replication_client def test_trigger_check_replication_slot_changes_returns_data(replication_client, mocker): mock_result = mocker.Mock() - mock_result.fetchall.return_value = [{"lsn": "0/1", "xid": 1, "data": "mock-change"}] + raw_changes = [{"lsn": "0/1", "xid": 1, "data": "mock-change"}] + parsed_changes = [ + {"type": "insert", "table": "notifications", "current_row_data": {"id": "1"}, "previous_row_data": {}} + ] + mock_result.mappings.return_value.all.return_value = raw_changes mock_execute = mocker.patch( "app.replication.rest.db.session.execute", return_value=mock_result, ) + mock_process_changes = mocker.patch( + "app.replication.rest.process_replication_changes", + return_value=parsed_changes, + ) auth_header = create_admin_authorization_header() response = replication_client.get( @@ -56,9 +64,11 @@ def test_trigger_check_replication_slot_changes_returns_data(replication_client, ) assert response.status_code == 200 - assert response.get_json() == {"data": [{"lsn": "0/1", "xid": 1, "data": "mock-change"}]} + assert response.get_json() == {"changes": parsed_changes} mock_execute.assert_called_once() - mock_result.fetchall.assert_called_once_with() + mock_result.mappings.assert_called_once_with() + mock_result.mappings.return_value.all.assert_called_once_with() + mock_process_changes.assert_called_once_with(raw_changes) def test_trigger_check_replication_slot_changes_requires_auth(replication_client): From c19570eeb8dfcbac8d99c78a4b8c744d50f22057 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Mon, 18 May 2026 19:51:12 +0100 Subject: [PATCH 07/26] Refactor replication slot change handling to utilize utility functions for improved clarity and maintainability --- .../process_replication_slot_changes.py | 16 +++----------- ...hanges.py => replication_changes_utils.py} | 19 ++++++++++++++++- app/replication/rest.py | 21 +++---------------- .../test_process_replication_changes.py | 4 ++-- 4 files changed, 26 insertions(+), 34 deletions(-) rename app/replication/{process_replication_changes.py => replication_changes_utils.py} (67%) diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py index 34ae1baf95..421b477694 100644 --- a/app/celery/process_replication_slot_changes.py +++ b/app/celery/process_replication_slot_changes.py @@ -1,7 +1,6 @@ -from sqlalchemy import text # pyright: ignore[reportMissingImports] - -from app import current_app, db, notify_celery +from app import current_app, notify_celery from app.cronitor import cronitor +from app.replication.replication_changes_utils import get_replication_changes # @TODO: this is a temporary task to check the replication slot changes, @@ -10,16 +9,7 @@ @cronitor("check-replication-slot-changes") def check_replication_slot_changes(self): try: - result = db.session.execute( - text(""" - SELECT * FROM pg_logical_slot_peek_changes( - 'notify_dashboard_replication_slot', - NULL, - NULL - ); - """) - ) - changes = result.fetchall() + changes = get_replication_changes() current_app.logger.info( "Replication slot changes retrieved", extra={ diff --git a/app/replication/process_replication_changes.py b/app/replication/replication_changes_utils.py similarity index 67% rename from app/replication/process_replication_changes.py rename to app/replication/replication_changes_utils.py index d945e93124..99ac3f37ae 100644 --- a/app/replication/process_replication_changes.py +++ b/app/replication/replication_changes_utils.py @@ -1,10 +1,27 @@ import json +from sqlalchemy import text -def process_replication_changes(changes): +from app import db + + +def get_replication_changes(peak=True): """ Process the replication changes and return a list of parsed changes. """ + result = db.session.execute( + text(f""" + SELECT * FROM {"pg_logical_slot_peek_changes" if peak else "pg_logical_slot_get_changes"}( + 'notify_dashboard_replication_slot', + NULL, + NULL, + 'pretty-print', 'on', + 'add-tables', 'public.notifications' + ); + """) + ) + changes = [dict(change) for change in result.mappings().all()] + parsed_data = [row for change in changes for row in (parse_change_data(change) or [])] return parsed_data diff --git a/app/replication/rest.py b/app/replication/rest.py index 47e833dd95..80bdf7a75c 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,9 +1,7 @@ from flask import Blueprint, jsonify -from sqlalchemy import text -from app import db from app.celery.process_replication_slot_changes import check_replication_slot_changes -from app.replication.process_replication_changes import process_replication_changes +from app.replication.replication_changes_utils import get_replication_changes from app.v2.errors import register_errors replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") @@ -18,19 +16,6 @@ def trigger_process_replication_slot_changes(): @replication_blueprint.route("/check-slot-changes", methods=["GET"]) def trigger_check_replication_slot_changes(): - result = db.session.execute( - text(""" - SELECT * FROM pg_logical_slot_peek_changes( - 'notify_dashboard_replication_slot', - NULL, - NULL, - 'pretty-print', 'on', - 'add-tables', 'public.notifications' - ); - """) - ) - changes = [dict(change) for change in result.mappings().all()] + changes = get_replication_changes(peak=True) - parsed_data = process_replication_changes(changes) - - return jsonify({"changes": parsed_data}), 200 + return jsonify({"changes": changes}), 200 diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py index de19b3eb1e..2cf404127d 100644 --- a/tests/app/replication/test_process_replication_changes.py +++ b/tests/app/replication/test_process_replication_changes.py @@ -1,6 +1,6 @@ import json -from app.replication.process_replication_changes import parse_change_data, parse_row_data, process_replication_changes +from app.replication.replication_changes_utils import get_replication_changes, parse_change_data, parse_row_data def test_process_replication_changes_flattens_rows_across_changes(): @@ -33,7 +33,7 @@ def test_process_replication_changes_flattens_rows_across_changes(): ) } - result = process_replication_changes([first_change, second_change]) + result = get_replication_changes([first_change, second_change]) assert result == [ { From 84fa039990ca60afff8e604f3cf6c13fe30f62cd Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Tue, 19 May 2026 20:29:29 +0100 Subject: [PATCH 08/26] Add migration for notifications_id_status index creation --- migrations/.current-alembic-head | 2 +- .../0553_notifications_id_status_idx.py | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 migrations/versions/0553_notifications_id_status_idx.py diff --git a/migrations/.current-alembic-head b/migrations/.current-alembic-head index 05377b4427..9b09162d4f 100644 --- a/migrations/.current-alembic-head +++ b/migrations/.current-alembic-head @@ -1 +1 @@ -0552_create_replacation_slot +0553_notifications_id_status_idx diff --git a/migrations/versions/0553_notifications_id_status_idx.py b/migrations/versions/0553_notifications_id_status_idx.py new file mode 100644 index 0000000000..ae00666a24 --- /dev/null +++ b/migrations/versions/0553_notifications_id_status_idx.py @@ -0,0 +1,28 @@ +""" +Create Date: 2026-05-19T00:00:00 +""" + +from alembic import op + +revision = "0553_notifications_id_status_idx" +down_revision = "0552_create_replacation_slot" + + +def upgrade(): + with op.get_context().autocommit_block(): + op.create_index( + "ix_notifications_id_notification_status", + "notifications", + ["id", "notification_status"], + unique=False, + postgresql_concurrently=True, + ) + + +def downgrade(): + with op.get_context().autocommit_block(): + op.drop_index( + "ix_notifications_id_notification_status", + table_name="notifications", + postgresql_concurrently=True, + ) From d7f653c64b1fe72ccd10ddaf7d63bc23c6208fa4 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Tue, 19 May 2026 20:42:11 +0100 Subject: [PATCH 09/26] Fix typo in get_replication_changes function and update test to mock database execution --- app/replication/replication_changes_utils.py | 4 ++-- app/replication/rest.py | 6 +++--- tests/app/replication/test_process_replication_changes.py | 7 ++++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/app/replication/replication_changes_utils.py b/app/replication/replication_changes_utils.py index 99ac3f37ae..fb4e6935de 100644 --- a/app/replication/replication_changes_utils.py +++ b/app/replication/replication_changes_utils.py @@ -5,13 +5,13 @@ from app import db -def get_replication_changes(peak=True): +def get_replication_changes(peek=True): """ Process the replication changes and return a list of parsed changes. """ result = db.session.execute( text(f""" - SELECT * FROM {"pg_logical_slot_peek_changes" if peak else "pg_logical_slot_get_changes"}( + SELECT * FROM {"pg_logical_slot_peek_changes" if peek else "pg_logical_slot_get_changes"}( 'notify_dashboard_replication_slot', NULL, NULL, diff --git a/app/replication/rest.py b/app/replication/rest.py index 80bdf7a75c..3dfc5c57c8 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -10,12 +10,12 @@ @replication_blueprint.route("/process-slot-changes", methods=["POST"]) def trigger_process_replication_slot_changes(): - check_replication_slot_changes.apply_async() - return jsonify({"message": "check-replication-slot-changes task queued"}), 201 + check_replication_slot_changes() + return jsonify({"message": "check-replication-slot-changes task executed"}), 201 @replication_blueprint.route("/check-slot-changes", methods=["GET"]) def trigger_check_replication_slot_changes(): - changes = get_replication_changes(peak=True) + changes = get_replication_changes(peek=True) return jsonify({"changes": changes}), 200 diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py index 2cf404127d..510180c97c 100644 --- a/tests/app/replication/test_process_replication_changes.py +++ b/tests/app/replication/test_process_replication_changes.py @@ -1,4 +1,5 @@ import json +from unittest.mock import patch, MagicMock from app.replication.replication_changes_utils import get_replication_changes, parse_change_data, parse_row_data @@ -33,7 +34,11 @@ def test_process_replication_changes_flattens_rows_across_changes(): ) } - result = get_replication_changes([first_change, second_change]) + mock_result = MagicMock() + mock_result.mappings().all.return_value = [first_change, second_change] + + with patch("app.replication.replication_changes_utils.db.session.execute", return_value=mock_result): + result = get_replication_changes() assert result == [ { From ed048b7e167b23c1af059ebb4f23f984678e88e8 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Tue, 19 May 2026 22:17:11 +0100 Subject: [PATCH 10/26] Add migration to create service_stats table for aggregated notification status counts --- migrations/.current-alembic-head | 2 +- .../versions/0554_create_service_stats.py | 69 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 migrations/versions/0554_create_service_stats.py diff --git a/migrations/.current-alembic-head b/migrations/.current-alembic-head index 9b09162d4f..74c92585a5 100644 --- a/migrations/.current-alembic-head +++ b/migrations/.current-alembic-head @@ -1 +1 @@ -0553_notifications_id_status_idx +0554_create_service_stats diff --git a/migrations/versions/0554_create_service_stats.py b/migrations/versions/0554_create_service_stats.py new file mode 100644 index 0000000000..55ba5c86bf --- /dev/null +++ b/migrations/versions/0554_create_service_stats.py @@ -0,0 +1,69 @@ +""" +Create service_stats table for aggregated notification status counts. + +Revision ID: 0554_create_service_stats +Revises: 0553_notifications_id_status_idx +Create Date: 2026-05-19 00:00:00 +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +revision = "0554_create_service_stats" +down_revision = "0553_notifications_id_status_idx" + + +def upgrade(): + op.create_table( + "service_stats", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("service_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("template_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("notification_type", postgresql.ENUM(name="notification_type", create_type=False), nullable=False), + sa.Column("notification_status", sa.Text(), nullable=False), + sa.Column("count", sa.Integer(), nullable=False, server_default=sa.text("0")), + sa.ForeignKeyConstraint(["service_id"], ["services.id"]), + sa.ForeignKeyConstraint(["template_id"], ["templates.id"]), + sa.ForeignKeyConstraint(["notification_status"], ["notification_status_types.name"]), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "service_id", + "template_id", + "notification_type", + "notification_status", + name="uix_service_stats_dimensions", + ), + ) + + op.create_index( + "ix_svc_stats_svc_ntype_nstatus", + "service_stats", + ["service_id", "notification_type", "notification_status"], + unique=False, + ) + op.create_index( + "ix_svc_stats_tmpl_ntype_nstatus", + "service_stats", + ["template_id", "notification_type", "notification_status"], + unique=False, + ) + op.create_index( + "ix_service_stats_service_id_template_id", + "service_stats", + ["service_id", "template_id"], + unique=False, + ) + + +def downgrade(): + op.drop_index("ix_service_stats_service_id_template_id", table_name="service_stats") + op.drop_index( + "ix_svc_stats_tmpl_ntype_nstatus", + table_name="service_stats", + ) + op.drop_index( + "ix_svc_stats_svc_ntype_nstatus", + table_name="service_stats", + ) + op.drop_table("service_stats") From 3ccd4e6256b32b49b7accdd96217192c3d9e7fa5 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Tue, 19 May 2026 23:10:22 +0100 Subject: [PATCH 11/26] Refactor type annotations in replication_changes_utils and update test imports for consistency --- app/replication/replication_changes_utils.py | 51 ++++++++++++++++--- .../test_process_replication_changes.py | 2 +- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/app/replication/replication_changes_utils.py b/app/replication/replication_changes_utils.py index fb4e6935de..199f613a54 100644 --- a/app/replication/replication_changes_utils.py +++ b/app/replication/replication_changes_utils.py @@ -1,11 +1,47 @@ import json +from typing import TypedDict, cast from sqlalchemy import text from app import db +type JsonPrimitive = str | int | float | bool | None +type JsonValue = JsonPrimitive | dict[str, "JsonValue"] | list["JsonValue"] +type RowData = dict[str, JsonValue] -def get_replication_changes(peek=True): + +class ReplicationChangeRow(TypedDict): + data: str + + +class OldKeys(TypedDict, total=False): + keynames: list[str] + keyvalues: list[JsonValue] + + +class ReplicationJsonRow(TypedDict, total=False): + kind: str + table: str + columnnames: list[str] + columnvalues: list[JsonValue] + oldkeys: OldKeys + + +class ChangePayload(TypedDict, total=False): + change: list[ReplicationJsonRow] + + +class ParsedRow(TypedDict): + type: str + table: str + current_row_data: RowData + previous_row_data: RowData + + +DEFAULT_CHANGE_ROWS: list[ReplicationJsonRow] = [{}] + + +def get_replication_changes(peek: bool = True) -> list[ParsedRow]: """ Process the replication changes and return a list of parsed changes. """ @@ -22,24 +58,25 @@ def get_replication_changes(peek=True): ) changes = [dict(change) for change in result.mappings().all()] - parsed_data = [row for change in changes for row in (parse_change_data(change) or [])] + parsed_data = [row for change in changes for row in (parse_change_data(cast(ReplicationChangeRow, change)) or [])] return parsed_data -def parse_change_data(change): +def parse_change_data(change: ReplicationChangeRow) -> list[ParsedRow] | None: """ Parse the change data and return a dictionary with the relevant information. """ - change = json.loads(change["data"]).get("change", [{}]) + payload = cast(ChangePayload, json.loads(change["data"])) + raw_changes = payload.get("change", DEFAULT_CHANGE_ROWS) - if len(change) == 0: + if len(raw_changes) == 0: return None - return list(map(parse_row_data, change)) + return [parse_row_data(row) for row in raw_changes] -def parse_row_data(row): +def parse_row_data(row: ReplicationJsonRow) -> ParsedRow: """ Create a mapping of column names to their values for the given change. """ diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py index 510180c97c..afe5bd56ee 100644 --- a/tests/app/replication/test_process_replication_changes.py +++ b/tests/app/replication/test_process_replication_changes.py @@ -1,5 +1,5 @@ import json -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch from app.replication.replication_changes_utils import get_replication_changes, parse_change_data, parse_row_data From 527dac6392c4ca0682febeefbc7539706c1d883a Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Tue, 19 May 2026 23:44:14 +0100 Subject: [PATCH 12/26] Implement service stats tracking for notification changes and enhance replication slot processing --- .../process_replication_slot_changes.py | 108 +++++++++++++++++- app/dao/service_stats_dao.py | 76 ++++++++++++ app/models.py | 38 ++++++ app/replication/replication_changes_utils.py | 11 ++ .../test_process_replication_slot_changes.py | 94 ++++++++++++--- tests/app/dao/test_service_stats_dao.py | 94 +++++++++++++++ .../test_process_replication_changes.py | 24 +++- 7 files changed, 427 insertions(+), 18 deletions(-) create mode 100644 app/dao/service_stats_dao.py create mode 100644 tests/app/dao/test_service_stats_dao.py diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py index 421b477694..67633b55f3 100644 --- a/app/celery/process_replication_slot_changes.py +++ b/app/celery/process_replication_slot_changes.py @@ -1,6 +1,57 @@ +from uuid import UUID + from app import current_app, notify_celery from app.cronitor import cronitor -from app.replication.replication_changes_utils import get_replication_changes +from app.dao.service_stats_dao import ( + ServiceStatsDimensions, + apply_service_stats_delete, + apply_service_stats_insert, + apply_service_stats_update_transition, +) +from app.replication.replication_changes_utils import ( + ParsedRow, + RowData, + get_notification_status, + get_replication_changes, + get_str_value, +) + + +def _parse_uuid_value(row_data: RowData, key: str) -> UUID | None: + raw_value = get_str_value(row_data, key) + if not raw_value: + return None + + try: + return UUID(raw_value) + except ValueError: + return None + + +def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> ServiceStatsDimensions | None: + if use_previous_row: + row_data = change["previous_row_data"] + fallback_data = change["current_row_data"] + else: + row_data = change["current_row_data"] + fallback_data = change["previous_row_data"] + + service_id = _parse_uuid_value(row_data, "service_id") or _parse_uuid_value(fallback_data, "service_id") + template_id = _parse_uuid_value(row_data, "template_id") or _parse_uuid_value(fallback_data, "template_id") + notification_type = get_str_value(row_data, "notification_type") or get_str_value( + fallback_data, "notification_type" + ) + notification_status = get_notification_status(row_data) or get_notification_status(fallback_data) + + if not service_id or not template_id or not notification_type or not notification_status: + return None + + return { + "service_id": service_id, + "template_id": template_id, + "notification_type": notification_type, + "notification_status": notification_status, + } # @TODO: this is a temporary task to check the replication slot changes, @@ -9,12 +60,61 @@ @cronitor("check-replication-slot-changes") def check_replication_slot_changes(self): try: - changes = get_replication_changes() + changes = get_replication_changes(peek=False) + + if not changes: + current_app.logger.info( + "No replication slot changes found", + extra={"celery_task": "check-replication-slot-changes", "changes_count": 0}, + ) + return + + processed_changes = 0 + ignored_changes = 0 + + for change in changes: + if change["table"] != "notifications": + ignored_changes += 1 + continue + + change_type = change["type"] + if change_type == "insert": + dimensions = _build_dimensions(change, use_previous_row=False) + if not dimensions: + ignored_changes += 1 + continue + apply_service_stats_insert(dimensions) + processed_changes += 1 + continue + + if change_type == "delete": + dimensions = _build_dimensions(change, use_previous_row=False) + if not dimensions: + ignored_changes += 1 + continue + apply_service_stats_delete(dimensions) + processed_changes += 1 + continue + + if change_type == "update": + old_dimensions = _build_dimensions(change, use_previous_row=True) + new_dimensions = _build_dimensions(change, use_previous_row=False) + if not old_dimensions or not new_dimensions: + ignored_changes += 1 + continue + apply_service_stats_update_transition(old_dimensions, new_dimensions) + processed_changes += 1 + continue + + ignored_changes += 1 + current_app.logger.info( - "Replication slot changes retrieved", + "Replication slot changes processed", extra={ "celery_task": "check-replication-slot-changes", - "changes_count": len(changes) if changes else 0, + "changes_count": len(changes), + "processed_changes": processed_changes, + "ignored_changes": ignored_changes, }, ) except Exception as exc: diff --git a/app/dao/service_stats_dao.py b/app/dao/service_stats_dao.py new file mode 100644 index 0000000000..be7eb0ddb0 --- /dev/null +++ b/app/dao/service_stats_dao.py @@ -0,0 +1,76 @@ +import uuid +from typing import TypedDict +from uuid import UUID + +from sqlalchemy import func +from sqlalchemy.dialects.postgresql import insert + +from app import db +from app.models import ServiceStats + + +class ServiceStatsDimensions(TypedDict): + service_id: UUID + template_id: UUID + notification_type: str + notification_status: str + + +def _increment_service_stats_count(dimensions: ServiceStatsDimensions, increment_by: int) -> None: + stmt = insert(ServiceStats).values( + id=uuid.uuid4(), + service_id=dimensions["service_id"], + template_id=dimensions["template_id"], + notification_type=dimensions["notification_type"], + notification_status=dimensions["notification_status"], + count=increment_by, + ) + stmt = stmt.on_conflict_do_update( + constraint="uix_service_stats_dimensions", + set_={ + "count": ServiceStats.count + increment_by, + }, + ) + db.session.execute(stmt) + + +def _decrement_service_stats_count(dimensions: ServiceStatsDimensions, decrement_by: int) -> None: + ( + db.session.query(ServiceStats) + .filter( + ServiceStats.service_id == dimensions["service_id"], + ServiceStats.template_id == dimensions["template_id"], + ServiceStats.notification_type == dimensions["notification_type"], + ServiceStats.notification_status == dimensions["notification_status"], + ) + .update( + { + "count": func.greatest(ServiceStats.count - decrement_by, 0), + }, + synchronize_session=False, + ) + ) + + +def apply_service_stats_insert(dimensions: ServiceStatsDimensions) -> None: + _increment_service_stats_count(dimensions, increment_by=1) + + +def apply_service_stats_delete(dimensions: ServiceStatsDimensions) -> None: + _decrement_service_stats_count(dimensions, decrement_by=1) + + +def apply_service_stats_update_transition( + old_dimensions: ServiceStatsDimensions, + new_dimensions: ServiceStatsDimensions, +) -> None: + if ( + old_dimensions["service_id"] == new_dimensions["service_id"] + and old_dimensions["template_id"] == new_dimensions["template_id"] + and old_dimensions["notification_type"] == new_dimensions["notification_type"] + and old_dimensions["notification_status"] == new_dimensions["notification_status"] + ): + return + + _decrement_service_stats_count(old_dimensions, decrement_by=1) + _increment_service_stats_count(new_dimensions, increment_by=1) \ No newline at end of file diff --git a/app/models.py b/app/models.py index e4f670d236..7850695347 100644 --- a/app/models.py +++ b/app/models.py @@ -2414,6 +2414,44 @@ class FactNotificationStatus(db.Model): ) +class ServiceStats(db.Model): + __tablename__ = "service_stats" + + id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + service_id = db.Column(UUID(as_uuid=True), db.ForeignKey("services.id"), nullable=False) + template_id = db.Column(UUID(as_uuid=True), db.ForeignKey("templates.id"), nullable=False) + notification_type = db.Column(notification_types, nullable=False) + notification_status = db.Column(db.Text, db.ForeignKey("notification_status_types.name"), nullable=False) + count = db.Column(db.Integer(), nullable=False, default=0, server_default=db.text("0")) + + __table_args__ = ( + UniqueConstraint( + "service_id", + "template_id", + "notification_type", + "notification_status", + name="uix_service_stats_dimensions", + ), + Index( + "ix_svc_stats_svc_ntype_nstatus", + "service_id", + "notification_type", + "notification_status", + ), + Index( + "ix_svc_stats_tmpl_ntype_nstatus", + "template_id", + "notification_type", + "notification_status", + ), + Index( + "ix_service_stats_service_id_template_id", + "service_id", + "template_id", + ), + ) + + class FactProcessingTime(db.Model): __tablename__ = "ft_processing_time" diff --git a/app/replication/replication_changes_utils.py b/app/replication/replication_changes_utils.py index 199f613a54..74afea4b5d 100644 --- a/app/replication/replication_changes_utils.py +++ b/app/replication/replication_changes_utils.py @@ -92,3 +92,14 @@ def parse_row_data(row: ReplicationJsonRow) -> ParsedRow: "current_row_data": dict(zip(column_names, column_values, strict=False)), "previous_row_data": dict(zip(old_column_names, old_column_values, strict=False)), } + + +def get_str_value(row_data: RowData, key: str) -> str | None: + value = row_data.get(key) + if isinstance(value, str): + return value + return None + + +def get_notification_status(row_data: RowData) -> str | None: + return get_str_value(row_data, "notification_status") or get_str_value(row_data, "status") diff --git a/tests/app/celery/test_process_replication_slot_changes.py b/tests/app/celery/test_process_replication_slot_changes.py index e8247c4ffe..1f510d1ec6 100644 --- a/tests/app/celery/test_process_replication_slot_changes.py +++ b/tests/app/celery/test_process_replication_slot_changes.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock +import uuid import pytest from celery.exceptions import Retry @@ -6,28 +6,96 @@ from app.celery.process_replication_slot_changes import check_replication_slot_changes -def test_check_replication_slot_changes_logs_change_count(mocker, caplog): - mock_result = Mock() - mock_result.fetchall.return_value = [{"lsn": "one"}, {"lsn": "two"}] - mock_execute = mocker.patch( - "app.celery.process_replication_slot_changes.db.session.execute", return_value=mock_result +def test_check_replication_slot_changes_early_returns_when_no_changes(mocker, caplog): + mock_get_replication_changes = mocker.patch( + "app.celery.process_replication_slot_changes.get_replication_changes", return_value=[] ) + mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") + mock_apply_delete = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delete") + mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") with caplog.at_level("INFO"): check_replication_slot_changes() - assert mock_execute.call_count == 1 + mock_get_replication_changes.assert_called_once_with(peek=False) + mock_apply_insert.assert_not_called() + mock_apply_delete.assert_not_called() + mock_apply_update.assert_not_called() - query = mock_execute.call_args.args[0] - assert "pg_logical_slot_peek_changes" in str(query) + assert "No replication slot changes found" in caplog.messages - assert "Replication slot changes retrieved" in caplog.messages - record = next(record for record in caplog.records if record.msg == "Replication slot changes retrieved") - assert record.changes_count == 2 + +def test_check_replication_slot_changes_processes_insert_update_delete(mocker, caplog): + service_id = str(uuid.uuid4()) + template_id = str(uuid.uuid4()) + changes = [ + { + "type": "insert", + "table": "notifications", + "current_row_data": { + "service_id": service_id, + "template_id": template_id, + "notification_type": "email", + "notification_status": "created", + }, + "previous_row_data": {}, + }, + { + "type": "update", + "table": "notifications", + "current_row_data": { + "service_id": service_id, + "template_id": template_id, + "notification_type": "email", + "notification_status": "delivered", + }, + "previous_row_data": { + "service_id": service_id, + "template_id": template_id, + "notification_type": "email", + "notification_status": "sending", + }, + }, + { + "type": "delete", + "table": "notifications", + "current_row_data": { + "service_id": service_id, + "template_id": template_id, + "notification_type": "email", + "notification_status": "delivered", + }, + "previous_row_data": {}, + }, + ] + + mock_get_replication_changes = mocker.patch( + "app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes + ) + mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") + mock_apply_delete = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delete") + mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") + + with caplog.at_level("INFO"): + check_replication_slot_changes() + + mock_get_replication_changes.assert_called_once_with(peek=False) + mock_apply_insert.assert_called_once() + mock_apply_update.assert_called_once() + mock_apply_delete.assert_called_once() + + assert "Replication slot changes processed" in caplog.messages + record = next(record for record in caplog.records if record.msg == "Replication slot changes processed") + assert record.changes_count == 3 + assert record.processed_changes == 3 + assert record.ignored_changes == 0 def test_check_replication_slot_changes_calls_retry_on_query_error(mocker): - mocker.patch("app.celery.process_replication_slot_changes.db.session.execute", side_effect=Exception("EXPECTED")) + mocker.patch( + "app.celery.process_replication_slot_changes.get_replication_changes", + side_effect=Exception("EXPECTED"), + ) mock_retry = mocker.patch.object(check_replication_slot_changes, "retry", side_effect=Retry("retry")) with pytest.raises(Retry): diff --git a/tests/app/dao/test_service_stats_dao.py b/tests/app/dao/test_service_stats_dao.py new file mode 100644 index 0000000000..b26b154cbd --- /dev/null +++ b/tests/app/dao/test_service_stats_dao.py @@ -0,0 +1,94 @@ +from app.dao.service_stats_dao import ( + apply_service_stats_delete, + apply_service_stats_insert, + apply_service_stats_update_transition, +) +from app.models import ServiceStats +from tests.app.db import create_template + + +def _build_dimensions(*, service_id, template_id, notification_type="email", notification_status="created"): + return { + "service_id": service_id, + "template_id": template_id, + "notification_type": notification_type, + "notification_status": notification_status, + } + + +def _get_service_stats_row(dimensions): + return ServiceStats.query.filter_by( + service_id=dimensions["service_id"], + template_id=dimensions["template_id"], + notification_type=dimensions["notification_type"], + notification_status=dimensions["notification_status"], + ).first() + + +def test_apply_service_stats_insert_creates_and_increments_row(notify_db_session, sample_service): + template = create_template(service=sample_service, template_type="email") + dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) + + apply_service_stats_insert(dimensions) + apply_service_stats_insert(dimensions) + notify_db_session.commit() + + row = _get_service_stats_row(dimensions) + + assert row is not None + assert row.count == 2 + + +def test_apply_service_stats_delete_does_not_drop_below_zero(notify_db_session, sample_service): + template = create_template(service=sample_service, template_type="email") + dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) + + apply_service_stats_insert(dimensions) + apply_service_stats_delete(dimensions) + apply_service_stats_delete(dimensions) + notify_db_session.commit() + + row = _get_service_stats_row(dimensions) + + assert row is not None + assert row.count == 0 + + +def test_apply_service_stats_update_transition_moves_count_between_statuses(notify_db_session, sample_service): + template = create_template(service=sample_service, template_type="email") + old_dimensions = _build_dimensions( + service_id=sample_service.id, + template_id=template.id, + notification_status="sending", + ) + new_dimensions = _build_dimensions( + service_id=sample_service.id, + template_id=template.id, + notification_status="delivered", + ) + + apply_service_stats_insert(old_dimensions) + apply_service_stats_update_transition(old_dimensions, new_dimensions) + notify_db_session.commit() + + old_row = _get_service_stats_row(old_dimensions) + new_row = _get_service_stats_row(new_dimensions) + + assert old_row is not None + assert new_row is not None + assert old_row.count == 0 + assert new_row.count == 1 + + +def test_apply_service_stats_update_transition_noops_when_dimensions_match(notify_db_session, sample_service): + template = create_template(service=sample_service, template_type="email") + dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) + + apply_service_stats_insert(dimensions) + apply_service_stats_update_transition(dimensions, dimensions) + notify_db_session.commit() + + row = _get_service_stats_row(dimensions) + + assert row is not None + assert row.count == 1 diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py index afe5bd56ee..e2d577fa36 100644 --- a/tests/app/replication/test_process_replication_changes.py +++ b/tests/app/replication/test_process_replication_changes.py @@ -1,7 +1,13 @@ import json from unittest.mock import MagicMock, patch -from app.replication.replication_changes_utils import get_replication_changes, parse_change_data, parse_row_data +from app.replication.replication_changes_utils import ( + get_notification_status, + get_replication_changes, + get_str_value, + parse_change_data, + parse_row_data, +) def test_process_replication_changes_flattens_rows_across_changes(): @@ -89,3 +95,19 @@ def test_parse_row_data_maps_current_and_previous_rows(): "status": "sending", }, } + + +def test_get_str_value_returns_none_for_non_string_values(): + assert get_str_value({"status": 123}, "status") is None + + +def test_get_notification_status_prefers_notification_status_key(): + result = get_notification_status({"notification_status": "delivered", "status": "sending"}) + + assert result == "delivered" + + +def test_get_notification_status_falls_back_to_status_key(): + result = get_notification_status({"status": "temporary-failure"}) + + assert result == "temporary-failure" From 1379cc8041ab85cc519177c26fc9ed8fbf00957e Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Tue, 19 May 2026 23:48:48 +0100 Subject: [PATCH 13/26] Add migration to set REPLICA IDENTITY on notifications table using id/status index --- migrations/.current-alembic-head | 2 +- .../0555_notifications_replica_identity.py | 60 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 migrations/versions/0555_notifications_replica_identity.py diff --git a/migrations/.current-alembic-head b/migrations/.current-alembic-head index 74c92585a5..33ffd288c9 100644 --- a/migrations/.current-alembic-head +++ b/migrations/.current-alembic-head @@ -1 +1 @@ -0554_create_service_stats +0555_notif_replica_identity diff --git a/migrations/versions/0555_notifications_replica_identity.py b/migrations/versions/0555_notifications_replica_identity.py new file mode 100644 index 0000000000..26a8143162 --- /dev/null +++ b/migrations/versions/0555_notifications_replica_identity.py @@ -0,0 +1,60 @@ +""" +Set REPLICA IDENTITY on notifications table using (id, notification_status) index. + +PostgreSQL requires all index columns to be NOT NULL for REPLICA IDENTITY USING INDEX, +and a unique index. This migration: + 1. Drops the existing non-unique index and recreates it as unique (required by PG). + 2. Sets REPLICA IDENTITY USING INDEX so only id + notification_status are written to WAL. + +Revision ID: 0555_notif_replica_identity +Revises: 0554_create_service_stats +Create Date: 2026-05-19 00:00:00 +""" + +from alembic import op + +revision = "0555_notif_replica_identity" +down_revision = "0554_create_service_stats" + + +def upgrade(): + ## NEEDED TO SET NOT NULL ON notification_status TO CREATE UNIQUE INDEX, THEN CAN SET REPLICA IDENTITY + op.execute("ALTER TABLE notifications ALTER COLUMN notification_status SET NOT NULL;") + + with op.get_context().autocommit_block(): + op.drop_index( + "ix_notifications_id_notification_status", + table_name="notifications", + postgresql_concurrently=True, + ) + op.create_index( + "ix_notifications_id_notification_status", + "notifications", + ["id", "notification_status"], + unique=True, + postgresql_concurrently=True, + ) + + op.execute("ALTER TABLE notifications REPLICA IDENTITY USING INDEX ix_notifications_id_notification_status;") + + +def downgrade(): + op.execute("ALTER TABLE notifications REPLICA IDENTITY DEFAULT;") + + op.execute("ALTER TABLE notifications ALTER COLUMN notification_status DROP NOT NULL;") + + with op.get_context().autocommit_block(): + op.drop_index( + "ix_notifications_id_notification_status", + table_name="notifications", + postgresql_concurrently=True, + ) + op.create_index( + "ix_notifications_id_notification_status", + "notifications", + ["id", "notification_status"], + unique=False, + postgresql_concurrently=True, + ) + + From 58c8f2cade3bcf5ae44896223d68a4d73139eac0 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Wed, 20 May 2026 00:16:43 +0100 Subject: [PATCH 14/26] Enhance replication slot change processing by adding logic for handling previous notification statuses and updating service stats accordingly --- .../process_replication_slot_changes.py | 33 +++- .../test_process_replication_slot_changes.py | 145 ++++++++++++++++++ 2 files changed, 174 insertions(+), 4 deletions(-) diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py index 67633b55f3..426a3574f2 100644 --- a/app/celery/process_replication_slot_changes.py +++ b/app/celery/process_replication_slot_changes.py @@ -1,6 +1,6 @@ from uuid import UUID -from app import current_app, notify_celery +from app import current_app, db, notify_celery from app.cronitor import cronitor from app.dao.service_stats_dao import ( ServiceStatsDimensions, @@ -54,6 +54,23 @@ def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> ServiceSt } +def _build_old_dimensions_from_previous_status(change: ParsedRow) -> ServiceStatsDimensions | None: + current_dimensions = _build_dimensions(change, use_previous_row=False) + if not current_dimensions: + return None + + previous_status = get_notification_status(change["previous_row_data"]) + if not previous_status: + return None + + return { + "service_id": current_dimensions["service_id"], + "template_id": current_dimensions["template_id"], + "notification_type": current_dimensions["notification_type"], + "notification_status": previous_status, + } + + # @TODO: this is a temporary task to check the replication slot changes, # we will need to implement the logic to process the changes and update the dashboard accordingly @notify_celery.task(bind=True, name="check-replication-slot-changes") @@ -97,12 +114,17 @@ def check_replication_slot_changes(self): continue if change_type == "update": - old_dimensions = _build_dimensions(change, use_previous_row=True) new_dimensions = _build_dimensions(change, use_previous_row=False) - if not old_dimensions or not new_dimensions: + if not new_dimensions: ignored_changes += 1 continue - apply_service_stats_update_transition(old_dimensions, new_dimensions) + + old_dimensions = _build_old_dimensions_from_previous_status(change) + if old_dimensions: + apply_service_stats_update_transition(old_dimensions, new_dimensions) + else: + apply_service_stats_insert(new_dimensions) + processed_changes += 1 continue @@ -117,7 +139,10 @@ def check_replication_slot_changes(self): "ignored_changes": ignored_changes, }, ) + + db.session.commit() except Exception as exc: + db.session.rollback() retry_count = self.request.retries if retry_count < 3: raise self.retry(exc=exc, countdown=2**retry_count) from exc diff --git a/tests/app/celery/test_process_replication_slot_changes.py b/tests/app/celery/test_process_replication_slot_changes.py index 1f510d1ec6..25f78fdf1e 100644 --- a/tests/app/celery/test_process_replication_slot_changes.py +++ b/tests/app/celery/test_process_replication_slot_changes.py @@ -10,6 +10,7 @@ def test_check_replication_slot_changes_early_returns_when_no_changes(mocker, ca mock_get_replication_changes = mocker.patch( "app.celery.process_replication_slot_changes.get_replication_changes", return_value=[] ) + mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") mock_apply_delete = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delete") mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") @@ -21,6 +22,7 @@ def test_check_replication_slot_changes_early_returns_when_no_changes(mocker, ca mock_apply_insert.assert_not_called() mock_apply_delete.assert_not_called() mock_apply_update.assert_not_called() + mock_commit.assert_not_called() assert "No replication slot changes found" in caplog.messages @@ -72,6 +74,7 @@ def test_check_replication_slot_changes_processes_insert_update_delete(mocker, c mock_get_replication_changes = mocker.patch( "app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes ) + mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") mock_apply_delete = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delete") mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") @@ -83,6 +86,7 @@ def test_check_replication_slot_changes_processes_insert_update_delete(mocker, c mock_apply_insert.assert_called_once() mock_apply_update.assert_called_once() mock_apply_delete.assert_called_once() + mock_commit.assert_called_once() assert "Replication slot changes processed" in caplog.messages record = next(record for record in caplog.records if record.msg == "Replication slot changes processed") @@ -97,10 +101,151 @@ def test_check_replication_slot_changes_calls_retry_on_query_error(mocker): side_effect=Exception("EXPECTED"), ) mock_retry = mocker.patch.object(check_replication_slot_changes, "retry", side_effect=Retry("retry")) + mock_rollback = mocker.patch("app.celery.process_replication_slot_changes.db.session.rollback") with pytest.raises(Retry): check_replication_slot_changes() assert mock_retry.call_count == 1 + mock_rollback.assert_called_once() _, kwargs = mock_retry.call_args assert kwargs["countdown"] == 1 + + +def test_check_replication_slot_changes_update_with_sparse_previous_status_uses_transition(mocker): + service_id = str(uuid.uuid4()) + template_id = str(uuid.uuid4()) + changes = [ + { + "type": "update", + "table": "notifications", + "current_row_data": { + "service_id": service_id, + "template_id": template_id, + "notification_type": "sms", + "notification_status": "sending", + }, + "previous_row_data": { + "id": str(uuid.uuid4()), + "notification_status": "created", + }, + } + ] + + mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) + mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") + mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") + mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") + + check_replication_slot_changes() + + mock_apply_insert.assert_not_called() + mock_apply_update.assert_called_once() + old_dimensions, new_dimensions = mock_apply_update.call_args.args + assert old_dimensions["notification_status"] == "created" + assert new_dimensions["notification_status"] == "sending" + assert str(old_dimensions["service_id"]) == service_id + assert str(old_dimensions["template_id"]) == template_id + assert old_dimensions["notification_type"] == "sms" + mock_commit.assert_called_once() + + +def test_check_replication_slot_changes_update_without_previous_status_uses_insert(mocker): + service_id = str(uuid.uuid4()) + template_id = str(uuid.uuid4()) + changes = [ + { + "type": "update", + "table": "notifications", + "current_row_data": { + "service_id": service_id, + "template_id": template_id, + "notification_type": "sms", + "notification_status": "sending", + }, + "previous_row_data": { + "id": str(uuid.uuid4()), + }, + } + ] + + mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) + mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") + mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") + mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") + + check_replication_slot_changes() + + mock_apply_update.assert_not_called() + mock_apply_insert.assert_called_once() + dimensions = mock_apply_insert.call_args.args[0] + assert dimensions["notification_status"] == "sending" + assert str(dimensions["service_id"]) == service_id + assert str(dimensions["template_id"]) == template_id + assert dimensions["notification_type"] == "sms" + mock_commit.assert_called_once() + + +def test_check_replication_slot_changes_processes_user_payload_shape(mocker): + service_id = str(uuid.uuid4()) + template_id = str(uuid.uuid4()) + notification_id = str(uuid.uuid4()) + changes = [ + { + "type": "insert", + "table": "notifications", + "current_row_data": { + "id": notification_id, + "service_id": service_id, + "template_id": template_id, + "notification_type": "sms", + "notification_status": "created", + "billable_units": 0, + }, + "previous_row_data": {}, + }, + { + "type": "update", + "table": "notifications", + "current_row_data": { + "id": notification_id, + "service_id": service_id, + "template_id": template_id, + "notification_type": "sms", + "notification_status": "sending", + "billable_units": 1, + }, + "previous_row_data": { + "id": notification_id, + "notification_status": "created", + }, + }, + ] + + mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) + mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") + mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") + mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") + + check_replication_slot_changes() + + assert mock_apply_insert.call_count == 1 + assert mock_apply_update.call_count == 1 + + inserted_dimensions = mock_apply_insert.call_args.args[0] + assert str(inserted_dimensions["service_id"]) == service_id + assert str(inserted_dimensions["template_id"]) == template_id + assert inserted_dimensions["notification_type"] == "sms" + assert inserted_dimensions["notification_status"] == "created" + + old_dimensions, new_dimensions = mock_apply_update.call_args.args + assert str(old_dimensions["service_id"]) == service_id + assert str(old_dimensions["template_id"]) == template_id + assert old_dimensions["notification_type"] == "sms" + assert old_dimensions["notification_status"] == "created" + assert str(new_dimensions["service_id"]) == service_id + assert str(new_dimensions["template_id"]) == template_id + assert new_dimensions["notification_type"] == "sms" + assert new_dimensions["notification_status"] == "sending" + + mock_commit.assert_called_once() From 4f96328ca6066afa1f305f31a24f158f6e367fbd Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Wed, 20 May 2026 00:41:13 +0100 Subject: [PATCH 15/26] Add simulate notification load endpoint with validation and processing logic --- app/replication/rest.py | 159 ++++++++++++++++++++++++++++- tests/app/replication/test_rest.py | 63 ++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) diff --git a/app/replication/rest.py b/app/replication/rest.py index 3dfc5c57c8..a2bc935838 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,12 +1,32 @@ -from flask import Blueprint, jsonify +from datetime import datetime +from uuid import UUID, uuid4 +from flask import Blueprint, jsonify, request + +from app import db from app.celery.process_replication_slot_changes import check_replication_slot_changes +from app.constants import ( + EMAIL_TYPE, + KEY_TYPE_NORMAL, + LETTER_TYPE, + NOTIFICATION_DELIVERED, + NOTIFICATION_SENDING, + NOTIFICATION_SENT, + SMS_TYPE, +) +from app.models import Notification, Template from app.replication.replication_changes_utils import get_replication_changes from app.v2.errors import register_errors replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") register_errors(replication_blueprint) +MAX_NOTIFICATION_COUNT = 5000 +MAX_UPDATES_PER_NOTIFICATION = 5 +DEFAULT_NOTIFICATION_COUNT = 1000 +DEFAULT_UPDATES_PER_NOTIFICATION = 2 +DEFAULT_RETURNED_IDS = 20 + @replication_blueprint.route("/process-slot-changes", methods=["POST"]) def trigger_process_replication_slot_changes(): @@ -19,3 +39,140 @@ def trigger_check_replication_slot_changes(): changes = get_replication_changes(peek=True) return jsonify({"changes": changes}), 200 + + +@replication_blueprint.route("/simulate-notification-load", methods=["POST"]) +def simulate_notification_load(): + payload = request.get_json(silent=True) or {} + + notification_count, error = _parse_positive_int( + payload.get("notification_count", DEFAULT_NOTIFICATION_COUNT), + "notification_count", + max_value=MAX_NOTIFICATION_COUNT, + ) + if error: + return jsonify({"message": error}), 400 + + updates_per_notification, error = _parse_positive_int( + payload.get("updates_per_notification", DEFAULT_UPDATES_PER_NOTIFICATION), + "updates_per_notification", + max_value=MAX_UPDATES_PER_NOTIFICATION, + ) + if error: + return jsonify({"message": error}), 400 + + template, error = _resolve_template(payload) + if error: + return jsonify({"message": error}), 400 + + notification_ids = _insert_and_update_notifications( + template=template, + notification_count=notification_count, + updates_per_notification=updates_per_notification, + ) + returned_id_count = min(len(notification_ids), DEFAULT_RETURNED_IDS) + total_updates = notification_count * updates_per_notification + + return ( + jsonify( + { + "message": "notification send/update load inserted into notifications table", + "notification_count": notification_count, + "updates_per_notification": updates_per_notification, + "inserted_count": notification_count, + "updated_count": total_updates, + "service_id": str(template.service_id), + "template_id": str(template.id), + "template_version": template.version, + "inserted_notification_ids": notification_ids[:returned_id_count], + } + ), + 200, + ) + + +def _parse_positive_int(value, field_name, max_value): + try: + parsed = int(value) + except (TypeError, ValueError): + return None, f"{field_name} must be an integer" + + if parsed < 1: + return None, f"{field_name} must be greater than 0" + + if parsed > max_value: + return None, f"{field_name} must be less than or equal to {max_value}" + + return parsed, None + + +def _resolve_template(payload): + template_id = payload.get("template_id") + service_id = payload.get("service_id") + + query = Template.query + if template_id: + try: + query = query.filter(Template.id == UUID(str(template_id))) + except (ValueError, TypeError): + return None, "template_id must be a valid UUID" + + if service_id: + try: + query = query.filter(Template.service_id == UUID(str(service_id))) + except (ValueError, TypeError): + return None, "service_id must be a valid UUID" + + template = query.order_by(Template.created_at.asc()).first() + if not template: + return None, "No template found for the provided service/template filters" + + return template, None + + +def _insert_and_update_notifications(template, notification_count, updates_per_notification): + status_progression = [NOTIFICATION_SENT, NOTIFICATION_DELIVERED] + inserted_notifications = [] + now = datetime.utcnow() + + for index in range(notification_count): + notification = Notification( + id=uuid4(), + to=_build_recipient(template.template_type, index), + service_id=template.service_id, + template_id=template.id, + template_version=template.version, + key_type=KEY_TYPE_NORMAL, + notification_type=template.template_type, + created_at=now, + status=NOTIFICATION_SENDING, + billable_units=1, + postage="second" if template.template_type == LETTER_TYPE else None, + rate_multiplier=1 if template.template_type == SMS_TYPE else None, + ) + db.session.add(notification) + inserted_notifications.append(notification) + + db.session.flush() + + for notification in inserted_notifications: + for update_number in range(updates_per_notification): + notification.status = status_progression[min(update_number, len(status_progression) - 1)] + notification.updated_at = datetime.utcnow() + if notification.status in {NOTIFICATION_SENT, NOTIFICATION_DELIVERED}: + notification.sent_at = datetime.utcnow() + + db.session.commit() + + return [str(notification.id) for notification in inserted_notifications] + + +def _build_recipient(template_type, index): + if template_type == SMS_TYPE: + return f"+447700900{index % 1000:03d}" + if template_type == EMAIL_TYPE: + return f"load-test-{index}@example.com" + if template_type == LETTER_TYPE: + return "Load Test User\n1 Test Street\nTest City\nSW1A 1AA" + return f"load-test-{index}@example.com" + diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py index 3cc953ddf6..af49c349a1 100644 --- a/tests/app/replication/test_rest.py +++ b/tests/app/replication/test_rest.py @@ -1,5 +1,7 @@ import pytest +from uuid import UUID +from app.models import Notification from tests import create_admin_authorization_header @@ -86,3 +88,64 @@ def test_trigger_check_replication_slot_changes_rejects_post(replication_client) ) assert response.status_code == 405 + + +def test_simulate_notification_load_inserts_and_updates_notifications(replication_client, sample_template): + auth_header = create_admin_authorization_header() + + response = replication_client.post( + "/replication/simulate-notification-load", + headers=[auth_header], + json={ + "notification_count": 3, + "updates_per_notification": 2, + "template_id": str(sample_template.id), + }, + ) + + response_json = response.get_json() + inserted_ids = response_json["inserted_notification_ids"] + inserted_uuid_ids = [UUID(notification_id) for notification_id in inserted_ids] + inserted_notifications = Notification.query.filter(Notification.id.in_(inserted_uuid_ids)).all() + + assert response.status_code == 200 + assert response_json["message"] == "notification send/update load inserted into notifications table" + assert response_json["notification_count"] == 3 + assert response_json["updates_per_notification"] == 2 + assert response_json["inserted_count"] == 3 + assert response_json["updated_count"] == 6 + assert response_json["service_id"] == str(sample_template.service_id) + assert response_json["template_id"] == str(sample_template.id) + assert response_json["template_version"] == sample_template.version + assert len(inserted_ids) == 3 + assert len(inserted_notifications) == 3 + assert all(notification.status == "delivered" for notification in inserted_notifications) + + +@pytest.mark.parametrize( + "payload, expected_message", + [ + ({"notification_count": 0}, "notification_count must be greater than 0"), + ({"notification_count": "abc"}, "notification_count must be an integer"), + ({"notification_count": 5001}, "notification_count must be less than or equal to 5000"), + ({"updates_per_notification": 0}, "updates_per_notification must be greater than 0"), + ({"updates_per_notification": "abc"}, "updates_per_notification must be an integer"), + ( + {"updates_per_notification": 6}, + "updates_per_notification must be less than or equal to 5", + ), + ({"template_id": "not-a-uuid"}, "template_id must be a valid UUID"), + ({"service_id": "not-a-uuid"}, "service_id must be a valid UUID"), + ], +) +def test_simulate_notification_load_validates_payload(replication_client, payload, expected_message): + auth_header = create_admin_authorization_header() + + response = replication_client.post( + "/replication/simulate-notification-load", + headers=[auth_header], + json=payload, + ) + + assert response.status_code == 400 + assert response.get_json() == {"message": expected_message} From d5ce7991b609dc9ac8f97dfef6d299ba4778c905 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Wed, 20 May 2026 09:28:56 +0100 Subject: [PATCH 16/26] Enhance simulate notification load by adding randomization for terminal statuses and updating status breakdown in response --- app/replication/rest.py | 70 ++++++++++++++++++++++++++---- tests/app/replication/test_rest.py | 20 ++++++--- 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/app/replication/rest.py b/app/replication/rest.py index a2bc935838..4dfe71fdbf 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,4 +1,5 @@ from datetime import datetime +import random from uuid import UUID, uuid4 from flask import Blueprint, jsonify, request @@ -10,8 +11,12 @@ KEY_TYPE_NORMAL, LETTER_TYPE, NOTIFICATION_DELIVERED, + NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENDING, NOTIFICATION_SENT, + NOTIFICATION_TECHNICAL_FAILURE, + NOTIFICATION_TEMPORARY_FAILURE, + NOTIFICATION_RETURNED_LETTER, SMS_TYPE, ) from app.models import Notification, Template @@ -65,10 +70,14 @@ def simulate_notification_load(): if error: return jsonify({"message": error}), 400 - notification_ids = _insert_and_update_notifications( + random_seed = payload.get("random_seed") + randomizer = random.Random(random_seed) if random_seed is not None else random.Random() + + notification_ids, status_breakdown = _insert_and_update_notifications( template=template, notification_count=notification_count, updates_per_notification=updates_per_notification, + randomizer=randomizer, ) returned_id_count = min(len(notification_ids), DEFAULT_RETURNED_IDS) total_updates = notification_count * updates_per_notification @@ -84,6 +93,7 @@ def simulate_notification_load(): "service_id": str(template.service_id), "template_id": str(template.id), "template_version": template.version, + "status_breakdown": status_breakdown, "inserted_notification_ids": notification_ids[:returned_id_count], } ), @@ -130,12 +140,15 @@ def _resolve_template(payload): return template, None -def _insert_and_update_notifications(template, notification_count, updates_per_notification): - status_progression = [NOTIFICATION_SENT, NOTIFICATION_DELIVERED] +def _insert_and_update_notifications(template, notification_count, updates_per_notification, randomizer): inserted_notifications = [] + status_breakdown = {} now = datetime.utcnow() for index in range(notification_count): + terminal_status = _pick_terminal_status(template.template_type, randomizer) + status_path = _build_status_path(terminal_status, updates_per_notification) + notification = Notification( id=uuid4(), to=_build_recipient(template.template_type, index), @@ -151,20 +164,59 @@ def _insert_and_update_notifications(template, notification_count, updates_per_n rate_multiplier=1 if template.template_type == SMS_TYPE else None, ) db.session.add(notification) - inserted_notifications.append(notification) + inserted_notifications.append((notification, status_path)) db.session.flush() - for notification in inserted_notifications: - for update_number in range(updates_per_notification): - notification.status = status_progression[min(update_number, len(status_progression) - 1)] + for notification, status_path in inserted_notifications: + for status in status_path: + notification.status = status notification.updated_at = datetime.utcnow() - if notification.status in {NOTIFICATION_SENT, NOTIFICATION_DELIVERED}: + if status in {NOTIFICATION_SENT, NOTIFICATION_DELIVERED}: notification.sent_at = datetime.utcnow() + status_breakdown[notification.status] = status_breakdown.get(notification.status, 0) + 1 + db.session.commit() - return [str(notification.id) for notification in inserted_notifications] + return [str(notification.id) for notification, _ in inserted_notifications], status_breakdown + + +def _pick_terminal_status(template_type, randomizer): + if template_type == LETTER_TYPE: + statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_TECHNICAL_FAILURE, NOTIFICATION_RETURNED_LETTER] + weights = [70, 20, 10] + else: + statuses = [ + NOTIFICATION_DELIVERED, + NOTIFICATION_SENT, + NOTIFICATION_TEMPORARY_FAILURE, + NOTIFICATION_PERMANENT_FAILURE, + NOTIFICATION_TECHNICAL_FAILURE, + ] + weights = [55, 15, 10, 10, 10] + + return randomizer.choices(statuses, weights=weights, k=1)[0] + + +def _build_status_path(terminal_status, updates_per_notification): + if updates_per_notification <= 0: + return [] + + if updates_per_notification == 1: + return [terminal_status] + + if terminal_status == NOTIFICATION_DELIVERED: + base_path = [NOTIFICATION_SENT, NOTIFICATION_DELIVERED] + elif terminal_status == NOTIFICATION_SENT: + base_path = [NOTIFICATION_SENT] + else: + base_path = [NOTIFICATION_SENT, terminal_status] + + if len(base_path) >= updates_per_notification: + return base_path[:updates_per_notification] + + return base_path + [base_path[-1]] * (updates_per_notification - len(base_path)) def _build_recipient(template_type, index): diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py index af49c349a1..3498ff94be 100644 --- a/tests/app/replication/test_rest.py +++ b/tests/app/replication/test_rest.py @@ -97,9 +97,10 @@ def test_simulate_notification_load_inserts_and_updates_notifications(replicatio "/replication/simulate-notification-load", headers=[auth_header], json={ - "notification_count": 3, + "notification_count": 30, "updates_per_notification": 2, "template_id": str(sample_template.id), + "random_seed": 7, }, ) @@ -107,19 +108,24 @@ def test_simulate_notification_load_inserts_and_updates_notifications(replicatio inserted_ids = response_json["inserted_notification_ids"] inserted_uuid_ids = [UUID(notification_id) for notification_id in inserted_ids] inserted_notifications = Notification.query.filter(Notification.id.in_(inserted_uuid_ids)).all() + distinct_statuses = {notification.status for notification in inserted_notifications} + status_breakdown = response_json["status_breakdown"] assert response.status_code == 200 assert response_json["message"] == "notification send/update load inserted into notifications table" - assert response_json["notification_count"] == 3 + assert response_json["notification_count"] == 30 assert response_json["updates_per_notification"] == 2 - assert response_json["inserted_count"] == 3 - assert response_json["updated_count"] == 6 + assert response_json["inserted_count"] == 30 + assert response_json["updated_count"] == 60 assert response_json["service_id"] == str(sample_template.service_id) assert response_json["template_id"] == str(sample_template.id) assert response_json["template_version"] == sample_template.version - assert len(inserted_ids) == 3 - assert len(inserted_notifications) == 3 - assert all(notification.status == "delivered" for notification in inserted_notifications) + assert len(inserted_ids) == 20 + assert len(inserted_notifications) == 20 + assert len(status_breakdown) >= 2 + assert len(distinct_statuses) >= 1 + assert sum(status_breakdown.values()) == response_json["inserted_count"] + assert distinct_statuses.issubset(set(status_breakdown.keys())) @pytest.mark.parametrize( From e49d992bd5aac6dda130c2738becf4f2a15ad285 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Wed, 20 May 2026 09:37:34 +0100 Subject: [PATCH 17/26] Add endpoint to fetch service stats and implement data retrieval logic --- app/dao/service_stats_dao.py | 18 +++++++++++++++++- app/replication/rest.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/app/dao/service_stats_dao.py b/app/dao/service_stats_dao.py index be7eb0ddb0..d892caf372 100644 --- a/app/dao/service_stats_dao.py +++ b/app/dao/service_stats_dao.py @@ -73,4 +73,20 @@ def apply_service_stats_update_transition( return _decrement_service_stats_count(old_dimensions, decrement_by=1) - _increment_service_stats_count(new_dimensions, increment_by=1) \ No newline at end of file + _increment_service_stats_count(new_dimensions, increment_by=1) + + +def dao_fetch_stats_for_service(service_id: UUID) -> list[ServiceStats]: + """ + Fetch service stats for a specific service. + + Args: + service_id: UUID of the service to fetch stats for + + Returns: + List of ServiceStats records for the specified service + """ + if not service_id: + return [] + + return db.session.query(ServiceStats).filter(ServiceStats.service_id == service_id).all() \ No newline at end of file diff --git a/app/replication/rest.py b/app/replication/rest.py index 4dfe71fdbf..9c5f6fdc55 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -19,6 +19,7 @@ NOTIFICATION_RETURNED_LETTER, SMS_TYPE, ) +from app.dao.service_stats_dao import dao_fetch_stats_for_service from app.models import Notification, Template from app.replication.replication_changes_utils import get_replication_changes from app.v2.errors import register_errors @@ -101,6 +102,35 @@ def simulate_notification_load(): ) +@replication_blueprint.route("/stats/", methods=["GET"]) +def get_service_stats(service_id): + """ + Get service stats for a specific service. + + Path parameter: + - service_id: UUID of the service + + Returns: + - List of stats entries with: + - template_id: UUID of the template + - notification_type: Type of notification (email, sms, letter) + - notification_status: Status of the notification + - count: Number of notifications with this status + """ + stats = dao_fetch_stats_for_service(service_id) + + result = [] + for stat in stats: + result.append({ + "template_id": str(stat.template_id), + "notification_type": stat.notification_type, + "notification_status": stat.notification_status, + "count": stat.count, + }) + + return jsonify({"stats": result}), 200 + + def _parse_positive_int(value, field_name, max_value): try: parsed = int(value) From a8b5052072803f3e8f1f77d1026bf8fb975ab7b1 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Wed, 20 May 2026 13:08:57 +0100 Subject: [PATCH 18/26] Increase MAX_NOTIFICATION_COUNT to allow processing of larger notification batches --- app/replication/rest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/replication/rest.py b/app/replication/rest.py index 9c5f6fdc55..be8347abcc 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -27,7 +27,7 @@ replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") register_errors(replication_blueprint) -MAX_NOTIFICATION_COUNT = 5000 +MAX_NOTIFICATION_COUNT = 50000 MAX_UPDATES_PER_NOTIFICATION = 5 DEFAULT_NOTIFICATION_COUNT = 1000 DEFAULT_UPDATES_PER_NOTIFICATION = 2 From 017cc9278874f71fac494202c225f94b9147ea44 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 10:43:52 +0100 Subject: [PATCH 19/26] Refactor notification statuses in simulation load to simplify terminal status handling and update test assertions accordingly --- app/replication/rest.py | 18 ++++-------------- tests/app/replication/test_rest.py | 5 ++++- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/app/replication/rest.py b/app/replication/rest.py index be8347abcc..ec10754c8e 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -11,12 +11,8 @@ KEY_TYPE_NORMAL, LETTER_TYPE, NOTIFICATION_DELIVERED, - NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENDING, NOTIFICATION_SENT, - NOTIFICATION_TECHNICAL_FAILURE, - NOTIFICATION_TEMPORARY_FAILURE, - NOTIFICATION_RETURNED_LETTER, SMS_TYPE, ) from app.dao.service_stats_dao import dao_fetch_stats_for_service @@ -214,17 +210,11 @@ def _insert_and_update_notifications(template, notification_count, updates_per_n def _pick_terminal_status(template_type, randomizer): if template_type == LETTER_TYPE: - statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_TECHNICAL_FAILURE, NOTIFICATION_RETURNED_LETTER] - weights = [70, 20, 10] + statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_SENT] + weights = [85, 15] else: - statuses = [ - NOTIFICATION_DELIVERED, - NOTIFICATION_SENT, - NOTIFICATION_TEMPORARY_FAILURE, - NOTIFICATION_PERMANENT_FAILURE, - NOTIFICATION_TECHNICAL_FAILURE, - ] - weights = [55, 15, 10, 10, 10] + statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_SENT] + weights = [80, 20] return randomizer.choices(statuses, weights=weights, k=1)[0] diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py index 3498ff94be..8a80dc9a99 100644 --- a/tests/app/replication/test_rest.py +++ b/tests/app/replication/test_rest.py @@ -1,6 +1,7 @@ import pytest from uuid import UUID +from app.constants import NOTIFICATION_DELIVERED, NOTIFICATION_SENT from app.models import Notification from tests import create_admin_authorization_header @@ -110,6 +111,7 @@ def test_simulate_notification_load_inserts_and_updates_notifications(replicatio inserted_notifications = Notification.query.filter(Notification.id.in_(inserted_uuid_ids)).all() distinct_statuses = {notification.status for notification in inserted_notifications} status_breakdown = response_json["status_breakdown"] + simulated_statuses = {NOTIFICATION_SENT, NOTIFICATION_DELIVERED} assert response.status_code == 200 assert response_json["message"] == "notification send/update load inserted into notifications table" @@ -125,7 +127,8 @@ def test_simulate_notification_load_inserts_and_updates_notifications(replicatio assert len(status_breakdown) >= 2 assert len(distinct_statuses) >= 1 assert sum(status_breakdown.values()) == response_json["inserted_count"] - assert distinct_statuses.issubset(set(status_breakdown.keys())) + assert set(status_breakdown).issubset(simulated_statuses) + assert distinct_statuses.issubset(simulated_statuses) @pytest.mark.parametrize( From af6607a71da014300e112e9777f618e10a3edf73 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 11:22:42 +0100 Subject: [PATCH 20/26] Refactor service stats handling and add performance testing - Consolidated service stats operations into a single function `apply_service_stats_delta` to handle both increments and decrements. - Removed deprecated functions for inserting, deleting, and updating service stats. - Introduced a new performance testing endpoint to simulate notification load, allowing for bulk inserts and updates of notifications. - Updated replication change utilities to include `nextlsn` in parsed rows and adjusted related tests accordingly. - Enhanced tests for service stats DAO to validate the new delta application logic. - Refactored replication slot change processing tests to align with the new structure and ensure proper handling of changes. --- .../process_replication_slot_changes.py | 278 ++++++++++++------ app/dao/service_stats_dao.py | 27 +- app/replication/performance_test.py | 199 +++++++++++++ app/replication/replication_changes_utils.py | 61 +++- app/replication/rest.py | 196 +----------- .../test_process_replication_slot_changes.py | 257 +++++----------- tests/app/dao/test_service_stats_dao.py | 66 +---- .../test_process_replication_changes.py | 34 +++ tests/app/replication/test_rest.py | 32 +- 9 files changed, 568 insertions(+), 582 deletions(-) create mode 100644 app/replication/performance_test.py diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py index 426a3574f2..20b3b0ea0b 100644 --- a/app/celery/process_replication_slot_changes.py +++ b/app/celery/process_replication_slot_changes.py @@ -1,13 +1,13 @@ +from collections import Counter +from datetime import date, datetime from uuid import UUID +from notifications_utils.timezones import convert_utc_to_bst +from sqlalchemy import text + from app import current_app, db, notify_celery from app.cronitor import cronitor -from app.dao.service_stats_dao import ( - ServiceStatsDimensions, - apply_service_stats_delete, - apply_service_stats_insert, - apply_service_stats_update_transition, -) +from app.dao.service_stats_dao import ServiceStatsDimensions, apply_service_stats_delta from app.replication.replication_changes_utils import ( ParsedRow, RowData, @@ -16,6 +16,14 @@ get_str_value, ) +REPLICATION_SLOT_NAME = "notify_dashboard_replication_slot" +REPLICATION_SLOT_UPTO_NCHANGES = 10_000 +REPLICATION_ADVISORY_LOCK_ID = 4_009_881 +NIL_UUID = UUID("00000000-0000-0000-0000-000000000000") + +type FullDimensions = tuple[date, UUID, UUID, UUID, str, str, str] +type ServiceStatsDimensionsKey = tuple[UUID, UUID, str, str] + def _parse_uuid_value(row_data: RowData, key: str) -> UUID | None: raw_value = get_str_value(row_data, key) @@ -28,7 +36,19 @@ def _parse_uuid_value(row_data: RowData, key: str) -> UUID | None: return None -def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> ServiceStatsDimensions | None: +def _parse_datetime_value(row_data: RowData, key: str) -> datetime | None: + raw_value = get_str_value(row_data, key) + if not raw_value: + return None + + normalized = raw_value.replace("Z", "+00:00") + try: + return datetime.fromisoformat(normalized) + except ValueError: + return None + + +def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> FullDimensions | None: if use_previous_row: row_data = change["previous_row_data"] fallback_data = change["current_row_data"] @@ -41,115 +61,195 @@ def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> ServiceSt notification_type = get_str_value(row_data, "notification_type") or get_str_value( fallback_data, "notification_type" ) + job_id = _parse_uuid_value(row_data, "job_id") or _parse_uuid_value(fallback_data, "job_id") or NIL_UUID + key_type = get_str_value(row_data, "key_type") or get_str_value(fallback_data, "key_type") notification_status = get_notification_status(row_data) or get_notification_status(fallback_data) + created_at = _parse_datetime_value(row_data, "created_at") or _parse_datetime_value(fallback_data, "created_at") - if not service_id or not template_id or not notification_type or not notification_status: + if not service_id or not template_id or not notification_type or not key_type or not notification_status or not created_at: return None - return { - "service_id": service_id, - "template_id": template_id, - "notification_type": notification_type, - "notification_status": notification_status, - } + return ( + convert_utc_to_bst(created_at).date(), + template_id, + service_id, + job_id, + notification_type, + key_type, + notification_status, + ) -def _build_old_dimensions_from_previous_status(change: ParsedRow) -> ServiceStatsDimensions | None: - current_dimensions = _build_dimensions(change, use_previous_row=False) - if not current_dimensions: - return None +def _try_advisory_lock(lock_id: int) -> bool: + return bool(db.session.execute(text("SELECT pg_try_advisory_lock(:lock_id)"), {"lock_id": lock_id}).scalar()) - previous_status = get_notification_status(change["previous_row_data"]) - if not previous_status: - return None - return { - "service_id": current_dimensions["service_id"], - "template_id": current_dimensions["template_id"], - "notification_type": current_dimensions["notification_type"], - "notification_status": previous_status, - } +def _advisory_unlock(lock_id: int) -> None: + db.session.execute(text("SELECT pg_advisory_unlock(:lock_id)"), {"lock_id": lock_id}) -# @TODO: this is a temporary task to check the replication slot changes, -# we will need to implement the logic to process the changes and update the dashboard accordingly -@notify_celery.task(bind=True, name="check-replication-slot-changes") -@cronitor("check-replication-slot-changes") -def check_replication_slot_changes(self): - try: - changes = get_replication_changes(peek=False) +def _advance_replication_slot(lsn: str) -> None: + db.session.execute( + text("SELECT pg_replication_slot_advance(:slot_name, :lsn)"), + {"slot_name": REPLICATION_SLOT_NAME, "lsn": lsn}, + ) - if not changes: - current_app.logger.info( - "No replication slot changes found", - extra={"celery_task": "check-replication-slot-changes", "changes_count": 0}, - ) - return - processed_changes = 0 - ignored_changes = 0 +def _build_counter_from_changes(changes: list[ParsedRow]) -> tuple[Counter[FullDimensions], int, int, str | None]: + counter: Counter[FullDimensions] = Counter() + processed_changes = 0 + ignored_changes = 0 + last_nextlsn: str | None = None - for change in changes: - if change["table"] != "notifications": + for change in changes: + table_name = change["table"] + change_type = change["type"] + if change["nextlsn"]: + last_nextlsn = change["nextlsn"] + + if table_name not in {"notifications", "notification_history"}: + ignored_changes += 1 + continue + + if change_type == "insert": + dimensions = _build_dimensions(change, use_previous_row=False) + if not dimensions: + ignored_changes += 1 + continue + counter[dimensions] += 1 + processed_changes += 1 + continue + + if change_type == "update": + updated = False + new_dimensions = _build_dimensions(change, use_previous_row=False) + if new_dimensions: + counter[new_dimensions] += 1 + updated = True + + old_dimensions = _build_dimensions(change, use_previous_row=True) + if old_dimensions: + counter[old_dimensions] -= 1 + updated = True + + if not updated: ignored_changes += 1 continue - change_type = change["type"] - if change_type == "insert": - dimensions = _build_dimensions(change, use_previous_row=False) - if not dimensions: - ignored_changes += 1 - continue - apply_service_stats_insert(dimensions) - processed_changes += 1 + processed_changes += 1 + continue + + if change_type == "delete": + if table_name == "notification_history": + ignored_changes += 1 continue - if change_type == "delete": - dimensions = _build_dimensions(change, use_previous_row=False) - if not dimensions: - ignored_changes += 1 - continue - apply_service_stats_delete(dimensions) - processed_changes += 1 + dimensions = _build_dimensions(change, use_previous_row=True) + if not dimensions: + ignored_changes += 1 continue - if change_type == "update": - new_dimensions = _build_dimensions(change, use_previous_row=False) - if not new_dimensions: - ignored_changes += 1 - continue + counter[dimensions] -= 1 + processed_changes += 1 + continue - old_dimensions = _build_old_dimensions_from_previous_status(change) - if old_dimensions: - apply_service_stats_update_transition(old_dimensions, new_dimensions) - else: - apply_service_stats_insert(new_dimensions) + ignored_changes += 1 - processed_changes += 1 - continue + return counter, processed_changes, ignored_changes, last_nextlsn - ignored_changes += 1 - current_app.logger.info( - "Replication slot changes processed", - extra={ - "celery_task": "check-replication-slot-changes", - "changes_count": len(changes), - "processed_changes": processed_changes, - "ignored_changes": ignored_changes, - }, - ) +def _roll_up_service_stats_deltas(counter: Counter[FullDimensions]) -> Counter[ServiceStatsDimensionsKey]: + deltas: Counter[ServiceStatsDimensionsKey] = Counter() + for dimensions, delta in counter.items(): + _, template_id, service_id, _, notification_type, _, notification_status = dimensions + deltas[(service_id, template_id, notification_type, notification_status)] += delta + + return deltas + + +# @TODO: this is a temporary task to check the replication slot changes, +# we will need to implement the logic to process the changes and update the dashboard accordingly +@notify_celery.task(bind=True, name="check-replication-slot-changes") +@cronitor("check-replication-slot-changes") +def check_replication_slot_changes(self): + lock_acquired = False + try: + with current_app.app_context(): + lock_acquired = _try_advisory_lock(REPLICATION_ADVISORY_LOCK_ID) + if not lock_acquired: + current_app.logger.info( + "Replication slot lock not acquired", + extra={"celery_task": "check-replication-slot-changes"}, + ) + return + + changes = get_replication_changes( + peek=True, + slot_name=REPLICATION_SLOT_NAME, + upto_nchanges=REPLICATION_SLOT_UPTO_NCHANGES, + table_names=("public.notifications", "public.notification_history"), + include_lsn=True, + format_version=1, + include_types=False, + include_typmod=False, + ) + fetched_changes = len(changes) + + if fetched_changes == 0: + current_app.logger.info( + "No replication slot changes found", + extra={"celery_task": "check-replication-slot-changes", "changes_count": 0}, + ) + return + + counter, processed_changes, ignored_changes, last_nextlsn = _build_counter_from_changes(changes) + service_stats_deltas = _roll_up_service_stats_deltas(counter) + + for service_stats_key, delta in service_stats_deltas.items(): + if delta == 0: + continue - db.session.commit() + service_id, template_id, notification_type, notification_status = service_stats_key + dimensions: ServiceStatsDimensions = { + "service_id": service_id, + "template_id": template_id, + "notification_type": notification_type, + "notification_status": notification_status, + } + apply_service_stats_delta(dimensions, delta) + + db.session.commit() + if last_nextlsn: + _advance_replication_slot(last_nextlsn) + + current_app.logger.info( + "Replication slot changes processed", + extra={ + "celery_task": "check-replication-slot-changes", + "changes_count": fetched_changes, + "processed_changes": processed_changes, + "ignored_changes": ignored_changes, + "service_stats_delta_buckets": len(service_stats_deltas), + }, + ) except Exception as exc: db.session.rollback() retry_count = self.request.retries if retry_count < 3: raise self.retry(exc=exc, countdown=2**retry_count) from exc - else: - current_app.logger.error( - "Replication slot query failed after 3 retries", - exc_info=True, - extra={"celery_task": "check-replication-slot-changes"}, - ) - raise + + current_app.logger.error( + "Replication slot query failed after 3 retries", + exc_info=True, + extra={"celery_task": "check-replication-slot-changes"}, + ) + raise + finally: + if lock_acquired: + try: + _advisory_unlock(REPLICATION_ADVISORY_LOCK_ID) + except Exception: + current_app.logger.exception( + "Failed to release advisory lock", + extra={"celery_task": "check-replication-slot-changes"}, + ) diff --git a/app/dao/service_stats_dao.py b/app/dao/service_stats_dao.py index d892caf372..ff9dd97de1 100644 --- a/app/dao/service_stats_dao.py +++ b/app/dao/service_stats_dao.py @@ -52,28 +52,11 @@ def _decrement_service_stats_count(dimensions: ServiceStatsDimensions, decrement ) -def apply_service_stats_insert(dimensions: ServiceStatsDimensions) -> None: - _increment_service_stats_count(dimensions, increment_by=1) - - -def apply_service_stats_delete(dimensions: ServiceStatsDimensions) -> None: - _decrement_service_stats_count(dimensions, decrement_by=1) - - -def apply_service_stats_update_transition( - old_dimensions: ServiceStatsDimensions, - new_dimensions: ServiceStatsDimensions, -) -> None: - if ( - old_dimensions["service_id"] == new_dimensions["service_id"] - and old_dimensions["template_id"] == new_dimensions["template_id"] - and old_dimensions["notification_type"] == new_dimensions["notification_type"] - and old_dimensions["notification_status"] == new_dimensions["notification_status"] - ): - return - - _decrement_service_stats_count(old_dimensions, decrement_by=1) - _increment_service_stats_count(new_dimensions, increment_by=1) +def apply_service_stats_delta(dimensions: ServiceStatsDimensions, delta: int) -> None: + if delta > 0: + _increment_service_stats_count(dimensions, increment_by=delta) + elif delta < 0: + _decrement_service_stats_count(dimensions, decrement_by=abs(delta)) def dao_fetch_stats_for_service(service_id: UUID) -> list[ServiceStats]: diff --git a/app/replication/performance_test.py b/app/replication/performance_test.py new file mode 100644 index 0000000000..fde3bcad8d --- /dev/null +++ b/app/replication/performance_test.py @@ -0,0 +1,199 @@ +from datetime import datetime +import random +from uuid import UUID, uuid4 + +from flask import jsonify, request + +from app import db +from app.constants import ( + EMAIL_TYPE, + KEY_TYPE_NORMAL, + LETTER_TYPE, + NOTIFICATION_DELIVERED, + NOTIFICATION_SENDING, + NOTIFICATION_SENT, + SMS_TYPE, +) +from app.models import Notification, Template + +MAX_NOTIFICATION_COUNT = 50000 +MAX_UPDATES_PER_NOTIFICATION = 5 +DEFAULT_NOTIFICATION_COUNT = 1000 +DEFAULT_UPDATES_PER_NOTIFICATION = 2 +DEFAULT_RETURNED_IDS = 20 + + +def simulate_notification_load(): + payload = request.get_json(silent=True) or {} + + notification_count, error = _parse_positive_int( + payload.get("notification_count", DEFAULT_NOTIFICATION_COUNT), + "notification_count", + max_value=MAX_NOTIFICATION_COUNT, + ) + if error: + return jsonify({"message": error}), 400 + + updates_per_notification, error = _parse_positive_int( + payload.get("updates_per_notification", DEFAULT_UPDATES_PER_NOTIFICATION), + "updates_per_notification", + max_value=MAX_UPDATES_PER_NOTIFICATION, + ) + if error: + return jsonify({"message": error}), 400 + + template, error = _resolve_template(payload) + if error: + return jsonify({"message": error}), 400 + + random_seed = payload.get("random_seed") + randomizer = random.Random(random_seed) if random_seed is not None else random.Random() + + notification_ids, status_breakdown = _insert_and_update_notifications( + template=template, + notification_count=notification_count, + updates_per_notification=updates_per_notification, + randomizer=randomizer, + ) + returned_id_count = min(len(notification_ids), DEFAULT_RETURNED_IDS) + total_updates = notification_count * updates_per_notification + + return ( + jsonify( + { + "message": "notification send/update load inserted into notifications table", + "notification_count": notification_count, + "updates_per_notification": updates_per_notification, + "inserted_count": notification_count, + "updated_count": total_updates, + "service_id": str(template.service_id), + "template_id": str(template.id), + "template_version": template.version, + "status_breakdown": status_breakdown, + "inserted_notification_ids": notification_ids[:returned_id_count], + } + ), + 200, + ) + + +def _parse_positive_int(value, field_name, max_value): + try: + parsed = int(value) + except (TypeError, ValueError): + return None, f"{field_name} must be an integer" + + if parsed < 1: + return None, f"{field_name} must be greater than 0" + + if parsed > max_value: + return None, f"{field_name} must be less than or equal to {max_value}" + + return parsed, None + + +def _resolve_template(payload): + template_id = payload.get("template_id") + service_id = payload.get("service_id") + + query = Template.query + if template_id: + try: + query = query.filter(Template.id == UUID(str(template_id))) + except (ValueError, TypeError): + return None, "template_id must be a valid UUID" + + if service_id: + try: + query = query.filter(Template.service_id == UUID(str(service_id))) + except (ValueError, TypeError): + return None, "service_id must be a valid UUID" + + template = query.order_by(Template.created_at.asc()).first() + if not template: + return None, "No template found for the provided service/template filters" + + return template, None + + +def _insert_and_update_notifications(template, notification_count, updates_per_notification, randomizer): + inserted_notifications = [] + status_breakdown = {} + now = datetime.utcnow() + + for index in range(notification_count): + terminal_status = _pick_terminal_status(template.template_type, randomizer) + status_path = _build_status_path(terminal_status, updates_per_notification) + + notification = Notification( + id=uuid4(), + to=_build_recipient(template.template_type, index), + service_id=template.service_id, + template_id=template.id, + template_version=template.version, + key_type=KEY_TYPE_NORMAL, + notification_type=template.template_type, + created_at=now, + status=NOTIFICATION_SENDING, + billable_units=1, + postage="second" if template.template_type == LETTER_TYPE else None, + rate_multiplier=1 if template.template_type == SMS_TYPE else None, + ) + db.session.add(notification) + inserted_notifications.append((notification, status_path)) + + db.session.flush() + + for notification, status_path in inserted_notifications: + for status in status_path: + notification.status = status + notification.updated_at = datetime.utcnow() + if status in {NOTIFICATION_SENT, NOTIFICATION_DELIVERED}: + notification.sent_at = datetime.utcnow() + + status_breakdown[notification.status] = status_breakdown.get(notification.status, 0) + 1 + + db.session.commit() + + return [str(notification.id) for notification, _ in inserted_notifications], status_breakdown + + +def _pick_terminal_status(template_type, randomizer): + if template_type == LETTER_TYPE: + statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_SENT] + weights = [85, 15] + else: + statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_SENT] + weights = [80, 20] + + return randomizer.choices(statuses, weights=weights, k=1)[0] + + +def _build_status_path(terminal_status, updates_per_notification): + if updates_per_notification <= 0: + return [] + + if updates_per_notification == 1: + return [terminal_status] + + if terminal_status == NOTIFICATION_DELIVERED: + base_path = [NOTIFICATION_SENT, NOTIFICATION_DELIVERED] + elif terminal_status == NOTIFICATION_SENT: + base_path = [NOTIFICATION_SENT] + else: + base_path = [NOTIFICATION_SENT, terminal_status] + + if len(base_path) >= updates_per_notification: + return base_path[:updates_per_notification] + + return base_path + [base_path[-1]] * (updates_per_notification - len(base_path)) + + +def _build_recipient(template_type, index): + if template_type == SMS_TYPE: + return f"+447700900{index % 1000:03d}" + if template_type == EMAIL_TYPE: + return f"load-test-{index}@example.com" + if template_type == LETTER_TYPE: + return "Load Test User\n1 Test Street\nTest City\nSW1A 1AA" + return f"load-test-{index}@example.com" \ No newline at end of file diff --git a/app/replication/replication_changes_utils.py b/app/replication/replication_changes_utils.py index 74afea4b5d..40885735f4 100644 --- a/app/replication/replication_changes_utils.py +++ b/app/replication/replication_changes_utils.py @@ -11,6 +11,7 @@ class ReplicationChangeRow(TypedDict): + lsn: str data: str @@ -28,12 +29,14 @@ class ReplicationJsonRow(TypedDict, total=False): class ChangePayload(TypedDict, total=False): + nextlsn: str change: list[ReplicationJsonRow] class ParsedRow(TypedDict): type: str table: str + nextlsn: str | None current_row_data: RowData previous_row_data: RowData @@ -41,21 +44,53 @@ class ParsedRow(TypedDict): DEFAULT_CHANGE_ROWS: list[ReplicationJsonRow] = [{}] -def get_replication_changes(peek: bool = True) -> list[ParsedRow]: +def _quote_sql_literal(value: str) -> str: + return value.replace("'", "''") + + +def get_replication_changes( + *, + peek: bool = True, + slot_name: str = "notify_dashboard_replication_slot", + upto_nchanges: int | None = None, + table_names: tuple[str, ...] = ("public.notifications",), + include_lsn: bool | None = None, + format_version: int | None = None, + include_types: bool | None = None, + include_typmod: bool | None = None, +) -> list[ParsedRow]: """ Process the replication changes and return a list of parsed changes. """ - result = db.session.execute( - text(f""" - SELECT * FROM {"pg_logical_slot_peek_changes" if peek else "pg_logical_slot_get_changes"}( - 'notify_dashboard_replication_slot', + options: list[str] = ["pretty-print", "on"] + + if table_names: + options.extend(["add-tables", ",".join(table_names)]) + if include_lsn is not None: + options.extend(["include-lsn", "on" if include_lsn else "off"]) + if format_version is not None: + options.extend(["format-version", str(format_version)]) + if include_types is not None: + options.extend(["include-types", "on" if include_types else "off"]) + if include_typmod is not None: + options.extend(["include-typmod", "on" if include_typmod else "off"]) + + options_sql = "" + if options: + options_sql = ",\n " + ",\n ".join( + f"'{_quote_sql_literal(option)}'" for option in options + ) + + function_name = "pg_logical_slot_peek_changes" if peek else "pg_logical_slot_get_changes" + query = f""" + SELECT * FROM {function_name}( + '{_quote_sql_literal(slot_name)}', NULL, - NULL, - 'pretty-print', 'on', - 'add-tables', 'public.notifications' + {upto_nchanges if upto_nchanges is not None else 'NULL'}{options_sql} ); - """) - ) + """ + + result = db.session.execute(text(query)) changes = [dict(change) for change in result.mappings().all()] parsed_data = [row for change in changes for row in (parse_change_data(cast(ReplicationChangeRow, change)) or [])] @@ -68,15 +103,16 @@ def parse_change_data(change: ReplicationChangeRow) -> list[ParsedRow] | None: Parse the change data and return a dictionary with the relevant information. """ payload = cast(ChangePayload, json.loads(change["data"])) + nextlsn = payload.get("nextlsn") or change.get("lsn") raw_changes = payload.get("change", DEFAULT_CHANGE_ROWS) if len(raw_changes) == 0: return None - return [parse_row_data(row) for row in raw_changes] + return [parse_row_data(row, nextlsn=nextlsn) for row in raw_changes] -def parse_row_data(row: ReplicationJsonRow) -> ParsedRow: +def parse_row_data(row: ReplicationJsonRow, nextlsn: str | None = None) -> ParsedRow: """ Create a mapping of column names to their values for the given change. """ @@ -89,6 +125,7 @@ def parse_row_data(row: ReplicationJsonRow) -> ParsedRow: return { "type": row.get("kind", ""), "table": row.get("table", ""), + "nextlsn": nextlsn, "current_row_data": dict(zip(column_names, column_values, strict=False)), "previous_row_data": dict(zip(old_column_names, old_column_values, strict=False)), } diff --git a/app/replication/rest.py b/app/replication/rest.py index ec10754c8e..109cb6adf0 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,34 +1,14 @@ -from datetime import datetime -import random -from uuid import UUID, uuid4 +from flask import Blueprint, jsonify -from flask import Blueprint, jsonify, request - -from app import db from app.celery.process_replication_slot_changes import check_replication_slot_changes -from app.constants import ( - EMAIL_TYPE, - KEY_TYPE_NORMAL, - LETTER_TYPE, - NOTIFICATION_DELIVERED, - NOTIFICATION_SENDING, - NOTIFICATION_SENT, - SMS_TYPE, -) from app.dao.service_stats_dao import dao_fetch_stats_for_service -from app.models import Notification, Template +from app.replication.performance_test import simulate_notification_load as performance_test_simulate_notification_load from app.replication.replication_changes_utils import get_replication_changes from app.v2.errors import register_errors replication_blueprint = Blueprint("replication", __name__, url_prefix="/replication") register_errors(replication_blueprint) -MAX_NOTIFICATION_COUNT = 50000 -MAX_UPDATES_PER_NOTIFICATION = 5 -DEFAULT_NOTIFICATION_COUNT = 1000 -DEFAULT_UPDATES_PER_NOTIFICATION = 2 -DEFAULT_RETURNED_IDS = 20 - @replication_blueprint.route("/process-slot-changes", methods=["POST"]) def trigger_process_replication_slot_changes(): @@ -45,57 +25,7 @@ def trigger_check_replication_slot_changes(): @replication_blueprint.route("/simulate-notification-load", methods=["POST"]) def simulate_notification_load(): - payload = request.get_json(silent=True) or {} - - notification_count, error = _parse_positive_int( - payload.get("notification_count", DEFAULT_NOTIFICATION_COUNT), - "notification_count", - max_value=MAX_NOTIFICATION_COUNT, - ) - if error: - return jsonify({"message": error}), 400 - - updates_per_notification, error = _parse_positive_int( - payload.get("updates_per_notification", DEFAULT_UPDATES_PER_NOTIFICATION), - "updates_per_notification", - max_value=MAX_UPDATES_PER_NOTIFICATION, - ) - if error: - return jsonify({"message": error}), 400 - - template, error = _resolve_template(payload) - if error: - return jsonify({"message": error}), 400 - - random_seed = payload.get("random_seed") - randomizer = random.Random(random_seed) if random_seed is not None else random.Random() - - notification_ids, status_breakdown = _insert_and_update_notifications( - template=template, - notification_count=notification_count, - updates_per_notification=updates_per_notification, - randomizer=randomizer, - ) - returned_id_count = min(len(notification_ids), DEFAULT_RETURNED_IDS) - total_updates = notification_count * updates_per_notification - - return ( - jsonify( - { - "message": "notification send/update load inserted into notifications table", - "notification_count": notification_count, - "updates_per_notification": updates_per_notification, - "inserted_count": notification_count, - "updated_count": total_updates, - "service_id": str(template.service_id), - "template_id": str(template.id), - "template_version": template.version, - "status_breakdown": status_breakdown, - "inserted_notification_ids": notification_ids[:returned_id_count], - } - ), - 200, - ) + return performance_test_simulate_notification_load() @replication_blueprint.route("/stats/", methods=["GET"]) @@ -127,124 +57,4 @@ def get_service_stats(service_id): return jsonify({"stats": result}), 200 -def _parse_positive_int(value, field_name, max_value): - try: - parsed = int(value) - except (TypeError, ValueError): - return None, f"{field_name} must be an integer" - - if parsed < 1: - return None, f"{field_name} must be greater than 0" - - if parsed > max_value: - return None, f"{field_name} must be less than or equal to {max_value}" - - return parsed, None - - -def _resolve_template(payload): - template_id = payload.get("template_id") - service_id = payload.get("service_id") - - query = Template.query - if template_id: - try: - query = query.filter(Template.id == UUID(str(template_id))) - except (ValueError, TypeError): - return None, "template_id must be a valid UUID" - - if service_id: - try: - query = query.filter(Template.service_id == UUID(str(service_id))) - except (ValueError, TypeError): - return None, "service_id must be a valid UUID" - - template = query.order_by(Template.created_at.asc()).first() - if not template: - return None, "No template found for the provided service/template filters" - - return template, None - - -def _insert_and_update_notifications(template, notification_count, updates_per_notification, randomizer): - inserted_notifications = [] - status_breakdown = {} - now = datetime.utcnow() - - for index in range(notification_count): - terminal_status = _pick_terminal_status(template.template_type, randomizer) - status_path = _build_status_path(terminal_status, updates_per_notification) - - notification = Notification( - id=uuid4(), - to=_build_recipient(template.template_type, index), - service_id=template.service_id, - template_id=template.id, - template_version=template.version, - key_type=KEY_TYPE_NORMAL, - notification_type=template.template_type, - created_at=now, - status=NOTIFICATION_SENDING, - billable_units=1, - postage="second" if template.template_type == LETTER_TYPE else None, - rate_multiplier=1 if template.template_type == SMS_TYPE else None, - ) - db.session.add(notification) - inserted_notifications.append((notification, status_path)) - - db.session.flush() - - for notification, status_path in inserted_notifications: - for status in status_path: - notification.status = status - notification.updated_at = datetime.utcnow() - if status in {NOTIFICATION_SENT, NOTIFICATION_DELIVERED}: - notification.sent_at = datetime.utcnow() - - status_breakdown[notification.status] = status_breakdown.get(notification.status, 0) + 1 - - db.session.commit() - - return [str(notification.id) for notification, _ in inserted_notifications], status_breakdown - - -def _pick_terminal_status(template_type, randomizer): - if template_type == LETTER_TYPE: - statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_SENT] - weights = [85, 15] - else: - statuses = [NOTIFICATION_DELIVERED, NOTIFICATION_SENT] - weights = [80, 20] - - return randomizer.choices(statuses, weights=weights, k=1)[0] - - -def _build_status_path(terminal_status, updates_per_notification): - if updates_per_notification <= 0: - return [] - - if updates_per_notification == 1: - return [terminal_status] - - if terminal_status == NOTIFICATION_DELIVERED: - base_path = [NOTIFICATION_SENT, NOTIFICATION_DELIVERED] - elif terminal_status == NOTIFICATION_SENT: - base_path = [NOTIFICATION_SENT] - else: - base_path = [NOTIFICATION_SENT, terminal_status] - - if len(base_path) >= updates_per_notification: - return base_path[:updates_per_notification] - - return base_path + [base_path[-1]] * (updates_per_notification - len(base_path)) - - -def _build_recipient(template_type, index): - if template_type == SMS_TYPE: - return f"+447700900{index % 1000:03d}" - if template_type == EMAIL_TYPE: - return f"load-test-{index}@example.com" - if template_type == LETTER_TYPE: - return "Load Test User\n1 Test Street\nTest City\nSW1A 1AA" - return f"load-test-{index}@example.com" diff --git a/tests/app/celery/test_process_replication_slot_changes.py b/tests/app/celery/test_process_replication_slot_changes.py index 25f78fdf1e..9828e38c24 100644 --- a/tests/app/celery/test_process_replication_slot_changes.py +++ b/tests/app/celery/test_process_replication_slot_changes.py @@ -1,4 +1,5 @@ import uuid +from datetime import datetime import pytest from celery.exceptions import Retry @@ -6,96 +7,116 @@ from app.celery.process_replication_slot_changes import check_replication_slot_changes +def _base_row(service_id: str, template_id: str, *, status: str): + return { + "service_id": service_id, + "template_id": template_id, + "notification_type": "email", + "notification_status": status, + "key_type": "normal", + "created_at": datetime.utcnow().isoformat(), + } + + def test_check_replication_slot_changes_early_returns_when_no_changes(mocker, caplog): + mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=True) + mock_unlock = mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") mock_get_replication_changes = mocker.patch( "app.celery.process_replication_slot_changes.get_replication_changes", return_value=[] ) mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") - mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") - mock_apply_delete = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delete") - mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") + mock_advance = mocker.patch("app.celery.process_replication_slot_changes._advance_replication_slot") + mock_apply_delta = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delta") with caplog.at_level("INFO"): check_replication_slot_changes() - mock_get_replication_changes.assert_called_once_with(peek=False) - mock_apply_insert.assert_not_called() - mock_apply_delete.assert_not_called() - mock_apply_update.assert_not_called() + mock_get_replication_changes.assert_called_once() + mock_apply_delta.assert_not_called() mock_commit.assert_not_called() + mock_advance.assert_not_called() + mock_unlock.assert_called_once() assert "No replication slot changes found" in caplog.messages -def test_check_replication_slot_changes_processes_insert_update_delete(mocker, caplog): +def test_check_replication_slot_changes_rolls_up_and_advances_slot(mocker, caplog): service_id = str(uuid.uuid4()) template_id = str(uuid.uuid4()) changes = [ { "type": "insert", "table": "notifications", - "current_row_data": { - "service_id": service_id, - "template_id": template_id, - "notification_type": "email", - "notification_status": "created", - }, + "nextlsn": "0/16B6A28", + "current_row_data": _base_row(service_id, template_id, status="created"), "previous_row_data": {}, }, { "type": "update", "table": "notifications", - "current_row_data": { - "service_id": service_id, - "template_id": template_id, - "notification_type": "email", - "notification_status": "delivered", - }, - "previous_row_data": { - "service_id": service_id, - "template_id": template_id, - "notification_type": "email", - "notification_status": "sending", - }, + "nextlsn": "0/16B6A28", + "current_row_data": _base_row(service_id, template_id, status="sending"), + "previous_row_data": _base_row(service_id, template_id, status="created"), }, { "type": "delete", - "table": "notifications", - "current_row_data": { - "service_id": service_id, - "template_id": template_id, - "notification_type": "email", - "notification_status": "delivered", - }, - "previous_row_data": {}, + "table": "notification_history", + "nextlsn": "0/16B6A28", + "current_row_data": {}, + "previous_row_data": _base_row(service_id, template_id, status="delivered"), }, ] - mock_get_replication_changes = mocker.patch( - "app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes + mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=True) + mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") + mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) + + call_order = [] + + def commit_side_effect(): + call_order.append("commit") + + def advance_side_effect(_): + call_order.append("advance") + + mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit", side_effect=commit_side_effect) + mock_advance = mocker.patch( + "app.celery.process_replication_slot_changes._advance_replication_slot", side_effect=advance_side_effect ) - mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") - mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") - mock_apply_delete = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delete") - mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") + mock_apply_delta = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delta") with caplog.at_level("INFO"): check_replication_slot_changes() - mock_get_replication_changes.assert_called_once_with(peek=False) - mock_apply_insert.assert_called_once() - mock_apply_update.assert_called_once() - mock_apply_delete.assert_called_once() mock_commit.assert_called_once() + mock_advance.assert_called_once_with("0/16B6A28") + assert call_order == ["commit", "advance"] + + assert mock_apply_delta.call_count == 1 + dimensions, delta = mock_apply_delta.call_args.args + assert str(dimensions["service_id"]) == service_id + assert str(dimensions["template_id"]) == template_id + assert dimensions["notification_type"] == "email" + assert dimensions["notification_status"] == "sending" + assert delta == 1 assert "Replication slot changes processed" in caplog.messages - record = next(record for record in caplog.records if record.msg == "Replication slot changes processed") - assert record.changes_count == 3 - assert record.processed_changes == 3 - assert record.ignored_changes == 0 + + +def test_check_replication_slot_changes_returns_when_lock_not_acquired(mocker): + mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=False) + mock_get_replication_changes = mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes") + mock_unlock = mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") + + check_replication_slot_changes() + + mock_get_replication_changes.assert_not_called() + mock_unlock.assert_not_called() def test_check_replication_slot_changes_calls_retry_on_query_error(mocker): + mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=True) + mock_unlock = mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") mocker.patch( "app.celery.process_replication_slot_changes.get_replication_changes", side_effect=Exception("EXPECTED"), @@ -108,144 +129,6 @@ def test_check_replication_slot_changes_calls_retry_on_query_error(mocker): assert mock_retry.call_count == 1 mock_rollback.assert_called_once() + mock_unlock.assert_called_once() _, kwargs = mock_retry.call_args assert kwargs["countdown"] == 1 - - -def test_check_replication_slot_changes_update_with_sparse_previous_status_uses_transition(mocker): - service_id = str(uuid.uuid4()) - template_id = str(uuid.uuid4()) - changes = [ - { - "type": "update", - "table": "notifications", - "current_row_data": { - "service_id": service_id, - "template_id": template_id, - "notification_type": "sms", - "notification_status": "sending", - }, - "previous_row_data": { - "id": str(uuid.uuid4()), - "notification_status": "created", - }, - } - ] - - mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) - mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") - mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") - mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") - - check_replication_slot_changes() - - mock_apply_insert.assert_not_called() - mock_apply_update.assert_called_once() - old_dimensions, new_dimensions = mock_apply_update.call_args.args - assert old_dimensions["notification_status"] == "created" - assert new_dimensions["notification_status"] == "sending" - assert str(old_dimensions["service_id"]) == service_id - assert str(old_dimensions["template_id"]) == template_id - assert old_dimensions["notification_type"] == "sms" - mock_commit.assert_called_once() - - -def test_check_replication_slot_changes_update_without_previous_status_uses_insert(mocker): - service_id = str(uuid.uuid4()) - template_id = str(uuid.uuid4()) - changes = [ - { - "type": "update", - "table": "notifications", - "current_row_data": { - "service_id": service_id, - "template_id": template_id, - "notification_type": "sms", - "notification_status": "sending", - }, - "previous_row_data": { - "id": str(uuid.uuid4()), - }, - } - ] - - mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) - mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") - mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") - mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") - - check_replication_slot_changes() - - mock_apply_update.assert_not_called() - mock_apply_insert.assert_called_once() - dimensions = mock_apply_insert.call_args.args[0] - assert dimensions["notification_status"] == "sending" - assert str(dimensions["service_id"]) == service_id - assert str(dimensions["template_id"]) == template_id - assert dimensions["notification_type"] == "sms" - mock_commit.assert_called_once() - - -def test_check_replication_slot_changes_processes_user_payload_shape(mocker): - service_id = str(uuid.uuid4()) - template_id = str(uuid.uuid4()) - notification_id = str(uuid.uuid4()) - changes = [ - { - "type": "insert", - "table": "notifications", - "current_row_data": { - "id": notification_id, - "service_id": service_id, - "template_id": template_id, - "notification_type": "sms", - "notification_status": "created", - "billable_units": 0, - }, - "previous_row_data": {}, - }, - { - "type": "update", - "table": "notifications", - "current_row_data": { - "id": notification_id, - "service_id": service_id, - "template_id": template_id, - "notification_type": "sms", - "notification_status": "sending", - "billable_units": 1, - }, - "previous_row_data": { - "id": notification_id, - "notification_status": "created", - }, - }, - ] - - mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) - mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") - mock_apply_insert = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_insert") - mock_apply_update = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_update_transition") - - check_replication_slot_changes() - - assert mock_apply_insert.call_count == 1 - assert mock_apply_update.call_count == 1 - - inserted_dimensions = mock_apply_insert.call_args.args[0] - assert str(inserted_dimensions["service_id"]) == service_id - assert str(inserted_dimensions["template_id"]) == template_id - assert inserted_dimensions["notification_type"] == "sms" - assert inserted_dimensions["notification_status"] == "created" - - old_dimensions, new_dimensions = mock_apply_update.call_args.args - assert str(old_dimensions["service_id"]) == service_id - assert str(old_dimensions["template_id"]) == template_id - assert old_dimensions["notification_type"] == "sms" - assert old_dimensions["notification_status"] == "created" - assert str(new_dimensions["service_id"]) == service_id - assert str(new_dimensions["template_id"]) == template_id - assert new_dimensions["notification_type"] == "sms" - assert new_dimensions["notification_status"] == "sending" - - mock_commit.assert_called_once() diff --git a/tests/app/dao/test_service_stats_dao.py b/tests/app/dao/test_service_stats_dao.py index b26b154cbd..c6141a0460 100644 --- a/tests/app/dao/test_service_stats_dao.py +++ b/tests/app/dao/test_service_stats_dao.py @@ -1,7 +1,5 @@ from app.dao.service_stats_dao import ( - apply_service_stats_delete, - apply_service_stats_insert, - apply_service_stats_update_transition, + apply_service_stats_delta, ) from app.models import ServiceStats from tests.app.db import create_template @@ -25,70 +23,16 @@ def _get_service_stats_row(dimensions): ).first() -def test_apply_service_stats_insert_creates_and_increments_row(notify_db_session, sample_service): +def test_apply_service_stats_delta_supports_positive_and_negative_values(notify_db_session, sample_service): template = create_template(service=sample_service, template_type="email") dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) - apply_service_stats_insert(dimensions) - apply_service_stats_insert(dimensions) - notify_db_session.commit() - - row = _get_service_stats_row(dimensions) - - assert row is not None - assert row.count == 2 - - -def test_apply_service_stats_delete_does_not_drop_below_zero(notify_db_session, sample_service): - template = create_template(service=sample_service, template_type="email") - dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) - - apply_service_stats_insert(dimensions) - apply_service_stats_delete(dimensions) - apply_service_stats_delete(dimensions) + apply_service_stats_delta(dimensions, 5) + apply_service_stats_delta(dimensions, -3) + apply_service_stats_delta(dimensions, -10) notify_db_session.commit() row = _get_service_stats_row(dimensions) assert row is not None assert row.count == 0 - - -def test_apply_service_stats_update_transition_moves_count_between_statuses(notify_db_session, sample_service): - template = create_template(service=sample_service, template_type="email") - old_dimensions = _build_dimensions( - service_id=sample_service.id, - template_id=template.id, - notification_status="sending", - ) - new_dimensions = _build_dimensions( - service_id=sample_service.id, - template_id=template.id, - notification_status="delivered", - ) - - apply_service_stats_insert(old_dimensions) - apply_service_stats_update_transition(old_dimensions, new_dimensions) - notify_db_session.commit() - - old_row = _get_service_stats_row(old_dimensions) - new_row = _get_service_stats_row(new_dimensions) - - assert old_row is not None - assert new_row is not None - assert old_row.count == 0 - assert new_row.count == 1 - - -def test_apply_service_stats_update_transition_noops_when_dimensions_match(notify_db_session, sample_service): - template = create_template(service=sample_service, template_type="email") - dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) - - apply_service_stats_insert(dimensions) - apply_service_stats_update_transition(dimensions, dimensions) - notify_db_session.commit() - - row = _get_service_stats_row(dimensions) - - assert row is not None - assert row.count == 1 diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py index e2d577fa36..3800e60a36 100644 --- a/tests/app/replication/test_process_replication_changes.py +++ b/tests/app/replication/test_process_replication_changes.py @@ -50,12 +50,14 @@ def test_process_replication_changes_flattens_rows_across_changes(): { "type": "insert", "table": "notifications", + "nextlsn": None, "current_row_data": {"id": "111", "to": "447700900001"}, "previous_row_data": {}, }, { "type": "update", "table": "notifications", + "nextlsn": None, "current_row_data": {"id": "222", "status": "sent"}, "previous_row_data": {}, }, @@ -85,6 +87,7 @@ def test_parse_row_data_maps_current_and_previous_rows(): assert result == { "type": "update", "table": "notifications", + "nextlsn": None, "current_row_data": { "id": "abc", "status": "delivered", @@ -97,6 +100,37 @@ def test_parse_row_data_maps_current_and_previous_rows(): } +def test_parse_change_data_propagates_nextlsn_to_all_rows(): + result = parse_change_data( + { + "data": json.dumps( + { + "nextlsn": "0/16B6A28", + "change": [ + { + "kind": "insert", + "table": "notifications", + "columnnames": ["id", "status"], + "columnvalues": ["abc", "sending"], + } + ], + } + ), + "lsn": "0/16B6A20", + } + ) + + assert result == [ + { + "type": "insert", + "table": "notifications", + "nextlsn": "0/16B6A28", + "current_row_data": {"id": "abc", "status": "sending"}, + "previous_row_data": {}, + } + ] + + def test_get_str_value_returns_none_for_non_string_values(): assert get_str_value({"status": 123}, "status") is None diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py index 8a80dc9a99..44424db9e5 100644 --- a/tests/app/replication/test_rest.py +++ b/tests/app/replication/test_rest.py @@ -14,7 +14,7 @@ def replication_client(notify_api): def test_trigger_process_replication_slot_changes_queues_task(replication_client, mocker): - mock_apply_async = mocker.patch("app.replication.rest.check_replication_slot_changes.apply_async") + mock_task = mocker.patch("app.replication.rest.check_replication_slot_changes") auth_header = create_admin_authorization_header() response = replication_client.post( @@ -23,8 +23,8 @@ def test_trigger_process_replication_slot_changes_queues_task(replication_client ) assert response.status_code == 201 - assert response.get_json() == {"message": "check-replication-slot-changes task queued"} - mock_apply_async.assert_called_once_with() + assert response.get_json() == {"message": "check-replication-slot-changes task executed"} + mock_task.assert_called_once_with() def test_trigger_process_replication_slot_changes_requires_auth(replication_client): @@ -45,18 +45,17 @@ def test_trigger_process_replication_slot_changes_rejects_get(replication_client def test_trigger_check_replication_slot_changes_returns_data(replication_client, mocker): - mock_result = mocker.Mock() - raw_changes = [{"lsn": "0/1", "xid": 1, "data": "mock-change"}] parsed_changes = [ - {"type": "insert", "table": "notifications", "current_row_data": {"id": "1"}, "previous_row_data": {}} + { + "type": "insert", + "table": "notifications", + "nextlsn": "0/2", + "current_row_data": {"id": "1"}, + "previous_row_data": {}, + } ] - mock_result.mappings.return_value.all.return_value = raw_changes - mock_execute = mocker.patch( - "app.replication.rest.db.session.execute", - return_value=mock_result, - ) - mock_process_changes = mocker.patch( - "app.replication.rest.process_replication_changes", + mock_get_changes = mocker.patch( + "app.replication.rest.get_replication_changes", return_value=parsed_changes, ) auth_header = create_admin_authorization_header() @@ -68,10 +67,7 @@ def test_trigger_check_replication_slot_changes_returns_data(replication_client, assert response.status_code == 200 assert response.get_json() == {"changes": parsed_changes} - mock_execute.assert_called_once() - mock_result.mappings.assert_called_once_with() - mock_result.mappings.return_value.all.assert_called_once_with() - mock_process_changes.assert_called_once_with(raw_changes) + mock_get_changes.assert_called_once_with(peek=True) def test_trigger_check_replication_slot_changes_requires_auth(replication_client): @@ -136,7 +132,7 @@ def test_simulate_notification_load_inserts_and_updates_notifications(replicatio [ ({"notification_count": 0}, "notification_count must be greater than 0"), ({"notification_count": "abc"}, "notification_count must be an integer"), - ({"notification_count": 5001}, "notification_count must be less than or equal to 5000"), + ({"notification_count": 50001}, "notification_count must be less than or equal to 50000"), ({"updates_per_notification": 0}, "updates_per_notification must be greater than 0"), ({"updates_per_notification": "abc"}, "updates_per_notification must be an integer"), ( From 0e427b1527be2654d2867652e09d261e82645c34 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 11:28:04 +0100 Subject: [PATCH 21/26] Refactor service stats update logic to consolidate increment and decrement operations into a single function --- app/dao/service_stats_dao.py | 70 +++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/app/dao/service_stats_dao.py b/app/dao/service_stats_dao.py index ff9dd97de1..ceadca53d8 100644 --- a/app/dao/service_stats_dao.py +++ b/app/dao/service_stats_dao.py @@ -16,47 +16,51 @@ class ServiceStatsDimensions(TypedDict): notification_status: str -def _increment_service_stats_count(dimensions: ServiceStatsDimensions, increment_by: int) -> None: - stmt = insert(ServiceStats).values( - id=uuid.uuid4(), - service_id=dimensions["service_id"], - template_id=dimensions["template_id"], - notification_type=dimensions["notification_type"], - notification_status=dimensions["notification_status"], - count=increment_by, - ) - stmt = stmt.on_conflict_do_update( - constraint="uix_service_stats_dimensions", - set_={ - "count": ServiceStats.count + increment_by, - }, - ) - db.session.execute(stmt) +def _update_service_stats_count(dimensions: ServiceStatsDimensions, delta: int) -> None: + if delta == 0: + return + dimension_values = { + "service_id": dimensions["service_id"], + "template_id": dimensions["template_id"], + "notification_type": dimensions["notification_type"], + "notification_status": dimensions["notification_status"], + } + filters = ( + ServiceStats.service_id == dimension_values["service_id"], + ServiceStats.template_id == dimension_values["template_id"], + ServiceStats.notification_type == dimension_values["notification_type"], + ServiceStats.notification_status == dimension_values["notification_status"], + ) -def _decrement_service_stats_count(dimensions: ServiceStatsDimensions, decrement_by: int) -> None: - ( - db.session.query(ServiceStats) - .filter( - ServiceStats.service_id == dimensions["service_id"], - ServiceStats.template_id == dimensions["template_id"], - ServiceStats.notification_type == dimensions["notification_type"], - ServiceStats.notification_status == dimensions["notification_status"], + if delta > 0: + stmt = insert(ServiceStats).values( + id=uuid.uuid4(), + **dimension_values, + count=delta, ) - .update( - { - "count": func.greatest(ServiceStats.count - decrement_by, 0), + stmt = stmt.on_conflict_do_update( + constraint="uix_service_stats_dimensions", + set_={ + "count": ServiceStats.count + delta, }, - synchronize_session=False, ) - ) + db.session.execute(stmt) + else: + ( + db.session.query(ServiceStats) + .filter(*filters) + .update( + { + "count": func.greatest(ServiceStats.count + delta, 0), + }, + synchronize_session=False, + ) + ) def apply_service_stats_delta(dimensions: ServiceStatsDimensions, delta: int) -> None: - if delta > 0: - _increment_service_stats_count(dimensions, increment_by=delta) - elif delta < 0: - _decrement_service_stats_count(dimensions, decrement_by=abs(delta)) + _update_service_stats_count(dimensions, delta) def dao_fetch_stats_for_service(service_id: UUID) -> list[ServiceStats]: From 4e5f37668e68308a608d5e1b18e15dbf0dfaa7ad Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 11:29:30 +0100 Subject: [PATCH 22/26] Remove obsolete test files for service stats and replication slot changes --- .../test_process_replication_slot_changes.py | 134 --------------- tests/app/dao/test_service_stats_dao.py | 38 ----- .../test_process_replication_changes.py | 147 ----------------- tests/app/replication/test_rest.py | 156 ------------------ tests/app/test_config.py | 7 - 5 files changed, 482 deletions(-) delete mode 100644 tests/app/celery/test_process_replication_slot_changes.py delete mode 100644 tests/app/dao/test_service_stats_dao.py delete mode 100644 tests/app/replication/test_process_replication_changes.py delete mode 100644 tests/app/replication/test_rest.py diff --git a/tests/app/celery/test_process_replication_slot_changes.py b/tests/app/celery/test_process_replication_slot_changes.py deleted file mode 100644 index 9828e38c24..0000000000 --- a/tests/app/celery/test_process_replication_slot_changes.py +++ /dev/null @@ -1,134 +0,0 @@ -import uuid -from datetime import datetime - -import pytest -from celery.exceptions import Retry - -from app.celery.process_replication_slot_changes import check_replication_slot_changes - - -def _base_row(service_id: str, template_id: str, *, status: str): - return { - "service_id": service_id, - "template_id": template_id, - "notification_type": "email", - "notification_status": status, - "key_type": "normal", - "created_at": datetime.utcnow().isoformat(), - } - - -def test_check_replication_slot_changes_early_returns_when_no_changes(mocker, caplog): - mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=True) - mock_unlock = mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") - mock_get_replication_changes = mocker.patch( - "app.celery.process_replication_slot_changes.get_replication_changes", return_value=[] - ) - mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit") - mock_advance = mocker.patch("app.celery.process_replication_slot_changes._advance_replication_slot") - mock_apply_delta = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delta") - - with caplog.at_level("INFO"): - check_replication_slot_changes() - - mock_get_replication_changes.assert_called_once() - mock_apply_delta.assert_not_called() - mock_commit.assert_not_called() - mock_advance.assert_not_called() - mock_unlock.assert_called_once() - - assert "No replication slot changes found" in caplog.messages - - -def test_check_replication_slot_changes_rolls_up_and_advances_slot(mocker, caplog): - service_id = str(uuid.uuid4()) - template_id = str(uuid.uuid4()) - changes = [ - { - "type": "insert", - "table": "notifications", - "nextlsn": "0/16B6A28", - "current_row_data": _base_row(service_id, template_id, status="created"), - "previous_row_data": {}, - }, - { - "type": "update", - "table": "notifications", - "nextlsn": "0/16B6A28", - "current_row_data": _base_row(service_id, template_id, status="sending"), - "previous_row_data": _base_row(service_id, template_id, status="created"), - }, - { - "type": "delete", - "table": "notification_history", - "nextlsn": "0/16B6A28", - "current_row_data": {}, - "previous_row_data": _base_row(service_id, template_id, status="delivered"), - }, - ] - - mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=True) - mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") - mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes", return_value=changes) - - call_order = [] - - def commit_side_effect(): - call_order.append("commit") - - def advance_side_effect(_): - call_order.append("advance") - - mock_commit = mocker.patch("app.celery.process_replication_slot_changes.db.session.commit", side_effect=commit_side_effect) - mock_advance = mocker.patch( - "app.celery.process_replication_slot_changes._advance_replication_slot", side_effect=advance_side_effect - ) - mock_apply_delta = mocker.patch("app.celery.process_replication_slot_changes.apply_service_stats_delta") - - with caplog.at_level("INFO"): - check_replication_slot_changes() - - mock_commit.assert_called_once() - mock_advance.assert_called_once_with("0/16B6A28") - assert call_order == ["commit", "advance"] - - assert mock_apply_delta.call_count == 1 - dimensions, delta = mock_apply_delta.call_args.args - assert str(dimensions["service_id"]) == service_id - assert str(dimensions["template_id"]) == template_id - assert dimensions["notification_type"] == "email" - assert dimensions["notification_status"] == "sending" - assert delta == 1 - - assert "Replication slot changes processed" in caplog.messages - - -def test_check_replication_slot_changes_returns_when_lock_not_acquired(mocker): - mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=False) - mock_get_replication_changes = mocker.patch("app.celery.process_replication_slot_changes.get_replication_changes") - mock_unlock = mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") - - check_replication_slot_changes() - - mock_get_replication_changes.assert_not_called() - mock_unlock.assert_not_called() - - -def test_check_replication_slot_changes_calls_retry_on_query_error(mocker): - mocker.patch("app.celery.process_replication_slot_changes._try_advisory_lock", return_value=True) - mock_unlock = mocker.patch("app.celery.process_replication_slot_changes._advisory_unlock") - mocker.patch( - "app.celery.process_replication_slot_changes.get_replication_changes", - side_effect=Exception("EXPECTED"), - ) - mock_retry = mocker.patch.object(check_replication_slot_changes, "retry", side_effect=Retry("retry")) - mock_rollback = mocker.patch("app.celery.process_replication_slot_changes.db.session.rollback") - - with pytest.raises(Retry): - check_replication_slot_changes() - - assert mock_retry.call_count == 1 - mock_rollback.assert_called_once() - mock_unlock.assert_called_once() - _, kwargs = mock_retry.call_args - assert kwargs["countdown"] == 1 diff --git a/tests/app/dao/test_service_stats_dao.py b/tests/app/dao/test_service_stats_dao.py deleted file mode 100644 index c6141a0460..0000000000 --- a/tests/app/dao/test_service_stats_dao.py +++ /dev/null @@ -1,38 +0,0 @@ -from app.dao.service_stats_dao import ( - apply_service_stats_delta, -) -from app.models import ServiceStats -from tests.app.db import create_template - - -def _build_dimensions(*, service_id, template_id, notification_type="email", notification_status="created"): - return { - "service_id": service_id, - "template_id": template_id, - "notification_type": notification_type, - "notification_status": notification_status, - } - - -def _get_service_stats_row(dimensions): - return ServiceStats.query.filter_by( - service_id=dimensions["service_id"], - template_id=dimensions["template_id"], - notification_type=dimensions["notification_type"], - notification_status=dimensions["notification_status"], - ).first() - - -def test_apply_service_stats_delta_supports_positive_and_negative_values(notify_db_session, sample_service): - template = create_template(service=sample_service, template_type="email") - dimensions = _build_dimensions(service_id=sample_service.id, template_id=template.id) - - apply_service_stats_delta(dimensions, 5) - apply_service_stats_delta(dimensions, -3) - apply_service_stats_delta(dimensions, -10) - notify_db_session.commit() - - row = _get_service_stats_row(dimensions) - - assert row is not None - assert row.count == 0 diff --git a/tests/app/replication/test_process_replication_changes.py b/tests/app/replication/test_process_replication_changes.py deleted file mode 100644 index 3800e60a36..0000000000 --- a/tests/app/replication/test_process_replication_changes.py +++ /dev/null @@ -1,147 +0,0 @@ -import json -from unittest.mock import MagicMock, patch - -from app.replication.replication_changes_utils import ( - get_notification_status, - get_replication_changes, - get_str_value, - parse_change_data, - parse_row_data, -) - - -def test_process_replication_changes_flattens_rows_across_changes(): - first_change = { - "data": json.dumps( - { - "change": [ - { - "kind": "insert", - "table": "notifications", - "columnnames": ["id", "to"], - "columnvalues": ["111", "447700900001"], - } - ] - } - ) - } - second_change = { - "data": json.dumps( - { - "change": [ - { - "kind": "update", - "table": "notifications", - "columnnames": ["id", "status"], - "columnvalues": ["222", "sent"], - } - ] - } - ) - } - - mock_result = MagicMock() - mock_result.mappings().all.return_value = [first_change, second_change] - - with patch("app.replication.replication_changes_utils.db.session.execute", return_value=mock_result): - result = get_replication_changes() - - assert result == [ - { - "type": "insert", - "table": "notifications", - "nextlsn": None, - "current_row_data": {"id": "111", "to": "447700900001"}, - "previous_row_data": {}, - }, - { - "type": "update", - "table": "notifications", - "nextlsn": None, - "current_row_data": {"id": "222", "status": "sent"}, - "previous_row_data": {}, - }, - ] - - -def test_parse_change_data_returns_none_for_empty_change_list(): - result = parse_change_data({"data": json.dumps({"change": []})}) - - assert result is None - - -def test_parse_row_data_maps_current_and_previous_rows(): - row = { - "kind": "update", - "table": "notifications", - "columnnames": ["id", "status", "to"], - "columnvalues": ["abc", "delivered", "447700900002"], - "oldkeys": { - "keynames": ["id", "status"], - "keyvalues": ["abc", "sending"], - }, - } - - result = parse_row_data(row) - - assert result == { - "type": "update", - "table": "notifications", - "nextlsn": None, - "current_row_data": { - "id": "abc", - "status": "delivered", - "to": "447700900002", - }, - "previous_row_data": { - "id": "abc", - "status": "sending", - }, - } - - -def test_parse_change_data_propagates_nextlsn_to_all_rows(): - result = parse_change_data( - { - "data": json.dumps( - { - "nextlsn": "0/16B6A28", - "change": [ - { - "kind": "insert", - "table": "notifications", - "columnnames": ["id", "status"], - "columnvalues": ["abc", "sending"], - } - ], - } - ), - "lsn": "0/16B6A20", - } - ) - - assert result == [ - { - "type": "insert", - "table": "notifications", - "nextlsn": "0/16B6A28", - "current_row_data": {"id": "abc", "status": "sending"}, - "previous_row_data": {}, - } - ] - - -def test_get_str_value_returns_none_for_non_string_values(): - assert get_str_value({"status": 123}, "status") is None - - -def test_get_notification_status_prefers_notification_status_key(): - result = get_notification_status({"notification_status": "delivered", "status": "sending"}) - - assert result == "delivered" - - -def test_get_notification_status_falls_back_to_status_key(): - result = get_notification_status({"status": "temporary-failure"}) - - assert result == "temporary-failure" diff --git a/tests/app/replication/test_rest.py b/tests/app/replication/test_rest.py deleted file mode 100644 index 44424db9e5..0000000000 --- a/tests/app/replication/test_rest.py +++ /dev/null @@ -1,156 +0,0 @@ -import pytest -from uuid import UUID - -from app.constants import NOTIFICATION_DELIVERED, NOTIFICATION_SENT -from app.models import Notification -from tests import create_admin_authorization_header - - -@pytest.fixture -def replication_client(notify_api): - with notify_api.test_request_context(): - with notify_api.test_client() as client: - yield client - - -def test_trigger_process_replication_slot_changes_queues_task(replication_client, mocker): - mock_task = mocker.patch("app.replication.rest.check_replication_slot_changes") - auth_header = create_admin_authorization_header() - - response = replication_client.post( - "/replication/process-slot-changes", - headers=[auth_header], - ) - - assert response.status_code == 201 - assert response.get_json() == {"message": "check-replication-slot-changes task executed"} - mock_task.assert_called_once_with() - - -def test_trigger_process_replication_slot_changes_requires_auth(replication_client): - response = replication_client.post("/replication/process-slot-changes") - - assert response.status_code == 401 - - -def test_trigger_process_replication_slot_changes_rejects_get(replication_client): - auth_header = create_admin_authorization_header() - - response = replication_client.get( - "/replication/process-slot-changes", - headers=[auth_header], - ) - - assert response.status_code == 405 - - -def test_trigger_check_replication_slot_changes_returns_data(replication_client, mocker): - parsed_changes = [ - { - "type": "insert", - "table": "notifications", - "nextlsn": "0/2", - "current_row_data": {"id": "1"}, - "previous_row_data": {}, - } - ] - mock_get_changes = mocker.patch( - "app.replication.rest.get_replication_changes", - return_value=parsed_changes, - ) - auth_header = create_admin_authorization_header() - - response = replication_client.get( - "/replication/check-slot-changes", - headers=[auth_header], - ) - - assert response.status_code == 200 - assert response.get_json() == {"changes": parsed_changes} - mock_get_changes.assert_called_once_with(peek=True) - - -def test_trigger_check_replication_slot_changes_requires_auth(replication_client): - response = replication_client.get("/replication/check-slot-changes") - - assert response.status_code == 401 - - -def test_trigger_check_replication_slot_changes_rejects_post(replication_client): - auth_header = create_admin_authorization_header() - - response = replication_client.post( - "/replication/check-slot-changes", - headers=[auth_header], - ) - - assert response.status_code == 405 - - -def test_simulate_notification_load_inserts_and_updates_notifications(replication_client, sample_template): - auth_header = create_admin_authorization_header() - - response = replication_client.post( - "/replication/simulate-notification-load", - headers=[auth_header], - json={ - "notification_count": 30, - "updates_per_notification": 2, - "template_id": str(sample_template.id), - "random_seed": 7, - }, - ) - - response_json = response.get_json() - inserted_ids = response_json["inserted_notification_ids"] - inserted_uuid_ids = [UUID(notification_id) for notification_id in inserted_ids] - inserted_notifications = Notification.query.filter(Notification.id.in_(inserted_uuid_ids)).all() - distinct_statuses = {notification.status for notification in inserted_notifications} - status_breakdown = response_json["status_breakdown"] - simulated_statuses = {NOTIFICATION_SENT, NOTIFICATION_DELIVERED} - - assert response.status_code == 200 - assert response_json["message"] == "notification send/update load inserted into notifications table" - assert response_json["notification_count"] == 30 - assert response_json["updates_per_notification"] == 2 - assert response_json["inserted_count"] == 30 - assert response_json["updated_count"] == 60 - assert response_json["service_id"] == str(sample_template.service_id) - assert response_json["template_id"] == str(sample_template.id) - assert response_json["template_version"] == sample_template.version - assert len(inserted_ids) == 20 - assert len(inserted_notifications) == 20 - assert len(status_breakdown) >= 2 - assert len(distinct_statuses) >= 1 - assert sum(status_breakdown.values()) == response_json["inserted_count"] - assert set(status_breakdown).issubset(simulated_statuses) - assert distinct_statuses.issubset(simulated_statuses) - - -@pytest.mark.parametrize( - "payload, expected_message", - [ - ({"notification_count": 0}, "notification_count must be greater than 0"), - ({"notification_count": "abc"}, "notification_count must be an integer"), - ({"notification_count": 50001}, "notification_count must be less than or equal to 50000"), - ({"updates_per_notification": 0}, "updates_per_notification must be greater than 0"), - ({"updates_per_notification": "abc"}, "updates_per_notification must be an integer"), - ( - {"updates_per_notification": 6}, - "updates_per_notification must be less than or equal to 5", - ), - ({"template_id": "not-a-uuid"}, "template_id must be a valid UUID"), - ({"service_id": "not-a-uuid"}, "service_id must be a valid UUID"), - ], -) -def test_simulate_notification_load_validates_payload(replication_client, payload, expected_message): - auth_header = create_admin_authorization_header() - - response = replication_client.post( - "/replication/simulate-notification-load", - headers=[auth_header], - json=payload, - ) - - assert response.status_code == 400 - assert response.get_json() == {"message": expected_message} diff --git a/tests/app/test_config.py b/tests/app/test_config.py index b41c78ef0d..9ba4fd87e4 100644 --- a/tests/app/test_config.py +++ b/tests/app/test_config.py @@ -96,10 +96,3 @@ def test_celery_config_contains_archived_template_email_file_cleanup_task(): assert task_config["task"] == "remove-archived-template-email-files-from-s3" assert task_config["options"]["queue"] == QueueNames.PERIODIC assert task_config["schedule"] == crontab(hour=4, minute=40) - - -def test_celery_config_contains_replication_slot_change_check_task(): - task_config = Config.CELERY["beat_schedule"]["check-replication-slot-changes"] - assert task_config["task"] == "check-replication-slot-changes" - assert task_config["options"]["queue"] == QueueNames.PERIODIC - assert task_config["schedule"].total_seconds() == 1 From d656a386677bec827e569661c729bf69b7c82537 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 11:40:19 +0100 Subject: [PATCH 23/26] Add docs --- .../process_replication_slot_changes.py | 293 +++++++++--------- app/dao/service_stats_dao.py | 14 +- app/replication/rest.py | 11 + 3 files changed, 176 insertions(+), 142 deletions(-) diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py index 20b3b0ea0b..29a8144029 100644 --- a/app/celery/process_replication_slot_changes.py +++ b/app/celery/process_replication_slot_changes.py @@ -25,76 +25,103 @@ type ServiceStatsDimensionsKey = tuple[UUID, UUID, str, str] -def _parse_uuid_value(row_data: RowData, key: str) -> UUID | None: - raw_value = get_str_value(row_data, key) - if not raw_value: - return None - +# 1. Task entrypoint for consuming replication slot changes and applying aggregate deltas +# into the service stats table. This task is lock-guarded so only one worker processes +# and advances the slot at a time. +@notify_celery.task(bind=True, name="check-replication-slot-changes") +@cronitor("check-replication-slot-changes") +def check_replication_slot_changes(self): + lock_acquired = False try: - return UUID(raw_value) - except ValueError: - return None + with current_app.app_context(): + lock_acquired = _try_advisory_lock(REPLICATION_ADVISORY_LOCK_ID) + if not lock_acquired: + current_app.logger.info( + "Replication slot lock not acquired", + extra={"celery_task": "check-replication-slot-changes"}, + ) + return + changes = get_replication_changes( + peek=True, + slot_name=REPLICATION_SLOT_NAME, + upto_nchanges=REPLICATION_SLOT_UPTO_NCHANGES, + table_names=("public.notifications", "public.notification_history"), + include_lsn=True, + format_version=1, + include_types=False, + include_typmod=False, + ) + fetched_changes = len(changes) -def _parse_datetime_value(row_data: RowData, key: str) -> datetime | None: - raw_value = get_str_value(row_data, key) - if not raw_value: - return None + if fetched_changes == 0: + current_app.logger.info( + "No replication slot changes found", + extra={"celery_task": "check-replication-slot-changes", "changes_count": 0}, + ) + return - normalized = raw_value.replace("Z", "+00:00") - try: - return datetime.fromisoformat(normalized) - except ValueError: - return None + counter, processed_changes, ignored_changes, last_nextlsn = _build_counter_from_changes(changes) + service_stats_deltas = _roll_up_service_stats_deltas(counter) + for service_stats_key, delta in service_stats_deltas.items(): + if delta == 0: + continue -def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> FullDimensions | None: - if use_previous_row: - row_data = change["previous_row_data"] - fallback_data = change["current_row_data"] - else: - row_data = change["current_row_data"] - fallback_data = change["previous_row_data"] + service_id, template_id, notification_type, notification_status = service_stats_key + dimensions: ServiceStatsDimensions = { + "service_id": service_id, + "template_id": template_id, + "notification_type": notification_type, + "notification_status": notification_status, + } + apply_service_stats_delta(dimensions, delta) - service_id = _parse_uuid_value(row_data, "service_id") or _parse_uuid_value(fallback_data, "service_id") - template_id = _parse_uuid_value(row_data, "template_id") or _parse_uuid_value(fallback_data, "template_id") - notification_type = get_str_value(row_data, "notification_type") or get_str_value( - fallback_data, "notification_type" - ) - job_id = _parse_uuid_value(row_data, "job_id") or _parse_uuid_value(fallback_data, "job_id") or NIL_UUID - key_type = get_str_value(row_data, "key_type") or get_str_value(fallback_data, "key_type") - notification_status = get_notification_status(row_data) or get_notification_status(fallback_data) - created_at = _parse_datetime_value(row_data, "created_at") or _parse_datetime_value(fallback_data, "created_at") + db.session.commit() + if last_nextlsn: + _advance_replication_slot(last_nextlsn) - if not service_id or not template_id or not notification_type or not key_type or not notification_status or not created_at: - return None + current_app.logger.info( + "Replication slot changes processed", + extra={ + "celery_task": "check-replication-slot-changes", + "changes_count": fetched_changes, + "processed_changes": processed_changes, + "ignored_changes": ignored_changes, + "service_stats_delta_buckets": len(service_stats_deltas), + }, + ) + except Exception as exc: + db.session.rollback() + retry_count = self.request.retries + if retry_count < 3: + raise self.retry(exc=exc, countdown=2**retry_count) from exc - return ( - convert_utc_to_bst(created_at).date(), - template_id, - service_id, - job_id, - notification_type, - key_type, - notification_status, - ) + current_app.logger.error( + "Replication slot query failed after 3 retries", + exc_info=True, + extra={"celery_task": "check-replication-slot-changes"}, + ) + raise + finally: + if lock_acquired: + try: + _advisory_unlock(REPLICATION_ADVISORY_LOCK_ID) + except Exception: + current_app.logger.exception( + "Failed to release advisory lock", + extra={"celery_task": "check-replication-slot-changes"}, + ) +# 2. Attempt to acquire a Postgres advisory lock used to serialize replication processing. +# Returns True when this worker owns the lock and can proceed safely. def _try_advisory_lock(lock_id: int) -> bool: return bool(db.session.execute(text("SELECT pg_try_advisory_lock(:lock_id)"), {"lock_id": lock_id}).scalar()) -def _advisory_unlock(lock_id: int) -> None: - db.session.execute(text("SELECT pg_advisory_unlock(:lock_id)"), {"lock_id": lock_id}) - - -def _advance_replication_slot(lsn: str) -> None: - db.session.execute( - text("SELECT pg_replication_slot_advance(:slot_name, :lsn)"), - {"slot_name": REPLICATION_SLOT_NAME, "lsn": lsn}, - ) - - +# 3. Parse a list of WAL-derived row changes into fine-grained stats deltas keyed by full +# dimensions. Also tracks processing counters and the latest LSN that was seen. def _build_counter_from_changes(changes: list[ParsedRow]) -> tuple[Counter[FullDimensions], int, int, str | None]: counter: Counter[FullDimensions] = Counter() processed_changes = 0 @@ -158,98 +185,88 @@ def _build_counter_from_changes(changes: list[ParsedRow]) -> tuple[Counter[FullD return counter, processed_changes, ignored_changes, last_nextlsn -def _roll_up_service_stats_deltas(counter: Counter[FullDimensions]) -> Counter[ServiceStatsDimensionsKey]: - deltas: Counter[ServiceStatsDimensionsKey] = Counter() - for dimensions, delta in counter.items(): - _, template_id, service_id, _, notification_type, _, notification_status = dimensions - deltas[(service_id, template_id, notification_type, notification_status)] += delta +# 4. Build the complete per-notification dimensions tuple from either the current row or +# previous row image, with fallback across both when one side lacks a value. +def _build_dimensions(change: ParsedRow, *, use_previous_row: bool) -> FullDimensions | None: + if use_previous_row: + row_data = change["previous_row_data"] + fallback_data = change["current_row_data"] + else: + row_data = change["current_row_data"] + fallback_data = change["previous_row_data"] - return deltas + service_id = _parse_uuid_value(row_data, "service_id") or _parse_uuid_value(fallback_data, "service_id") + template_id = _parse_uuid_value(row_data, "template_id") or _parse_uuid_value(fallback_data, "template_id") + notification_type = get_str_value(row_data, "notification_type") or get_str_value( + fallback_data, "notification_type" + ) + job_id = _parse_uuid_value(row_data, "job_id") or _parse_uuid_value(fallback_data, "job_id") or NIL_UUID + key_type = get_str_value(row_data, "key_type") or get_str_value(fallback_data, "key_type") + notification_status = get_notification_status(row_data) or get_notification_status(fallback_data) + created_at = _parse_datetime_value(row_data, "created_at") or _parse_datetime_value(fallback_data, "created_at") + + if not service_id or not template_id or not notification_type or not key_type or not notification_status or not created_at: + return None + return ( + convert_utc_to_bst(created_at).date(), + template_id, + service_id, + job_id, + notification_type, + key_type, + notification_status, + ) + + +# 5. Safely parse a UUID field from row data. Invalid or missing values are treated as None +# so malformed entries do not break replication processing. +def _parse_uuid_value(row_data: RowData, key: str) -> UUID | None: + raw_value = get_str_value(row_data, key) + if not raw_value: + return None -# @TODO: this is a temporary task to check the replication slot changes, -# we will need to implement the logic to process the changes and update the dashboard accordingly -@notify_celery.task(bind=True, name="check-replication-slot-changes") -@cronitor("check-replication-slot-changes") -def check_replication_slot_changes(self): - lock_acquired = False try: - with current_app.app_context(): - lock_acquired = _try_advisory_lock(REPLICATION_ADVISORY_LOCK_ID) - if not lock_acquired: - current_app.logger.info( - "Replication slot lock not acquired", - extra={"celery_task": "check-replication-slot-changes"}, - ) - return + return UUID(raw_value) + except ValueError: + return None - changes = get_replication_changes( - peek=True, - slot_name=REPLICATION_SLOT_NAME, - upto_nchanges=REPLICATION_SLOT_UPTO_NCHANGES, - table_names=("public.notifications", "public.notification_history"), - include_lsn=True, - format_version=1, - include_types=False, - include_typmod=False, - ) - fetched_changes = len(changes) - if fetched_changes == 0: - current_app.logger.info( - "No replication slot changes found", - extra={"celery_task": "check-replication-slot-changes", "changes_count": 0}, - ) - return +# 6. Parse ISO-like datetime values from replication payloads, normalizing trailing "Z" +# to a UTC offset format accepted by datetime.fromisoformat. +def _parse_datetime_value(row_data: RowData, key: str) -> datetime | None: + raw_value = get_str_value(row_data, key) + if not raw_value: + return None - counter, processed_changes, ignored_changes, last_nextlsn = _build_counter_from_changes(changes) - service_stats_deltas = _roll_up_service_stats_deltas(counter) + normalized = raw_value.replace("Z", "+00:00") + try: + return datetime.fromisoformat(normalized) + except ValueError: + return None - for service_stats_key, delta in service_stats_deltas.items(): - if delta == 0: - continue - service_id, template_id, notification_type, notification_status = service_stats_key - dimensions: ServiceStatsDimensions = { - "service_id": service_id, - "template_id": template_id, - "notification_type": notification_type, - "notification_status": notification_status, - } - apply_service_stats_delta(dimensions, delta) +# 7. Collapse full-dimension deltas down to service stats dimensions used by +# app.dao.service_stats_dao.apply_service_stats_delta. +def _roll_up_service_stats_deltas(counter: Counter[FullDimensions]) -> Counter[ServiceStatsDimensionsKey]: + deltas: Counter[ServiceStatsDimensionsKey] = Counter() + for dimensions, delta in counter.items(): + _, template_id, service_id, _, notification_type, _, notification_status = dimensions + deltas[(service_id, template_id, notification_type, notification_status)] += delta - db.session.commit() - if last_nextlsn: - _advance_replication_slot(last_nextlsn) + return deltas - current_app.logger.info( - "Replication slot changes processed", - extra={ - "celery_task": "check-replication-slot-changes", - "changes_count": fetched_changes, - "processed_changes": processed_changes, - "ignored_changes": ignored_changes, - "service_stats_delta_buckets": len(service_stats_deltas), - }, - ) - except Exception as exc: - db.session.rollback() - retry_count = self.request.retries - if retry_count < 3: - raise self.retry(exc=exc, countdown=2**retry_count) from exc - current_app.logger.error( - "Replication slot query failed after 3 retries", - exc_info=True, - extra={"celery_task": "check-replication-slot-changes"}, - ) - raise - finally: - if lock_acquired: - try: - _advisory_unlock(REPLICATION_ADVISORY_LOCK_ID) - except Exception: - current_app.logger.exception( - "Failed to release advisory lock", - extra={"celery_task": "check-replication-slot-changes"}, - ) +# 8. Advance the logical replication slot after successful commit so processed changes are +# acknowledged and are not replayed on the next task run. +def _advance_replication_slot(lsn: str) -> None: + db.session.execute( + text("SELECT pg_replication_slot_advance(:slot_name, :lsn)"), + {"slot_name": REPLICATION_SLOT_NAME, "lsn": lsn}, + ) + + +# 9. Release the advisory lock acquired at task start. This is called in finally to avoid +# lock leaks when errors or retries occur. +def _advisory_unlock(lock_id: int) -> None: + db.session.execute(text("SELECT pg_advisory_unlock(:lock_id)"), {"lock_id": lock_id}) diff --git a/app/dao/service_stats_dao.py b/app/dao/service_stats_dao.py index ceadca53d8..340b692f05 100644 --- a/app/dao/service_stats_dao.py +++ b/app/dao/service_stats_dao.py @@ -16,6 +16,14 @@ class ServiceStatsDimensions(TypedDict): notification_status: str +# 1. Public write API used by callers to apply a single aggregated delta into +# service statistics for a specific dimensions tuple. +def apply_service_stats_delta(dimensions: ServiceStatsDimensions, delta: int) -> None: + _update_service_stats_count(dimensions, delta) + + +# 2. Internal persistence routine that applies the delta with UPSERT behavior for +# positive changes and bounded decrement behavior for negative changes. def _update_service_stats_count(dimensions: ServiceStatsDimensions, delta: int) -> None: if delta == 0: return @@ -59,10 +67,8 @@ def _update_service_stats_count(dimensions: ServiceStatsDimensions, delta: int) ) -def apply_service_stats_delta(dimensions: ServiceStatsDimensions, delta: int) -> None: - _update_service_stats_count(dimensions, delta) - - +# 3. Public read API that returns all stats rows for a single service, with a +# defensive guard to avoid querying when no service id is provided. def dao_fetch_stats_for_service(service_id: UUID) -> list[ServiceStats]: """ Fetch service stats for a specific service. diff --git a/app/replication/rest.py b/app/replication/rest.py index 109cb6adf0..bf5b5b6997 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -11,12 +11,18 @@ @replication_blueprint.route("/process-slot-changes", methods=["POST"]) +# Trigger immediate processing of pending replication slot changes via the Celery task. +# This endpoint is mainly operational, allowing manual execution for verification +# or recovery scenarios when scheduled processing needs to be nudged. def trigger_process_replication_slot_changes(): check_replication_slot_changes() return jsonify({"message": "check-replication-slot-changes task executed"}), 201 @replication_blueprint.route("/check-slot-changes", methods=["GET"]) +# Inspect replication slot changes in peek mode so callers can review pending records +# without consuming or advancing the slot position. Useful for debugging parser logic +# and validating what data would be processed by the worker task. def trigger_check_replication_slot_changes(): changes = get_replication_changes(peek=True) @@ -24,11 +30,16 @@ def trigger_check_replication_slot_changes(): @replication_blueprint.route("/simulate-notification-load", methods=["POST"]) +# Run a controlled notification-load simulation used for replication performance testing. +# The underlying helper encapsulates test data generation and returns a response payload +# describing the simulated operation outcome. def simulate_notification_load(): return performance_test_simulate_notification_load() @replication_blueprint.route("/stats/", methods=["GET"]) +# Return aggregated service statistics grouped by template, notification type, and +# notification status for the requested service id, formatted for API consumers. def get_service_stats(service_id): """ Get service stats for a specific service. From b6ceff0a52095a1c4525e11ae4986f5200c9c89c Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 12:19:41 +0100 Subject: [PATCH 24/26] Rename service_stats to ft_service_stats and update related references --- .../process_replication_slot_changes.py | 4 +- app/dao/fact_service_stats_dao.py | 85 +++++++++++++++++++ app/dao/service_stats_dao.py | 2 +- app/models.py | 12 +-- app/replication/rest.py | 2 +- migrations/.current-alembic-head | 2 +- ...ename_service_stats_to_ft_service_stats.py | 44 ++++++++++ 7 files changed, 140 insertions(+), 11 deletions(-) create mode 100644 app/dao/fact_service_stats_dao.py create mode 100644 migrations/versions/0556_rename_service_stats_to_ft_service_stats.py diff --git a/app/celery/process_replication_slot_changes.py b/app/celery/process_replication_slot_changes.py index 29a8144029..357c86b745 100644 --- a/app/celery/process_replication_slot_changes.py +++ b/app/celery/process_replication_slot_changes.py @@ -7,7 +7,7 @@ from app import current_app, db, notify_celery from app.cronitor import cronitor -from app.dao.service_stats_dao import ServiceStatsDimensions, apply_service_stats_delta +from app.dao.fact_service_stats_dao import ServiceStatsDimensions, apply_service_stats_delta from app.replication.replication_changes_utils import ( ParsedRow, RowData, @@ -247,7 +247,7 @@ def _parse_datetime_value(row_data: RowData, key: str) -> datetime | None: # 7. Collapse full-dimension deltas down to service stats dimensions used by -# app.dao.service_stats_dao.apply_service_stats_delta. +# app.dao.fact_service_stats_dao.apply_service_stats_delta. def _roll_up_service_stats_deltas(counter: Counter[FullDimensions]) -> Counter[ServiceStatsDimensionsKey]: deltas: Counter[ServiceStatsDimensionsKey] = Counter() for dimensions, delta in counter.items(): diff --git a/app/dao/fact_service_stats_dao.py b/app/dao/fact_service_stats_dao.py new file mode 100644 index 0000000000..1fe3086996 --- /dev/null +++ b/app/dao/fact_service_stats_dao.py @@ -0,0 +1,85 @@ +import uuid +from typing import TypedDict +from uuid import UUID + +from sqlalchemy import func +from sqlalchemy.dialects.postgresql import insert + +from app import db +from app.models import FactServiceStats + + +class ServiceStatsDimensions(TypedDict): + service_id: UUID + template_id: UUID + notification_type: str + notification_status: str + + +# 1. Public write API used by callers to apply a single aggregated delta into +# service statistics for a specific dimensions tuple. +def apply_service_stats_delta(dimensions: ServiceStatsDimensions, delta: int) -> None: + _update_service_stats_count(dimensions, delta) + + +# 2. Internal persistence routine that applies the delta with UPSERT behavior for +# positive changes and bounded decrement behavior for negative changes. +def _update_service_stats_count(dimensions: ServiceStatsDimensions, delta: int) -> None: + if delta == 0: + return + + dimension_values = { + "service_id": dimensions["service_id"], + "template_id": dimensions["template_id"], + "notification_type": dimensions["notification_type"], + "notification_status": dimensions["notification_status"], + } + filters = ( + FactServiceStats.service_id == dimension_values["service_id"], + FactServiceStats.template_id == dimension_values["template_id"], + FactServiceStats.notification_type == dimension_values["notification_type"], + FactServiceStats.notification_status == dimension_values["notification_status"], + ) + + if delta > 0: + stmt = insert(FactServiceStats).values( + id=uuid.uuid4(), + **dimension_values, + count=delta, + ) + stmt = stmt.on_conflict_do_update( + constraint="uix_ft_service_stats_dimensions", + set_={ + "count": FactServiceStats.count + delta, + }, + ) + db.session.execute(stmt) + else: + ( + db.session.query(FactServiceStats) + .filter(*filters) + .update( + { + "count": func.greatest(FactServiceStats.count + delta, 0), + }, + synchronize_session=False, + ) + ) + + +# 3. Public read API that returns all stats rows for a single service, with a +# defensive guard to avoid querying when no service id is provided. +def dao_fetch_stats_for_service(service_id: UUID) -> list[FactServiceStats]: + """ + Fetch service stats for a specific service. + + Args: + service_id: UUID of the service to fetch stats for + + Returns: + List of FactServiceStats records for the specified service + """ + if not service_id: + return [] + + return db.session.query(FactServiceStats).filter(FactServiceStats.service_id == service_id).all() diff --git a/app/dao/service_stats_dao.py b/app/dao/service_stats_dao.py index 340b692f05..15f5601ea6 100644 --- a/app/dao/service_stats_dao.py +++ b/app/dao/service_stats_dao.py @@ -48,7 +48,7 @@ def _update_service_stats_count(dimensions: ServiceStatsDimensions, delta: int) count=delta, ) stmt = stmt.on_conflict_do_update( - constraint="uix_service_stats_dimensions", + constraint="uix_ft_service_stats_dimensions", set_={ "count": ServiceStats.count + delta, }, diff --git a/app/models.py b/app/models.py index 7850695347..e1bd965a33 100644 --- a/app/models.py +++ b/app/models.py @@ -2414,8 +2414,8 @@ class FactNotificationStatus(db.Model): ) -class ServiceStats(db.Model): - __tablename__ = "service_stats" +class FactServiceStats(db.Model): + __tablename__ = "ft_service_stats" id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) service_id = db.Column(UUID(as_uuid=True), db.ForeignKey("services.id"), nullable=False) @@ -2430,22 +2430,22 @@ class ServiceStats(db.Model): "template_id", "notification_type", "notification_status", - name="uix_service_stats_dimensions", + name="uix_ft_service_stats_dimensions", ), Index( - "ix_svc_stats_svc_ntype_nstatus", + "ix_ft_svc_stats_svc_ntype_nstatus", "service_id", "notification_type", "notification_status", ), Index( - "ix_svc_stats_tmpl_ntype_nstatus", + "ix_ft_svc_stats_tmpl_ntype_nstatus", "template_id", "notification_type", "notification_status", ), Index( - "ix_service_stats_service_id_template_id", + "ix_ft_service_stats_service_id_template_id", "service_id", "template_id", ), diff --git a/app/replication/rest.py b/app/replication/rest.py index bf5b5b6997..ae62c2293d 100644 --- a/app/replication/rest.py +++ b/app/replication/rest.py @@ -1,7 +1,7 @@ from flask import Blueprint, jsonify from app.celery.process_replication_slot_changes import check_replication_slot_changes -from app.dao.service_stats_dao import dao_fetch_stats_for_service +from app.dao.fact_service_stats_dao import dao_fetch_stats_for_service from app.replication.performance_test import simulate_notification_load as performance_test_simulate_notification_load from app.replication.replication_changes_utils import get_replication_changes from app.v2.errors import register_errors diff --git a/migrations/.current-alembic-head b/migrations/.current-alembic-head index 33ffd288c9..411fefa373 100644 --- a/migrations/.current-alembic-head +++ b/migrations/.current-alembic-head @@ -1 +1 @@ -0555_notif_replica_identity +0556_rename_service_stats_to_fts diff --git a/migrations/versions/0556_rename_service_stats_to_ft_service_stats.py b/migrations/versions/0556_rename_service_stats_to_ft_service_stats.py new file mode 100644 index 0000000000..d8cd4a8d5f --- /dev/null +++ b/migrations/versions/0556_rename_service_stats_to_ft_service_stats.py @@ -0,0 +1,44 @@ +""" +Rename service_stats table to ft_service_stats. + +Revision ID: 0556_rename_service_stats_to_fts +Revises: 0555_notif_replica_identity +Create Date: 2026-05-22 00:00:00 +""" + +from alembic import op + +revision = "0556_rename_service_stats_to_fts" +down_revision = "0555_notif_replica_identity" + + +def upgrade(): + op.rename_table("service_stats", "ft_service_stats") + + op.execute( + "ALTER TABLE ft_service_stats " + "RENAME CONSTRAINT uix_service_stats_dimensions TO uix_ft_service_stats_dimensions" + ) + + op.execute("ALTER INDEX ix_svc_stats_svc_ntype_nstatus RENAME TO ix_ft_svc_stats_svc_ntype_nstatus") + op.execute("ALTER INDEX ix_svc_stats_tmpl_ntype_nstatus RENAME TO ix_ft_svc_stats_tmpl_ntype_nstatus") + op.execute( + "ALTER INDEX ix_service_stats_service_id_template_id " + "RENAME TO ix_ft_service_stats_service_id_template_id" + ) + + +def downgrade(): + op.execute( + "ALTER INDEX ix_ft_service_stats_service_id_template_id " + "RENAME TO ix_service_stats_service_id_template_id" + ) + op.execute("ALTER INDEX ix_ft_svc_stats_tmpl_ntype_nstatus RENAME TO ix_svc_stats_tmpl_ntype_nstatus") + op.execute("ALTER INDEX ix_ft_svc_stats_svc_ntype_nstatus RENAME TO ix_svc_stats_svc_ntype_nstatus") + + op.execute( + "ALTER TABLE ft_service_stats " + "RENAME CONSTRAINT uix_ft_service_stats_dimensions TO uix_service_stats_dimensions" + ) + + op.rename_table("ft_service_stats", "service_stats") From a46f457d3b11142390f37356461abd940c302b36 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 22 May 2026 12:23:10 +0100 Subject: [PATCH 25/26] Enhance documentation for replication change handling functions --- app/replication/replication_changes_utils.py | 85 +++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/app/replication/replication_changes_utils.py b/app/replication/replication_changes_utils.py index 40885735f4..d5bd5107af 100644 --- a/app/replication/replication_changes_utils.py +++ b/app/replication/replication_changes_utils.py @@ -45,6 +45,13 @@ class ParsedRow(TypedDict): def _quote_sql_literal(value: str) -> str: + """ + Escape single quotes for safe embedding into SQL string literals. + + This helper performs the standard PostgreSQL escaping rule for single + quotes by doubling them (e.g. `O'Reilly` -> `O''Reilly`). It is used when + constructing the replication function call via a SQL text string. + """ return value.replace("'", "''") @@ -60,7 +67,31 @@ def get_replication_changes( include_typmod: bool | None = None, ) -> list[ParsedRow]: """ - Process the replication changes and return a list of parsed changes. + Fetch logical replication changes and normalize them into parsed rows. + + The query calls either: + - `pg_logical_slot_peek_changes` (non-destructive read), or + - `pg_logical_slot_get_changes` (consumes changes from the slot) + + Option flags are passed using `wal2json`-compatible key/value arguments. + Each returned change row is expected to contain a JSON payload in `data` + and an optional `lsn` fallback. + + Args: + peek: If True, read changes without advancing the replication slot. + If False, consume and advance the slot. + slot_name: Logical replication slot name to read from. + upto_nchanges: Optional cap for number of changes returned by PostgreSQL. + `None` maps to SQL `NULL` (database default behavior). + table_names: Optional tuple of fully-qualified tables to include. + Passed through `add-tables` option. + include_lsn: Whether to include LSN in output payload (wal2json option). + format_version: wal2json format version. + include_types: Whether to include PostgreSQL type names in output. + include_typmod: Whether to include typmod metadata in output. + + Returns: + Flat list of parsed rows produced from all JSON `change` entries. """ options: list[str] = ["pretty-print", "on"] @@ -77,10 +108,12 @@ def get_replication_changes( options_sql = "" if options: + # Build SQL literal list: 'key', 'value', 'key', 'value', ... options_sql = ",\n " + ",\n ".join( f"'{_quote_sql_literal(option)}'" for option in options ) + # Choose peek/get behavior based on whether changes should be consumed. function_name = "pg_logical_slot_peek_changes" if peek else "pg_logical_slot_get_changes" query = f""" SELECT * FROM {function_name}( @@ -93,6 +126,7 @@ def get_replication_changes( result = db.session.execute(text(query)) changes = [dict(change) for change in result.mappings().all()] + # Flatten per-change payload arrays into one list of normalized rows. parsed_data = [row for change in changes for row in (parse_change_data(cast(ReplicationChangeRow, change)) or [])] return parsed_data @@ -100,7 +134,21 @@ def get_replication_changes( def parse_change_data(change: ReplicationChangeRow) -> list[ParsedRow] | None: """ - Parse the change data and return a dictionary with the relevant information. + Parse a single replication result row into normalized parsed rows. + + The database returns a row containing: + - `data`: a JSON document produced by wal2json + - `lsn`: replication position (used as fallback when `nextlsn` is absent) + + If the payload has no `change` entries, `None` is returned so callers can + skip this item naturally in flattening logic. + + Args: + change: Raw row from PostgreSQL logical decoding query. + + Returns: + A list of parsed row dictionaries, or `None` when no change entries + are present. """ payload = cast(ChangePayload, json.loads(change["data"])) nextlsn = payload.get("nextlsn") or change.get("lsn") @@ -114,7 +162,19 @@ def parse_change_data(change: ReplicationChangeRow) -> list[ParsedRow] | None: def parse_row_data(row: ReplicationJsonRow, nextlsn: str | None = None) -> ParsedRow: """ - Create a mapping of column names to their values for the given change. + Convert one wal2json change item into the internal `ParsedRow` shape. + + For UPDATE/DELETE events, wal2json may include `oldkeys`, which represent + key columns from the previous row state. Those are mapped into + `previous_row_data`. + + Args: + row: One item from wal2json `change` list. + nextlsn: Replication position associated with this change item. + + Returns: + ParsedRow containing event type, table name, LSN, current row values, + and previous key values. """ column_names = row.get("columnnames", []) column_values = row.get("columnvalues", []) @@ -132,6 +192,17 @@ def parse_row_data(row: ReplicationJsonRow, nextlsn: str | None = None) -> Parse def get_str_value(row_data: RowData, key: str) -> str | None: + """ + Safely read a string value from a row data mapping. + + Args: + row_data: Parsed row dictionary containing JSON-compatible values. + key: Field name to read. + + Returns: + The string value for `key` when present and of type `str`; otherwise + `None`. + """ value = row_data.get(key) if isinstance(value, str): return value @@ -139,4 +210,12 @@ def get_str_value(row_data: RowData, key: str) -> str | None: def get_notification_status(row_data: RowData) -> str | None: + """ + Extract notification status from known status fields. + + This helper supports both legacy/current key names and returns the first + available string value in this order: + 1. `notification_status` + 2. `status` + """ return get_str_value(row_data, "notification_status") or get_str_value(row_data, "status") From aeb75416ec7899486a292ea0b6dee1487c138d49 Mon Sep 17 00:00:00 2001 From: Dilwoar Hussain Date: Fri, 29 May 2026 15:07:31 +0100 Subject: [PATCH 26/26] Refactor service_stats migration: rename table to ft_service_stats and update related indices; remove obsolete migrations for notifications replica identity --- migrations/.current-alembic-head | 2 +- .../0553_notifications_id_status_idx.py | 17 +++++- .../versions/0554_create_service_stats.py | 28 ++++----- .../0555_notifications_replica_identity.py | 60 ------------------- ...ename_service_stats_to_ft_service_stats.py | 44 -------------- 5 files changed, 31 insertions(+), 120 deletions(-) delete mode 100644 migrations/versions/0555_notifications_replica_identity.py delete mode 100644 migrations/versions/0556_rename_service_stats_to_ft_service_stats.py diff --git a/migrations/.current-alembic-head b/migrations/.current-alembic-head index 411fefa373..74c92585a5 100644 --- a/migrations/.current-alembic-head +++ b/migrations/.current-alembic-head @@ -1 +1 @@ -0556_rename_service_stats_to_fts +0554_create_service_stats diff --git a/migrations/versions/0553_notifications_id_status_idx.py b/migrations/versions/0553_notifications_id_status_idx.py index ae00666a24..5c21fefe66 100644 --- a/migrations/versions/0553_notifications_id_status_idx.py +++ b/migrations/versions/0553_notifications_id_status_idx.py @@ -9,20 +9,35 @@ def upgrade(): + op.execute("ALTER TABLE notifications ALTER COLUMN notification_status SET NOT NULL;") + with op.get_context().autocommit_block(): op.create_index( "ix_notifications_id_notification_status", "notifications", ["id", "notification_status"], - unique=False, + unique=True, postgresql_concurrently=True, ) + op.execute("ALTER TABLE notifications REPLICA IDENTITY USING INDEX ix_notifications_id_notification_status;") + def downgrade(): + op.execute("ALTER TABLE notifications REPLICA IDENTITY DEFAULT;") + + op.execute("ALTER TABLE notifications ALTER COLUMN notification_status DROP NOT NULL;") + with op.get_context().autocommit_block(): op.drop_index( "ix_notifications_id_notification_status", table_name="notifications", postgresql_concurrently=True, ) + op.create_index( + "ix_notifications_id_notification_status", + "notifications", + ["id", "notification_status"], + unique=False, + postgresql_concurrently=True, + ) diff --git a/migrations/versions/0554_create_service_stats.py b/migrations/versions/0554_create_service_stats.py index 55ba5c86bf..7b698802e8 100644 --- a/migrations/versions/0554_create_service_stats.py +++ b/migrations/versions/0554_create_service_stats.py @@ -16,7 +16,7 @@ def upgrade(): op.create_table( - "service_stats", + "ft_service_stats", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("service_id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("template_id", postgresql.UUID(as_uuid=True), nullable=False), @@ -32,38 +32,38 @@ def upgrade(): "template_id", "notification_type", "notification_status", - name="uix_service_stats_dimensions", + name="uix_ft_service_stats_dimensions", ), ) op.create_index( - "ix_svc_stats_svc_ntype_nstatus", - "service_stats", + "ix_ft_svc_stats_svc_ntype_nstatus", + "ft_service_stats", ["service_id", "notification_type", "notification_status"], unique=False, ) op.create_index( - "ix_svc_stats_tmpl_ntype_nstatus", - "service_stats", + "ix_ft_svc_stats_tmpl_ntype_nstatus", + "ft_service_stats", ["template_id", "notification_type", "notification_status"], unique=False, ) op.create_index( - "ix_service_stats_service_id_template_id", - "service_stats", + "ix_ft_service_stats_service_id_template_id", + "ft_service_stats", ["service_id", "template_id"], unique=False, ) def downgrade(): - op.drop_index("ix_service_stats_service_id_template_id", table_name="service_stats") + op.drop_index("ix_ft_service_stats_service_id_template_id", table_name="ft_service_stats") op.drop_index( - "ix_svc_stats_tmpl_ntype_nstatus", - table_name="service_stats", + "ix_ft_svc_stats_tmpl_ntype_nstatus", + table_name="ft_service_stats", ) op.drop_index( - "ix_svc_stats_svc_ntype_nstatus", - table_name="service_stats", + "ix_ft_svc_stats_svc_ntype_nstatus", + table_name="ft_service_stats", ) - op.drop_table("service_stats") + op.drop_table("ft_service_stats") diff --git a/migrations/versions/0555_notifications_replica_identity.py b/migrations/versions/0555_notifications_replica_identity.py deleted file mode 100644 index 26a8143162..0000000000 --- a/migrations/versions/0555_notifications_replica_identity.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Set REPLICA IDENTITY on notifications table using (id, notification_status) index. - -PostgreSQL requires all index columns to be NOT NULL for REPLICA IDENTITY USING INDEX, -and a unique index. This migration: - 1. Drops the existing non-unique index and recreates it as unique (required by PG). - 2. Sets REPLICA IDENTITY USING INDEX so only id + notification_status are written to WAL. - -Revision ID: 0555_notif_replica_identity -Revises: 0554_create_service_stats -Create Date: 2026-05-19 00:00:00 -""" - -from alembic import op - -revision = "0555_notif_replica_identity" -down_revision = "0554_create_service_stats" - - -def upgrade(): - ## NEEDED TO SET NOT NULL ON notification_status TO CREATE UNIQUE INDEX, THEN CAN SET REPLICA IDENTITY - op.execute("ALTER TABLE notifications ALTER COLUMN notification_status SET NOT NULL;") - - with op.get_context().autocommit_block(): - op.drop_index( - "ix_notifications_id_notification_status", - table_name="notifications", - postgresql_concurrently=True, - ) - op.create_index( - "ix_notifications_id_notification_status", - "notifications", - ["id", "notification_status"], - unique=True, - postgresql_concurrently=True, - ) - - op.execute("ALTER TABLE notifications REPLICA IDENTITY USING INDEX ix_notifications_id_notification_status;") - - -def downgrade(): - op.execute("ALTER TABLE notifications REPLICA IDENTITY DEFAULT;") - - op.execute("ALTER TABLE notifications ALTER COLUMN notification_status DROP NOT NULL;") - - with op.get_context().autocommit_block(): - op.drop_index( - "ix_notifications_id_notification_status", - table_name="notifications", - postgresql_concurrently=True, - ) - op.create_index( - "ix_notifications_id_notification_status", - "notifications", - ["id", "notification_status"], - unique=False, - postgresql_concurrently=True, - ) - - diff --git a/migrations/versions/0556_rename_service_stats_to_ft_service_stats.py b/migrations/versions/0556_rename_service_stats_to_ft_service_stats.py deleted file mode 100644 index d8cd4a8d5f..0000000000 --- a/migrations/versions/0556_rename_service_stats_to_ft_service_stats.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Rename service_stats table to ft_service_stats. - -Revision ID: 0556_rename_service_stats_to_fts -Revises: 0555_notif_replica_identity -Create Date: 2026-05-22 00:00:00 -""" - -from alembic import op - -revision = "0556_rename_service_stats_to_fts" -down_revision = "0555_notif_replica_identity" - - -def upgrade(): - op.rename_table("service_stats", "ft_service_stats") - - op.execute( - "ALTER TABLE ft_service_stats " - "RENAME CONSTRAINT uix_service_stats_dimensions TO uix_ft_service_stats_dimensions" - ) - - op.execute("ALTER INDEX ix_svc_stats_svc_ntype_nstatus RENAME TO ix_ft_svc_stats_svc_ntype_nstatus") - op.execute("ALTER INDEX ix_svc_stats_tmpl_ntype_nstatus RENAME TO ix_ft_svc_stats_tmpl_ntype_nstatus") - op.execute( - "ALTER INDEX ix_service_stats_service_id_template_id " - "RENAME TO ix_ft_service_stats_service_id_template_id" - ) - - -def downgrade(): - op.execute( - "ALTER INDEX ix_ft_service_stats_service_id_template_id " - "RENAME TO ix_service_stats_service_id_template_id" - ) - op.execute("ALTER INDEX ix_ft_svc_stats_tmpl_ntype_nstatus RENAME TO ix_svc_stats_tmpl_ntype_nstatus") - op.execute("ALTER INDEX ix_ft_svc_stats_svc_ntype_nstatus RENAME TO ix_svc_stats_svc_ntype_nstatus") - - op.execute( - "ALTER TABLE ft_service_stats " - "RENAME CONSTRAINT uix_ft_service_stats_dimensions TO uix_service_stats_dimensions" - ) - - op.rename_table("ft_service_stats", "service_stats")