From 152bb0c60a026ab44ef83daa88db327aa3c5142a Mon Sep 17 00:00:00 2001 From: Richard Parke Date: Mon, 8 Jun 2026 09:56:26 +0100 Subject: [PATCH] Add celery beat task, `archive_pending_files` to automatically archive files that have been created but not made live after 24 hours. This is vital cleanup work that ensures that these stale files don't persist and are properly cleaned up, eventually being removed from S3 too. --- app/celery/scheduled_tasks.py | 15 +++++++++++- app/config.py | 7 ++++++ app/dao/template_email_files_dao.py | 15 +++++++++++- tests/app/celery/test_scheduled_tasks.py | 29 ++++++++++++++++++++++++ tests/app/db.py | 2 ++ 5 files changed, 66 insertions(+), 2 deletions(-) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index 022a24957d..8839307c01 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -76,7 +76,7 @@ dao_find_services_sending_to_tv_numbers, dao_find_services_with_high_failure_rates, ) -from app.dao.template_email_files_dao import dao_get_template_email_files_by_template_id +from app.dao.template_email_files_dao import dao_archive_pending_files, dao_get_template_email_files_by_template_id from app.dao.templates_dao import dao_get_template_by_id from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago, get_users_for_research from app.letters.utils import generate_letter_pdf_filename @@ -109,6 +109,19 @@ def run_scheduled_jobs(): raise +@notify_celery.task(name="archive-pending-files") +def archive_pending_files(): + try: + number_of_files_archived = dao_archive_pending_files() + current_app.logger.info( + "Archived %(number_of_files_archived)s files created more than 24 hours ago that are still in pending", + extra={"number_of_files_archived": number_of_files_archived}, + ) + except SQLAlchemyError: + current_app.logger.exception("Failed to archive pending files") + raise + + @notify_celery.task(name="delete-verify-codes") def delete_verify_codes(): try: diff --git a/app/config.py b/app/config.py index cdee1ae88c..af7cc252bd 100644 --- a/app/config.py +++ b/app/config.py @@ -231,6 +231,8 @@ class Config: MAX_VERIFY_CODE_COUNT = 5 MAX_FAILED_LOGIN_COUNT = 10 + TEMPLATE_EMAIL_FILE_ARCHIVE_PERIOD_IN_HOURS = 24 + # these should always add up to 100% SMS_PROVIDER_RESTING_POINTS = {"mmg": 51, "firetext": 49} @@ -485,6 +487,11 @@ class Config: "schedule": crontab(hour=10, minute=0, day_of_week="wed"), "options": {"queue": QueueNames.PERIODIC}, }, + "archive-pending-files": { + "task": "archive-pending-files", + "schedule": crontab(hour=3, minute=33), + "options": {"queue": QueueNames.PERIODIC}, + }, # first tuesday of every month "change-dvla-api-key": { "task": "change-dvla-api-key", diff --git a/app/dao/template_email_files_dao.py b/app/dao/template_email_files_dao.py index bea13ca55e..7dc2af53c7 100644 --- a/app/dao/template_email_files_dao.py +++ b/app/dao/template_email_files_dao.py @@ -137,9 +137,22 @@ def dao_make_pending_template_email_file_live(template_email_file: TemplateEmail @version_class( VersionOptions(TemplateEmailFile, history_class=TemplateEmailFileHistory), ) -def dao_archive_template_email_file(file_to_archive, archived_by_id, template_version): +def dao_archive_template_email_file(file_to_archive, archived_by_id, template_version=None): + if not template_version: + template_version = Template.query.get(file_to_archive.template_id).version if not file_to_archive.archived_at: file_to_archive.archived_at = datetime.datetime.utcnow() file_to_archive.archived_by_id = archived_by_id file_to_archive.template_version = template_version db.session.add(file_to_archive) + + +def dao_archive_pending_files(): + files_in_pending = TemplateEmailFile.query.filter( + TemplateEmailFile.pending, + datetime.datetime.utcnow() - TemplateEmailFile.created_at + > datetime.timedelta(hours=current_app.config.get("TEMPLATE_EMAIL_FILE_ARCHIVE_PERIOD_IN_HOURS")), + ).all() + for file in files_in_pending: + dao_archive_template_email_file(file, archived_by_id=file.created_by_id) + return len(files_in_pending) diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index dac7bb4ad3..477d133946 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -22,6 +22,7 @@ from app.celery.provider_tasks import deliver_email, deliver_sms from app.celery.scheduled_tasks import ( _check_slow_text_message_delivery_reports_and_raise_error_if_needed, + archive_pending_files, change_dvla_api_key, change_dvla_password, check_for_low_available_inbound_sms_numbers, @@ -64,6 +65,7 @@ from app.dao.jobs_dao import dao_get_job_by_id from app.dao.notifications_dao import SlowProviderDeliveryReport from app.dao.provider_details_dao import get_provider_details_by_identifier +from app.dao.template_email_files_dao import dao_get_template_email_file_by_id from app.models import Event, InboundNumber, Notification from tests.app import load_example_csv from tests.app.db import ( @@ -72,6 +74,7 @@ create_notification, create_organisation, create_template, + create_template_email_file, create_user, ) from tests.conftest import _with_message_group_id, set_config, set_config_values @@ -106,6 +109,32 @@ def test_should_update_scheduled_jobs_and_put_on_queue(mock_celery_task, sample_ ) +@pytest.mark.parametrize("pending_expirary_exceeded", (True, False)) +def test_archive_pending_files_only_scopes_stale_files(sample_email_template, pending_expirary_exceeded): + with freeze_time("2016-01-01 01:00:00.000000"): + pending_file = create_template_email_file( + template_id=sample_email_template.id, + created_by_id=sample_email_template.created_by_id, + pending=True, + created_at=datetime.utcnow(), + ) + live_file = create_template_email_file( + template_id=sample_email_template.id, + created_by_id=sample_email_template.created_by_id, + pending=False, + created_at=datetime.utcnow(), + ) + with freeze_time("2016-01-02 01:00:01.00000" if pending_expirary_exceeded else "2016-01-02 00:00:00.00000"): + archive_pending_files() + expected_archived_file = dao_get_template_email_file_by_id(str(pending_file.id)) + expected_live_file = dao_get_template_email_file_by_id(str(live_file.id)) + if pending_expirary_exceeded: + assert expected_archived_file.archived_at == datetime.utcnow() + else: + assert not expected_archived_file.archived_at + assert not expected_live_file.archived_at + + def test_should_update_all_scheduled_jobs_and_put_on_queue(sample_template, mock_celery_task): mocked = mock_celery_task(process_job) diff --git a/tests/app/db.py b/tests/app/db.py index 22b2da1262..f056dd262c 100644 --- a/tests/app/db.py +++ b/tests/app/db.py @@ -199,6 +199,7 @@ def create_service_with_defined_sms_sender(sms_sender_value="1234567", *args, ** def create_template_email_file( template_id, created_by_id, + created_at=None, filename="example.pdf", link_text="follow this link", retention_period=90, @@ -212,6 +213,7 @@ def create_template_email_file( "validate_users_email": validate_users_email, "template_id": template_id, "created_by_id": created_by_id, + "created_at": created_at if created_at is not None else datetime.utcnow(), "pending": pending, } template_email_file = TemplateEmailFile(**data)