From 91b3c1ef7ebd07321798f1eb39356a5eddca3b6d Mon Sep 17 00:00:00 2001 From: PascalEgn Date: Tue, 28 Apr 2026 13:44:43 +0200 Subject: [PATCH] global: remove metadata logging --- base-images/airflow/Dockerfile | 5 +-- .../airflow/plugins/global_dag_logger.py | 44 ------------------- docker-compose.yaml | 2 +- tests/integration/oup/test_oup_pull_ftp.py | 2 +- .../test_springer_dag_process_file.py | 2 +- .../springer/test_springer_dag_pull_sftp.py | 2 +- tests/units/clean/test_clean.py | 2 +- 7 files changed, 6 insertions(+), 53 deletions(-) delete mode 100644 base-images/airflow/plugins/global_dag_logger.py diff --git a/base-images/airflow/Dockerfile b/base-images/airflow/Dockerfile index c533cb28..88ed08ce 100644 --- a/base-images/airflow/Dockerfile +++ b/base-images/airflow/Dockerfile @@ -1,4 +1,4 @@ -FROM registry.cern.ch/docker.io/apache/airflow:slim-3.1.8-python3.12 AS base +FROM registry.cern.ch/docker.io/apache/airflow:slim-3.1.8-python3.12 AS dev USER root RUN apt-get update && apt-get install -y \ build-essential \ @@ -25,8 +25,5 @@ RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements-test.txt RUN rm requirements.txt requirements-test.txt requirements-airflow.txt constraints.txt -FROM base AS dev -COPY base-images/airflow/plugins/ /opt/airflow/plugins/ - FROM dev AS prod COPY dags/ /opt/airflow/dags/ diff --git a/base-images/airflow/plugins/global_dag_logger.py b/base-images/airflow/plugins/global_dag_logger.py deleted file mode 100644 index f80f662f..00000000 --- a/base-images/airflow/plugins/global_dag_logger.py +++ /dev/null @@ -1,44 +0,0 @@ -import logging - -from airflow.models import DAG -from airflow.plugins_manager import AirflowPlugin -from airflow.providers.standard.operators.python import PythonOperator - - -def log_dag_info(**context): - logger = logging.getLogger("airflow.task") - ti = context["ti"] - dag = context["dag"] - msg = ( - f"DAG Metadata | " - f"dag_id={dag.dag_id} | " - f"version={ti.dag_version_id} | " - f"tags={list(dag.tags)} | " - f"run_id={context.get('run_id')}" - ) - logger.info(msg) - - -class GlobalDagMetadataLoggerPlugin(AirflowPlugin): - name = "global_dag_metadata_logger" - - def on_load(self, *args, **kwargs): - _original_dag_init = DAG.__init__ - - def patched_dag_init(self, *args, **kwargs): - _original_dag_init(self, *args, **kwargs) - if getattr(self, "_logger_task_injected", False): - return - self._logger_task_injected = True - if "log_dag_metadata" in self.task_ids: - return - log_dag_metadata = PythonOperator( - task_id="log_dag_metadata", python_callable=log_dag_info, dag=self - ) - existing_roots = [ - t for t in self.roots if t.task_id != log_dag_metadata.task_id - ] - for root in existing_roots: - log_dag_metadata >> root - - DAG.__init__ = patched_dag_init diff --git a/docker-compose.yaml b/docker-compose.yaml index efd2a88c..5b5c26d9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -45,7 +45,7 @@ --- x-airflow-common: &airflow-common build: - target: ${BUILD_TARGET:-base} + target: ${BUILD_TARGET:-dev} dockerfile: base-images/airflow/Dockerfile pull_policy: never environment: &airflow-common-env diff --git a/tests/integration/oup/test_oup_pull_ftp.py b/tests/integration/oup/test_oup_pull_ftp.py index ed7f6e90..abf2a885 100644 --- a/tests/integration/oup/test_oup_pull_ftp.py +++ b/tests/integration/oup/test_oup_pull_ftp.py @@ -17,7 +17,7 @@ def dag(): def test_dag_loaded(dag): assert dag is not None - assert len(dag.tasks) == 4 + assert len(dag.tasks) == 3 def test_dag_run( diff --git a/tests/integration/springer/test_springer_dag_process_file.py b/tests/integration/springer/test_springer_dag_process_file.py index a58480c4..441a8a25 100644 --- a/tests/integration/springer/test_springer_dag_process_file.py +++ b/tests/integration/springer/test_springer_dag_process_file.py @@ -105,7 +105,7 @@ def springer_data_files_in_s3(): def test_dag_loaded(dag): assert dag is not None - assert len(dag.tasks) == 8 + assert len(dag.tasks) == 7 publisher = "Springer" diff --git a/tests/integration/springer/test_springer_dag_pull_sftp.py b/tests/integration/springer/test_springer_dag_pull_sftp.py index 2255fe39..180d2ef9 100644 --- a/tests/integration/springer/test_springer_dag_pull_sftp.py +++ b/tests/integration/springer/test_springer_dag_pull_sftp.py @@ -17,7 +17,7 @@ def dag(): def test_dag_loaded(dag): assert dag is not None - assert len(dag.tasks) == 4 + assert len(dag.tasks) == 3 def test_dag_run(dag): diff --git a/tests/units/clean/test_clean.py b/tests/units/clean/test_clean.py index 90cc386f..998651a0 100644 --- a/tests/units/clean/test_clean.py +++ b/tests/units/clean/test_clean.py @@ -16,7 +16,7 @@ def dag(): def test_dag_loaded(dag): assert dag is not None - assert len(dag.tasks) == 2 + assert len(dag.tasks) == 1 @freeze_time("2023-09-20")