From 0f5e7d602d25b89c136eeb1b2801521bc862f573 Mon Sep 17 00:00:00 2001 From: "Ruben L. Mendoza" Date: Tue, 10 Mar 2026 18:19:37 -0500 Subject: [PATCH 01/17] Vtiles attributes in admin layers (#718) * Add border_type in admin/nod_admin polygon and centroids boundaries * Add other attributes for admin lines * Add border_type in admin areas * Add attributes for admin lines * Add attributes for admin lines * Remove border_type and indefinite from admin/no admin areas * Add subdomain config for martin tiler * Return empty when ther eis no values * Use NULLIF for all columns for area views * Recreate mviews on update * Fix issue with ferry #1303 * Optimize parallel query settings and add index to speed up admin boundaries mview creation * Update branch name to create docker image * Update Volume id and imposm docker for production --- .github/workflows/chartpress.yaml | 16 +-- compose/tiler.yml | 39 +++--- .../tiler/config/postgresql.production.conf | 4 +- hetzner/tiler/config/postgresql.staging.conf | 6 +- hetzner/tiler/tiler.production.yml | 6 +- hetzner/traefik/traefik.template.yml | 13 ++ .../config/layers/admin_areas.json | 20 ++++ .../config/layers/admin_lines.json | 15 +++ .../config/layers/admin_relation_members.json | 18 +++ .../ohm_mviews/admin_boundaries_areas.sql | 10 +- .../ohm_mviews/admin_boundaries_lines.sql | 112 ++++++++++++------ .../ohm_mviews/non_admin_boundaries_areas.sql | 2 +- .../queries/ohm_mviews/transport_lines.sql | 6 +- .../queries/utils/create_01_areas_mview.sql | 4 +- images/tiler-imposm/start.sh | 8 ++ .../providers/admin_boundaries_centroids.toml | 21 ++-- .../providers/admin_boundaries_lines.toml | 56 ++++++--- .../providers/admin_boundaries_polygon.toml | 21 ++-- .../providers/non_admin_boundaries_areas.toml | 7 ++ .../non_admin_boundaries_centroids.toml | 7 ++ 20 files changed, 286 insertions(+), 105 deletions(-) diff --git a/.github/workflows/chartpress.yaml b/.github/workflows/chartpress.yaml index 9a216ec4f..fcf6b306e 100644 --- 
a/.github/workflows/chartpress.yaml +++ b/.github/workflows/chartpress.yaml @@ -4,7 +4,7 @@ on: branches: - 'main' - 'staging' - - 'tiler_cache_paths' + - 'vtiles_admin_attr' jobs: build: runs-on: ubuntu-22.04 @@ -71,7 +71,7 @@ jobs: OHM_SLACK_WEBHOOK_URL: ${{ secrets.OHM_SLACK_WEBHOOK_URL }} ################ Staging secrets ################ - name: Staging - substitute secrets - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/vtiles_admin_attr' uses: bluwy/substitute-string-action@v1 with: _input-file: 'values.staging.template.yaml' @@ -189,14 +189,14 @@ jobs: PRODUCTION_OPENSTREETMAP_AUTH_SECRET: ${{ secrets.PRODUCTION_OPENSTREETMAP_AUTH_SECRET }} - name: AWS Credentials - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' uses: aws-actions/configure-aws-credentials@v1 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-east-1 - name: Setup Kubectl and Helm Dependencies - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' run: | sudo pip install awscli --ignore-installed six sudo curl -L -o /usr/bin/kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.17.7/2020-07-08/bin/linux/amd64/kubectl @@ -210,22 +210,22 @@ jobs: helm version - name: Update kube-config staging - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/vtiles_admin_attr' run: aws eks --region us-east-1 update-kubeconfig 
--name osmseed-staging - name: Update kube-config prod if: github.ref == 'refs/heads/main' run: aws eks --region us-east-1 update-kubeconfig --name osmseed-production-v2 - name: Add Helm repository - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' run: | helm repo add osm-seed https://osm-seed.github.io/osm-seed-chart/ helm repo update - name: Install helm dependencies for - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' run: cd ohm && helm dep up # Staging - name: Staging - helm deploy - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/tiler_cache_paths' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/vtiles_admin_attr' run: helm upgrade --install staging --wait ohm/ -f values.staging.yaml -f ohm/values.yaml # Production - name: Production - helm deploy diff --git a/compose/tiler.yml b/compose/tiler.yml index 61df8d656..10ac3158e 100644 --- a/compose/tiler.yml +++ b/compose/tiler.yml @@ -9,6 +9,13 @@ services: - "54321:5432" volumes: - tiler_pgdata:/var/lib/postgresql/data + - ../hetzner/tiler/config/postgresql.staging.conf:/etc/postgresql/postgresql.conf + environment: + - POSTGRES_CONFIG_FILE=/etc/postgresql/postgresql.conf + command: + - postgres + - "-c" + - "config_file=/etc/postgresql/postgresql.conf" env_file: - ../envs/.env.tiler restart: always @@ -33,22 +40,22 @@ services: networks: - ohm_network - tiler: - image: rub21/tiler-server:v1 - container_name: tiler - build: - context: ../images/tiler-server - dockerfile: Dockerfile - volumes: - - ../hetzner:/hetzner - - ../images/tiler-server:/app - ports: - - "9090:9090" - 
env_file: - - ../envs/.env.tiler - restart: always - networks: - - ohm_network + # tiler: + # image: rub21/tiler-server:v1 + # container_name: tiler + # build: + # context: ../images/tiler-server + # dockerfile: Dockerfile + # volumes: + # - ../hetzner:/hetzner + # - ../images/tiler-server:/app + # ports: + # - "9090:9090" + # env_file: + # - ../envs/.env.tiler + # restart: always + # networks: + # - ohm_network # tiler-cache: # image: rub21/tiler-cache:v2 diff --git a/hetzner/tiler/config/postgresql.production.conf b/hetzner/tiler/config/postgresql.production.conf index bb721ee2f..fd5e32da8 100644 --- a/hetzner/tiler/config/postgresql.production.conf +++ b/hetzner/tiler/config/postgresql.production.conf @@ -40,8 +40,8 @@ autovacuum_vacuum_cost_limit = -1 # Let PostgreSQL adjust vacuum cost d # QUERY TUNING #------------------------------------------------------------------------------ effective_io_concurrency = 300 # For SSD; helps the planner estimate IO concurrency -parallel_tuple_cost = 0.1 # Lower cost to encourage parallelization -parallel_setup_cost = 500 # Lower to encourage more parallel plans +parallel_tuple_cost = 0.001 # Lower cost to encourage parallelization +parallel_setup_cost = 100 # Lower to encourage more parallel plans max_worker_processes = 28 # Allow up to 28 worker processes max_parallel_workers_per_gather = 8 # Max workers that can help a single query max_parallel_workers = 28 # Total number of parallel workers across all queries diff --git a/hetzner/tiler/config/postgresql.staging.conf b/hetzner/tiler/config/postgresql.staging.conf index 26cf7f0b1..0aaf21df2 100644 --- a/hetzner/tiler/config/postgresql.staging.conf +++ b/hetzner/tiler/config/postgresql.staging.conf @@ -44,10 +44,10 @@ autovacuum_vacuum_cost_limit = -1 # Allow PostgreSQL to auto-adjust vacuu #------------------------------------------------------------------------------ effective_io_concurrency = 100 # Reduce IO concurrency to fit within staging constraints 
-parallel_tuple_cost = 0.1 # Encourage parallel execution -parallel_setup_cost = 200 # Lower parallel setup cost +parallel_tuple_cost = 0.001 # Encourage parallel execution +parallel_setup_cost = 100 # Lower parallel setup cost max_worker_processes = 4 # Limit worker processes to available CPU cores -max_parallel_workers_per_gather = 2 # Limit per-query parallel workers +max_parallel_workers_per_gather = 4 # Allow more per-query parallel workers max_parallel_workers = 4 # Total parallel workers for staging #------------------------------------------------------------------------------ diff --git a/hetzner/tiler/tiler.production.yml b/hetzner/tiler/tiler.production.yml index f89e258ab..2aa067001 100644 --- a/hetzner/tiler/tiler.production.yml +++ b/hetzner/tiler/tiler.production.yml @@ -16,7 +16,7 @@ services: tiler_imposm: container_name: tiler_imposm - image: ghcr.io/openhistoricalmap/tiler-imposm:0.0.1-0.dev.git.3309.h529b5183 + image: ghcr.io/openhistoricalmap/tiler-imposm:0.0.1-0.dev.git.3325.hb9f97989 volumes: - tiler_imposm_data:/mnt/data env_file: @@ -136,10 +136,10 @@ services: volumes: tiler_pgdata: driver: local - name: tiler_db_25_02 + name: tiler_db_10_03 tiler_imposm_data: driver: local - name: tiler_imposm_25_02 + name: tiler_imposm_10_03 networks: ohm_network: diff --git a/hetzner/traefik/traefik.template.yml b/hetzner/traefik/traefik.template.yml index 2a3c212b2..b2f64c918 100644 --- a/hetzner/traefik/traefik.template.yml +++ b/hetzner/traefik/traefik.template.yml @@ -137,6 +137,14 @@ http: middlewares: - secure-headers + martin-router: + rule: Host(`martin.{{OHM_DOMAIN}}`) + entryPoints: + - port-web + service: martin + middlewares: + - secure-headers + node-exporter-router: rule: Host(`node-exporter.{{OHM_DOMAIN}}`) entryPoints: @@ -184,6 +192,11 @@ http: servers: - url: http://taginfo_web:4567 + martin: + loadBalancer: + servers: + - url: http://martin:80 + node_exporter: loadBalancer: servers: diff --git 
a/images/tiler-imposm/config/layers/admin_areas.json b/images/tiler-imposm/config/layers/admin_areas.json index 755a67b14..a420bb396 100644 --- a/images/tiler-imposm/config/layers/admin_areas.json +++ b/images/tiler-imposm/config/layers/admin_areas.json @@ -56,6 +56,26 @@ "name": "area", "key": null }, + { + "type": "string", + "name": "border_type", + "key": "border_type" + }, + { + "type": "string", + "name": "indefinite", + "key": "indefinite" + }, + { + "type": "string", + "name": "disputed", + "key": "disputed" + }, + { + "type": "string", + "name": "disputed_by", + "key": "disputed_by" + }, { "type": "hstore_tags", "name": "tags", diff --git a/images/tiler-imposm/config/layers/admin_lines.json b/images/tiler-imposm/config/layers/admin_lines.json index 1940b89b0..2a4caff8a 100644 --- a/images/tiler-imposm/config/layers/admin_lines.json +++ b/images/tiler-imposm/config/layers/admin_lines.json @@ -41,6 +41,21 @@ "name": "maritime", "key": "maritime" }, + { + "type": "string", + "name": "indefinite", + "key": "indefinite" + }, + { + "type": "string", + "name": "disputed", + "key": "disputed" + }, + { + "type": "string", + "name": "disputed_by", + "key": "disputed_by" + }, { "type": "boolint", "name": "has_label", diff --git a/images/tiler-imposm/config/layers/admin_relation_members.json b/images/tiler-imposm/config/layers/admin_relation_members.json index b8674207d..6fc6be6d3 100644 --- a/images/tiler-imposm/config/layers/admin_relation_members.json +++ b/images/tiler-imposm/config/layers/admin_relation_members.json @@ -70,6 +70,24 @@ "key": "maritime", "from_member": true }, + { + "name": "me_indefinite", + "type": "string", + "key": "indefinite", + "from_member": true + }, + { + "name": "me_disputed", + "type": "string", + "key": "disputed", + "from_member": true + }, + { + "name": "me_disputed_by", + "type": "string", + "key": "disputed_by", + "from_member": true + }, { "type": "hstore_tags", "name": "me_tags", diff --git 
a/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_areas.sql b/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_areas.sql index 3dcad538d..3ac106216 100644 --- a/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_areas.sql +++ b/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_areas.sql @@ -1,9 +1,17 @@ -- ============================================================================ -- Create materialized views for admin boundaries areas -- ============================================================================ + +-- Index on admin_level to speed up filtering +CREATE INDEX IF NOT EXISTS idx_osm_admin_areas_admin_level +ON osm_admin_areas (admin_level) WHERE geometry IS NOT NULL; + +-- Enable parallel workers on source table +ALTER TABLE osm_admin_areas SET (parallel_workers = 4); + DROP MATERIALIZED VIEW IF EXISTS mv_admin_boundaries_areas_z16_20 CASCADE; -SELECT create_areas_mview( 'osm_admin_areas', 'mv_admin_boundaries_areas_z16_20', 1, 0, 'id, osm_id, type', 'admin_level IN (1,2,3,4,5,6,7,8,9,10,11)'); +SELECT create_areas_mview( 'osm_admin_areas', 'mv_admin_boundaries_areas_z16_20', 1, 0, 'id, osm_id, type', 'admin_level IN (1,2,3,4,5,6,7,8,9,10,11)', NULL); SELECT create_area_mview_from_mview('mv_admin_boundaries_areas_z16_20','mv_admin_boundaries_areas_z13_15', 5, 0.0, NULL); SELECT create_area_mview_from_mview('mv_admin_boundaries_areas_z13_15','mv_admin_boundaries_areas_z10_12', 20, 0.0, 'admin_level IN (1,2,3,4,5,6,7,8,9,10)'); SELECT create_area_mview_from_mview('mv_admin_boundaries_areas_z10_12','mv_admin_boundaries_areas_z8_9', 100, 0.0, 'admin_level IN (1,2,3,4,5,6,7,8,9)'); diff --git a/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_lines.sql b/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_lines.sql index 31cc3a7fc..b8666aa5a 100644 --- a/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_lines.sql +++ b/images/tiler-imposm/queries/ohm_mviews/admin_boundaries_lines.sql @@ -261,11 +261,15 @@ WITH 
ordered AS ( type, admin_level, member, - geometry, - start_decdate, + geometry, + start_decdate, end_decdate, + me_indefinite, + me_maritime, + me_disputed, + me_disputed_by, LAG(end_decdate) OVER ( - PARTITION BY admin_level, member, type + PARTITION BY admin_level, member, type ORDER BY start_decdate NULLS FIRST ) AS prev_end FROM osm_admin_relation_members @@ -280,12 +284,16 @@ flagged AS ( admin_level, member, geometry, - start_decdate, - end_decdate, - CASE - WHEN prev_end IS NULL THEN 0 + start_decdate, + end_decdate, + me_indefinite, + me_maritime, + me_disputed, + me_disputed_by, + CASE + WHEN prev_end IS NULL THEN 0 WHEN start_decdate IS NULL THEN 0 - WHEN + WHEN -- 0.003 covers all possible decimal gaps that correspond to one day (whether it’s a leap year or not). (start_decdate - prev_end) < 0.003 THEN 0 -- No gap, merge @@ -300,10 +308,14 @@ grouped AS ( admin_level, member, geometry, - start_decdate, + start_decdate, end_decdate, + me_indefinite, + me_maritime, + me_disputed, + me_disputed_by, SUM(gap_flag) OVER ( - PARTITION BY admin_level, member, type + PARTITION BY admin_level, member, type ORDER BY start_decdate NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS group_id @@ -319,34 +331,40 @@ SELECT COUNT(*) AS merged_row_count, -- Keep the original decimal dates - CASE - WHEN BOOL_OR(start_decdate IS NULL) THEN NULL - ELSE MIN(start_decdate) + CASE + WHEN BOOL_OR(start_decdate IS NULL) THEN NULL + ELSE MIN(start_decdate) END AS min_start_decdate, - + -- Ensure NULL propagation for end_decdate - CASE - WHEN BOOL_OR(end_decdate IS NULL) THEN NULL - ELSE MAX(end_decdate) + CASE + WHEN BOOL_OR(end_decdate IS NULL) THEN NULL + ELSE MAX(end_decdate) END AS max_end_decdate, -- Convert the decimal dates to ISO format in separate columns convert_decimal_to_iso_date( - CASE - WHEN BOOL_OR(start_decdate IS NULL) THEN NULL - ELSE MIN(start_decdate) + CASE + WHEN BOOL_OR(start_decdate IS NULL) THEN NULL + ELSE MIN(start_decdate) END::NUMERIC ) 
AS min_start_date_iso, - + convert_decimal_to_iso_date( - CASE - WHEN BOOL_OR(end_decdate IS NULL) THEN NULL - ELSE MAX(end_decdate) + CASE + WHEN BOOL_OR(end_decdate IS NULL) THEN NULL + ELSE MAX(end_decdate) END::NUMERIC - ) AS max_end_date_iso - + ) AS max_end_date_iso, + + -- Boundary attributes from member ways + MAX(me_indefinite) AS indefinite, + MAX(me_maritime) AS maritime, + MAX(me_disputed) AS disputed, + MAX(me_disputed_by) AS disputed_by + FROM grouped -GROUP BY +GROUP BY type, admin_level, member, group_id WITH DATA; @@ -379,7 +397,11 @@ WITH relation_boundaries AS ( min_start_decdate AS start_decdate, max_end_decdate AS end_decdate, min_start_date_iso AS start_date, - max_end_date_iso AS end_date + max_end_date_iso AS end_date, + indefinite, + maritime, + disputed, + disputed_by FROM mv_relation_members_boundaries ), way_boundaries AS ( @@ -391,14 +413,18 @@ way_boundaries AS ( 0 AS group_id, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM osm_admin_lines WHERE type = 'administrative' - AND osm_id NOT IN (SELECT member FROM mv_relation_members_boundaries) + AND osm_id NOT IN (SELECT member FROM mv_relation_members_boundaries) ) -- Join the two tables -SELECT +SELECT type, admin_level, member, @@ -407,12 +433,16 @@ SELECT start_decdate, end_decdate, start_date, - end_date + end_date, + indefinite, + maritime, + disputed, + disputed_by FROM relation_boundaries UNION ALL -SELECT +SELECT type, admin_level, member, @@ -421,7 +451,11 @@ SELECT start_decdate, end_decdate, start_date, - end_date + end_date, + indefinite, + maritime, + disputed, + disputed_by FROM way_boundaries WITH DATA; @@ -446,7 +480,7 @@ SELECT log_notice('STEP 6: Create a materialized view for zoom levels'); -- ========================================== DROP MATERIALIZED VIEW IF EXISTS mv_admin_boundaries_lines_z16_20 CASCADE; CREATE MATERIALIZED VIEW mv_admin_boundaries_lines_z16_20 AS -SELECT 
+SELECT ROW_NUMBER() OVER (ORDER BY admin_level, member, group_id) AS id, type, admin_level, @@ -456,7 +490,11 @@ SELECT start_decdate, end_decdate, start_date, - end_date + end_date, + NULLIF(indefinite, '') AS indefinite, + NULLIF(maritime, '') AS maritime, + NULLIF(disputed, '') AS disputed, + NULLIF(disputed_by, '') AS disputed_by FROM mv_admin_boundaries_relations_ways WHERE admin_level IN (1,2,3,4,5,6,7,8,9,10,11) WITH DATA; diff --git a/images/tiler-imposm/queries/ohm_mviews/non_admin_boundaries_areas.sql b/images/tiler-imposm/queries/ohm_mviews/non_admin_boundaries_areas.sql index 08dde461a..b5d0d88d7 100644 --- a/images/tiler-imposm/queries/ohm_mviews/non_admin_boundaries_areas.sql +++ b/images/tiler-imposm/queries/ohm_mviews/non_admin_boundaries_areas.sql @@ -3,7 +3,7 @@ -- ============================================================================ DROP MATERIALIZED VIEW IF EXISTS mv_non_admin_boundaries_areas_z16_20 CASCADE; -SELECT create_areas_mview( 'osm_admin_areas', 'mv_non_admin_boundaries_areas_z16_20', 1, 0, 'id, osm_id, type', 'type <> ''administrative''', 'tags->''religion'' AS religion, tags->''denomination'' AS denomination, tags->''timezone'' AS timezone, tags->''utc'' AS utc, tags->''postal_code'' AS postal_code, tags->''ref'' AS ref, tags->''political_division'' AS political_division' ); +SELECT create_areas_mview( 'osm_admin_areas', 'mv_non_admin_boundaries_areas_z16_20', 1, 0, 'id, osm_id, type', 'type <> ''administrative''', 'tags->''religion'' AS religion, tags->''denomination'' AS denomination, tags->''timezone'' AS timezone, tags->''utc'' AS utc, tags->''postal_code'' AS postal_code, tags->''ref'' AS ref, tags->''political_division'' AS political_division' ); SELECT create_area_mview_from_mview('mv_non_admin_boundaries_areas_z16_20','mv_non_admin_boundaries_areas_z13_15', 5, 0.0, NULL); SELECT create_area_mview_from_mview('mv_non_admin_boundaries_areas_z13_15','mv_non_admin_boundaries_areas_z10_12', 20, 0.0, NULL ); SELECT 
create_area_mview_from_mview('mv_non_admin_boundaries_areas_z10_12','mv_non_admin_boundaries_areas_z8_9', 100, 0.0, NULL ); diff --git a/images/tiler-imposm/queries/ohm_mviews/transport_lines.sql b/images/tiler-imposm/queries/ohm_mviews/transport_lines.sql index d203c0bd5..5820e0b0b 100644 --- a/images/tiler-imposm/queries/ohm_mviews/transport_lines.sql +++ b/images/tiler-imposm/queries/ohm_mviews/transport_lines.sql @@ -300,10 +300,10 @@ END; $do$; -SELECT create_mview_line_from_mview('mv_transport_lines_z16_20', 'mv_transport_lines_z13_15', 5, 'type IN (''motorway'', ''motorway_link'', ''trunk'', ''trunk_link'', ''construction'', ''primary'', ''primary_link'', ''rail'', ''secondary'', ''secondary_link'', ''tertiary'', ''tertiary_link'', ''miniature'', ''narrow_gauge'', ''dismantled'', ''abandoned'', ''disused'', ''razed'', ''light_rail'', ''preserved'', ''proposed'', ''tram'', ''funicular'', ''monorail'', ''taxiway'', ''runway'', ''raceway'', ''residential'', ''service'', ''unclassified'') OR class IN (''railway'')'); -SELECT create_mview_line_from_mview('mv_transport_lines_z13_15', 'mv_transport_lines_z10_12', 20, 'type IN (''motorway'', ''motorway_link'', ''trunk'', ''trunk_link'', ''construction'', ''primary'', ''primary_link'', ''rail'', ''secondary'', ''secondary_link'', ''tertiary'', ''tertiary_link'', ''miniature'', ''narrow_gauge'', ''dismantled'', ''abandoned'', ''disused'', ''razed'', ''light_rail'', ''preserved'', ''proposed'', ''tram'', ''funicular'', ''monorail'', ''taxiway'', ''runway'') OR class IN (''railway'')'); +SELECT create_mview_line_from_mview('mv_transport_lines_z16_20', 'mv_transport_lines_z13_15', 5, 'type IN (''motorway'', ''motorway_link'', ''trunk'', ''trunk_link'', ''construction'', ''primary'', ''primary_link'', ''rail'', ''secondary'', ''secondary_link'', ''tertiary'', ''tertiary_link'', ''miniature'', ''narrow_gauge'', ''dismantled'', ''abandoned'', ''disused'', ''razed'', ''light_rail'', ''preserved'', ''proposed'', ''tram'', 
''funicular'', ''monorail'', ''taxiway'', ''runway'', ''raceway'', ''residential'', ''service'', ''unclassified'', ''ferry'') OR class IN (''railway'')'); +SELECT create_mview_line_from_mview('mv_transport_lines_z13_15', 'mv_transport_lines_z10_12', 20, 'type IN (''motorway'', ''motorway_link'', ''trunk'', ''trunk_link'', ''construction'', ''primary'', ''primary_link'', ''rail'', ''secondary'', ''secondary_link'', ''tertiary'', ''tertiary_link'', ''miniature'', ''narrow_gauge'', ''dismantled'', ''abandoned'', ''disused'', ''razed'', ''light_rail'', ''preserved'', ''proposed'', ''tram'', ''funicular'', ''monorail'', ''taxiway'', ''runway'', ''ferry'') OR class IN (''railway'')'); SELECT create_mview_line_from_mview('mv_transport_lines_z10_12', 'mv_transport_lines_z8_9', 100, NULL); -SELECT create_mview_line_from_mview('mv_transport_lines_z8_9', 'mv_transport_lines_z6_7', 200 , 'type IN (''motorway'', ''motorway_link'', ''trunk'', ''trunk_link'', ''construction'', ''primary'', ''primary_link'', ''rail'', ''secondary'', ''secondary_link'') OR class IN (''railway'')'); +SELECT create_mview_line_from_mview('mv_transport_lines_z8_9', 'mv_transport_lines_z6_7', 200 , 'type IN (''motorway'', ''motorway_link'', ''trunk'', ''trunk_link'', ''construction'', ''primary'', ''primary_link'', ''rail'', ''secondary'', ''secondary_link'', ''ferry'') OR class IN (''railway'')'); SELECT create_mview_line_from_mview('mv_transport_lines_z6_7', 'mv_transport_lines_z5', 1000 , NULL); diff --git a/images/tiler-imposm/queries/utils/create_01_areas_mview.sql b/images/tiler-imposm/queries/utils/create_01_areas_mview.sql index f9397233e..fed588c3b 100644 --- a/images/tiler-imposm/queries/utils/create_01_areas_mview.sql +++ b/images/tiler-imposm/queries/utils/create_01_areas_mview.sql @@ -81,9 +81,7 @@ BEGIN '%I, ROUND(CAST(%I AS numeric), 1)::numeric(20,1) AS area_m2, ROUND(CAST(%I AS numeric) / 1000000, 1)::numeric(20,1) AS area_km2', column_name, column_name, column_name ) - WHEN column_name 
= 'name' THEN 'NULLIF(name, '''') AS name' - WHEN column_name = 'start_date' THEN 'NULLIF(start_date, '''') AS start_date' - WHEN column_name = 'end_date' THEN 'NULLIF(end_date, '''') AS end_date' + WHEN data_type IN ('text', 'character varying') THEN format('NULLIF(%I, '''') AS %I', column_name, column_name) ELSE quote_ident(column_name) END, ', ' ORDER BY ordinal_position diff --git a/images/tiler-imposm/start.sh b/images/tiler-imposm/start.sh index fd4fb5189..d4b2bcf53 100755 --- a/images/tiler-imposm/start.sh +++ b/images/tiler-imposm/start.sh @@ -201,6 +201,14 @@ function monitorImposmErrors() { function updateData() { log_message "Starting database update process..." + # Step 0: Recreate materialized views if RECREATE_MVIEWS_ON_UPDATE is enabled + if [ "$RECREATE_MVIEWS_ON_UPDATE" = "true" ]; then + log_message "Recreating materialized views before update..." + ./scripts/create_mviews.sh --all=true + else + log_message "Skipping materialized views recreation (RECREATE_MVIEWS_ON_UPDATE=$RECREATE_MVIEWS_ON_UPDATE)" + fi + # Step 1: Refreshing materialized views if [ "$REFRESH_MVIEWS" = "true" ]; then log_message "Refreshing materialized views..." 
diff --git a/images/tiler-server/config/providers/admin_boundaries_centroids.toml b/images/tiler-server/config/providers/admin_boundaries_centroids.toml index 5b6d4c668..811b23819 100644 --- a/images/tiler-server/config/providers/admin_boundaries_centroids.toml +++ b/images/tiler-server/config/providers/admin_boundaries_centroids.toml @@ -13,8 +13,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -38,8 +39,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -63,8 +65,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -88,8 +91,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -113,8 +117,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -138,8 +143,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -163,8 +169,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} diff --git a/images/tiler-server/config/providers/admin_boundaries_lines.toml b/images/tiler-server/config/providers/admin_boundaries_lines.toml index 903352102..5ec95e5ae 100644 --- a/images/tiler-server/config/providers/admin_boundaries_lines.toml +++ b/images/tiler-server/config/providers/admin_boundaries_lines.toml @@ -11,8 +11,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z0_2 WHERE @@ 
-32,8 +36,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z3_5 WHERE @@ -54,8 +62,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z6_7 WHERE @@ -75,8 +87,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z8_9 WHERE @@ -96,8 +112,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z10_12 WHERE @@ -117,8 +137,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z13_15 WHERE @@ -138,8 +162,12 @@ SELECT type, start_date, end_date, - start_decdate, - end_decdate + start_decdate, + end_decdate, + indefinite, + maritime, + disputed, + disputed_by FROM mv_admin_boundaries_lines_z16_20 WHERE diff --git a/images/tiler-server/config/providers/admin_boundaries_polygon.toml b/images/tiler-server/config/providers/admin_boundaries_polygon.toml index 17c5cc89f..1d2c56d9e 100644 --- a/images/tiler-server/config/providers/admin_boundaries_polygon.toml +++ b/images/tiler-server/config/providers/admin_boundaries_polygon.toml @@ -13,8 +13,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, short_name, official_name, {{LENGUAGES}} @@ -39,8 +40,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, {{LENGUAGES}} FROM mv_admin_boundaries_areas_z3_5 @@ -64,8 +66,9 @@ SELECT start_date, end_date, area_km2, - 
start_decdate, + start_decdate, end_decdate, + border_type, {{LENGUAGES}} FROM mv_admin_boundaries_areas_z6_7 @@ -88,8 +91,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, {{LENGUAGES}} FROM mv_admin_boundaries_areas_z8_9 @@ -112,8 +116,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, {{LENGUAGES}} FROM mv_admin_boundaries_areas_z10_12 @@ -136,8 +141,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, {{LENGUAGES}} FROM mv_admin_boundaries_areas_z13_15 @@ -161,8 +167,9 @@ SELECT start_date, end_date, area_km2, - start_decdate, + start_decdate, end_decdate, + border_type, {{LENGUAGES}} FROM mv_admin_boundaries_areas_z16_20 diff --git a/images/tiler-server/config/providers/non_admin_boundaries_areas.toml b/images/tiler-server/config/providers/non_admin_boundaries_areas.toml index 296319f7d..470ac543c 100644 --- a/images/tiler-server/config/providers/non_admin_boundaries_areas.toml +++ b/images/tiler-server/config/providers/non_admin_boundaries_areas.toml @@ -24,6 +24,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z0_2 @@ -55,6 +56,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z3_5 @@ -87,6 +89,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z6_7 @@ -118,6 +121,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z8_9 @@ -149,6 +153,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z10_12 @@ -180,6 +185,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z13_15 @@ -212,6 +218,7 @@ SELECT postal_code, ref, 
political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_areas_z16_20 diff --git a/images/tiler-server/config/providers/non_admin_boundaries_centroids.toml b/images/tiler-server/config/providers/non_admin_boundaries_centroids.toml index 46c421f98..169951dc7 100644 --- a/images/tiler-server/config/providers/non_admin_boundaries_centroids.toml +++ b/images/tiler-server/config/providers/non_admin_boundaries_centroids.toml @@ -24,6 +24,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z0_2 @@ -56,6 +57,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z3_5 @@ -88,6 +90,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z6_7 @@ -120,6 +123,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z8_9 @@ -152,6 +156,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z10_12 @@ -184,6 +189,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z13_15 @@ -216,6 +222,7 @@ SELECT postal_code, ref, political_division, + border_type, {{LENGUAGES}} FROM mv_non_admin_boundaries_centroids_z16_20 From 54b9c39923391c660c528cb8a255b6d97c28ec40 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 16 Mar 2026 18:11:50 -0500 Subject: [PATCH 02/17] Scripts to monitor vtiles updates --- images/tiler-pipeline-monitor/Dockerfile | 19 + .../tiler-pipeline-monitor/checks/__init__.py | 0 .../checks/imposm_import.py | 672 ++++++++++++++++++ .../checks/mv_freshness.py | 179 +++++ .../checks/replication_lag.py | 89 +++ images/tiler-pipeline-monitor/config.py | 46 ++ images/tiler-pipeline-monitor/monitor.py | 151 ++++ .../tiler-pipeline-monitor/requirements.txt | 4 + 
.../tiler-pipeline-monitor/tables_config.json | 270 +++++++ 9 files changed, 1430 insertions(+) create mode 100644 images/tiler-pipeline-monitor/Dockerfile create mode 100644 images/tiler-pipeline-monitor/checks/__init__.py create mode 100644 images/tiler-pipeline-monitor/checks/imposm_import.py create mode 100644 images/tiler-pipeline-monitor/checks/mv_freshness.py create mode 100644 images/tiler-pipeline-monitor/checks/replication_lag.py create mode 100644 images/tiler-pipeline-monitor/config.py create mode 100644 images/tiler-pipeline-monitor/monitor.py create mode 100644 images/tiler-pipeline-monitor/requirements.txt create mode 100644 images/tiler-pipeline-monitor/tables_config.json diff --git a/images/tiler-pipeline-monitor/Dockerfile b/images/tiler-pipeline-monitor/Dockerfile new file mode 100644 index 000000000..51cf7f628 --- /dev/null +++ b/images/tiler-pipeline-monitor/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +RUN apt-get update && \ + apt-get install -y --no-install-recommends curl && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8001 + +HEALTHCHECK --interval=30s --timeout=10s --retries=3 --start-period=30s \ + CMD curl -f http://localhost:8001/health || exit 1 + +CMD ["python", "monitor.py"] diff --git a/images/tiler-pipeline-monitor/checks/__init__.py b/images/tiler-pipeline-monitor/checks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/images/tiler-pipeline-monitor/checks/imposm_import.py b/images/tiler-pipeline-monitor/checks/imposm_import.py new file mode 100644 index 000000000..cc981ae17 --- /dev/null +++ b/images/tiler-pipeline-monitor/checks/imposm_import.py @@ -0,0 +1,672 @@ +"""Pipeline check: changeset-centric verification. + +For each changeset in the 1-2 hour window: + 1. Check if minute replication covers it (replication timestamp >= closed_at) + 2. 
Check if its way/relation elements exist in the tiler DB with the correct version +""" + +import json +import os +import xml.etree.ElementTree as ET +from datetime import datetime, timezone, timedelta + +import psycopg2 +import requests + +from config import Config + +# Load table/view mapping from JSON config +_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tables_config.json") +with open(_config_path) as f: + _tables_config = json.load(f) + +OHM_BASE = None # lazily computed + + +def _ohm_base(): + global OHM_BASE + if OHM_BASE is None: + OHM_BASE = Config.OHM_API_BASE.replace("/api/0.6", "") + return OHM_BASE + + +def _parse_timestamp(ts_str): + """Parse an ISO timestamp string to a timezone-aware datetime.""" + ts_str = ts_str.replace("Z", "+00:00") + return datetime.fromisoformat(ts_str) + + +# --------------------------------------------------------------------------- +# Step 0: get changesets in the age window +# --------------------------------------------------------------------------- + +def _get_changesets_in_window(min_age, max_age, limit=10): + """Fetch closed changesets whose age is between min_age and max_age seconds. + + Fetches recent changesets and filters locally by age window. 
+ """ + now = datetime.now(timezone.utc) + min_closed = now - timedelta(seconds=max_age) # oldest allowed + max_closed = now - timedelta(seconds=min_age) # newest allowed + + # Fetch enough to find some in the window + fetch_limit = 100 + url = f"{Config.OHM_API_BASE}/changesets" + params = {"limit": fetch_limit, "closed": "true"} + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + + print(f"[pipeline] Fetching changesets: {url}?limit={fetch_limit}&closed=true") + print(f" Looking for changesets closed between " + f"{min_closed.strftime('%Y-%m-%dT%H:%M:%SZ')} and " + f"{max_closed.strftime('%Y-%m-%dT%H:%M:%SZ')} " + f"(age {min_age//60}-{max_age//60} min)") + + resp = requests.get(url, params=params, headers=headers, timeout=30) + resp.raise_for_status() + + root = ET.fromstring(resp.content) + changesets = [] + skipped_young = 0 + skipped_old = 0 + + for cs in root.findall("changeset"): + cs_id = int(cs.attrib["id"]) + closed_at = cs.attrib.get("closed_at", "") + if not closed_at: + continue + try: + closed_dt = _parse_timestamp(closed_at) + except (ValueError, TypeError): + continue + + age_minutes = (now - closed_dt).total_seconds() / 60 + + if closed_dt > max_closed: + skipped_young += 1 + continue + elif closed_dt < min_closed: + skipped_old += 1 + # Changesets are ordered by newest first, so once we hit old ones, stop + break + else: + changesets.append({ + "id": cs_id, + "closed_at": closed_at, + "closed_dt": closed_dt, + "age_minutes": round(age_minutes, 1), + }) + + if len(changesets) >= limit: + break + + print(f" Fetched {len(root.findall('changeset'))} changesets from API") + print(f" Skipped: {skipped_young} too young (<{min_age//60}min), " + f"{skipped_old} too old (>{max_age//60}min)") + print(f" Found {len(changesets)} changesets in window:") + for cs in changesets: + print(f" changeset {cs['id']}: closed_at={cs['closed_at']} " + f"(age={cs['age_minutes']}min)") + + return changesets + + +# 
--------------------------------------------------------------------------- +# Step 1: replication check +# --------------------------------------------------------------------------- + +def _parse_replication_state(text): + """Parse state.txt and return (sequence, timestamp).""" + data = {} + for line in text.strip().splitlines(): + if "=" in line: + key, _, value = line.partition("=") + data[key.strip()] = value.strip() + seq = int(data.get("sequenceNumber", 0)) + ts_raw = data.get("timestamp", "").replace("\\:", ":") + try: + ts = datetime.fromisoformat(ts_raw.replace("Z", "+00:00")) + except ValueError: + ts = None + return seq, ts + + +def _check_replication_covers(changeset, repl_seq, repl_ts): + """Check if the replication state covers this changeset.""" + if repl_ts is None: + return { + "status": "warning", + "message": "Cannot parse replication timestamp", + } + + closed_dt = changeset["closed_dt"] + if repl_ts >= closed_dt: + return { + "status": "ok", + "message": (f"Replication covers this changeset " + f"(repl_ts={repl_ts.isoformat()} >= closed_at={changeset['closed_at']})"), + "replication_sequence": repl_seq, + "replication_timestamp": repl_ts.isoformat(), + } + else: + lag = (closed_dt - repl_ts).total_seconds() + return { + "status": "critical", + "message": (f"Replication does NOT cover this changeset. 
" + f"Replication is {round(lag/60, 1)}min behind " + f"(repl_ts={repl_ts.isoformat()} < closed_at={changeset['closed_at']})"), + "replication_sequence": repl_seq, + "replication_timestamp": repl_ts.isoformat(), + } + + +# --------------------------------------------------------------------------- +# Step 2: tiler DB check +# --------------------------------------------------------------------------- + +def _get_changeset_elements(changeset_id): + """Download changeset diff and extract way/relation elements with versions.""" + url = f"{Config.OHM_API_BASE}/changeset/{changeset_id}/download" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + resp = requests.get(url, headers=headers, timeout=30) + resp.raise_for_status() + + root = ET.fromstring(resp.content) + elements = [] + + for action in root: # create, modify, delete + action_type = action.tag + for elem in action: + osm_id = elem.attrib.get("id") + version = elem.attrib.get("version") + elem_type = elem.tag + if osm_id and elem_type in ("way", "relation"): + # Extract tags to determine which imposm table this element belongs to + tags = {} + for tag in elem.findall("tag"): + k = tag.attrib.get("k") + v = tag.attrib.get("v") + if k and v: + tags[k] = v + elements.append({ + "type": elem_type, + "osm_id": int(osm_id), + "version": int(version) if version else None, + "action": action_type, + "tags": tags, + }) + return elements + + + +# Loaded from tables_config.json +RELATION_TABLES = _tables_config["relation_tables"] +WAY_TABLES = _tables_config["way_tables"] +TABLE_TO_VIEWS = _tables_config["table_to_views"] +TAG_TO_TABLES = _tables_config.get("tag_to_tables", {}) + + +def _resolve_candidate_tables(elem): + """Determine candidate tables based on element type and tags. + + Uses tag_to_tables mapping from imposm config to narrow the search + to only the tables where the element could exist, instead of searching all. + Falls back to all tables for the element type if no tags match. 
+ """ + type_tables = RELATION_TABLES if elem["type"] == "relation" else WAY_TABLES + tags = elem.get("tags", {}) + + if not tags or not TAG_TO_TABLES: + return type_tables + + # Collect tables that match any of the element's tag keys + matched_tables = set() + for tag_key in tags: + for table in TAG_TO_TABLES.get(tag_key, []): + matched_tables.add(table) + + # Intersect with type tables (relations vs ways) to respect element type + filtered = [t for t in type_tables if t in matched_tables] + + if not filtered: + # No tag matched any known mapping — fall back to all tables for safety + return type_tables + + return filtered + + +def _check_element_in_db(conn, elem): + """Check if an element exists in tiler DB tables (osm_*) and views (mv_*).""" + osm_id = elem["osm_id"] + # Imposm stores relations with negative IDs + search_id = -osm_id if elem["type"] == "relation" else osm_id + + cur = conn.cursor() + + # --- Resolve candidate tables based on element tags --- + candidate_tables = _resolve_candidate_tables(elem) + + # Filter to only tables that actually exist + cur.execute(""" + SELECT table_name FROM information_schema.tables + WHERE table_schema = 'public' AND table_name LIKE 'osm_%%' + ORDER BY table_name + """) + existing_tables = {row[0] for row in cur.fetchall()} + tables = [t for t in candidate_tables if t in existing_tables] + + found_in_tables = [] + version_match = None + + for table in tables: + try: + quoted = psycopg2.extensions.quote_ident(table, cur) + cur.execute( + f"SELECT tags->'version' FROM {quoted} WHERE osm_id = %s LIMIT 1", + (search_id,), + ) + row = cur.fetchone() + if row is not None: + db_version = row[0] + found_in_tables.append(table) + if elem["version"] is not None and db_version is not None: + try: + version_match = int(db_version) >= elem["version"] + except (ValueError, TypeError): + version_match = None + except Exception: + conn.rollback() + + # --- Search in mv_* views related to candidate tables --- + candidate_views = set() 
+ for table in candidate_tables: + for v in TABLE_TO_VIEWS.get(table, []): + candidate_views.add(v) + + # Filter to only views that exist + cur.execute(""" + SELECT matviewname FROM pg_matviews + WHERE schemaname = 'public' AND matviewname LIKE 'mv_%%' + """) + existing_views = {row[0] for row in cur.fetchall()} + views_to_check = [v for v in candidate_views if v in existing_views] + + found_in_views = [] + + for view in sorted(views_to_check): + try: + quoted = psycopg2.extensions.quote_ident(view, cur) + cur.execute( + f"SELECT 1 FROM {quoted} WHERE osm_id = %s LIMIT 1", + (search_id,), + ) + if cur.fetchone() is not None: + found_in_views.append(view) + except Exception: + conn.rollback() + + cur.close() + + return { + "type": elem["type"], + "osm_id": osm_id, + "action": elem["action"], + "expected_version": elem["version"], + "found_in_tables": found_in_tables, + "found_in_views": found_in_views, + "version_match": version_match, + "searched_tables": candidate_tables, + "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", + } + + +def _is_element_deleted(elem): + """Check if an element has been deleted in OHM (visible=false or 410 Gone).""" + url = f"{Config.OHM_API_BASE}/{elem['type']}/{elem['osm_id']}" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + try: + resp = requests.get(url, headers=headers, timeout=15) + if resp.status_code == 410: + return True + if resp.status_code == 200: + root = ET.fromstring(resp.content) + el = root.find(elem["type"]) + if el is not None and el.attrib.get("visible") == "false": + return True + return False + except Exception: + return False + + +def _check_elements_in_db(conn, changeset_id): + """Check all elements of a changeset in the tiler DB.""" + try: + elements = _get_changeset_elements(changeset_id) + except requests.RequestException as e: + return { + "status": "critical", + "message": f"Failed to download changeset diff: {e}", + "elements": [], + } + + if not elements: + return { + "status": "ok", + 
"message": "No way/relation elements in this changeset", + "elements": [], + } + + missing = [] + mismatches = [] + checked = [] + + print(f" [tiler_db] Checking {len(elements)} way/relation elements") + + for elem in elements: + if elem["action"] == "delete": + print(f" SKIP {elem['type']}/{elem['osm_id']} (action=delete)") + continue + + check = _check_element_in_db(conn, elem) + checked.append(check) + + tables = check["found_in_tables"] + views = check["found_in_views"] + + # Show which tags were used to resolve candidate tables + tag_keys = [k for k in elem.get("tags", {}) if k in TAG_TO_TABLES] + searched = check.get("searched_tables", []) + + if tables or views: + icon = "OK" if check["version_match"] is not False else "VERSION_MISMATCH" + print(f" {icon} {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}) version_match={check['version_match']}") + if tag_keys: + print(f" matched tags: {tag_keys} -> searched: {searched}") + if tables: + print(f" tables: {tables}") + if views: + print(f" views: {views}") + print(f" {check['url']}") + else: + # Check if the element was deleted in a later changeset + if _is_element_deleted(elem): + print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}) -> deleted in a later changeset") + print(f" {check['url']}") + check["deleted"] = True + continue + print(f" MISSING {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}) -> NOT in tables or views") + if tag_keys: + print(f" matched tags: {tag_keys} -> searched: {searched}") + else: + print(f" no matching tags found, searched all: {searched}") + print(f" {check['url']}") + + if not tables and not views and not check.get("deleted"): + missing.append(f"{elem['type']}/{elem['osm_id']}") + elif check["version_match"] is False: + mismatches.append(f"{elem['type']}/{elem['osm_id']} expected v{elem['version']}") + + if missing: + status = "warning" + msg = f"Missing from tiler DB: {', '.join(missing)}" + 
if mismatches: + msg += f". Version mismatches: {', '.join(mismatches)}" + elif mismatches: + status = "warning" + msg = f"Version mismatches: {', '.join(mismatches)}" + else: + status = "ok" + msg = f"All {len(checked)} elements verified in tiler DB" + + return {"status": status, "message": msg, "elements": checked} + + +# --------------------------------------------------------------------------- +# Main pipeline check (scheduled) +# --------------------------------------------------------------------------- + +def check_pipeline(): + """Check the full pipeline for changesets in the 1-2 hour age window. + + For each changeset: + 1. Is it covered by minute replication? + 2. Are its elements in the tiler DB? + """ + now = datetime.now(timezone.utc) + min_age = Config.CHANGESET_MIN_AGE + max_age = Config.CHANGESET_MAX_AGE + + result = { + "name": "pipeline", + "status": "ok", + "message": "", + "details": { + "window": f"{min_age//60}-{max_age//60} minutes", + "replication": {}, + "changesets": [], + }, + "checked_at": now.isoformat(), + } + + # --- Fetch replication state --- + repl_seq, repl_ts = None, None + try: + resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) + resp.raise_for_status() + repl_seq, repl_ts = _parse_replication_state(resp.text) + result["details"]["replication"] = { + "status": "ok", + "sequence": repl_seq, + "timestamp": repl_ts.isoformat() if repl_ts else None, + } + if repl_ts: + lag_min = (now - repl_ts).total_seconds() / 60 + result["details"]["replication"]["lag_minutes"] = round(lag_min, 1) + print(f"\n[pipeline] Replication state: seq={repl_seq}, " + f"ts={repl_ts.isoformat()}, lag={lag_min:.1f}min") + except requests.RequestException as e: + result["details"]["replication"] = { + "status": "critical", + "message": f"Failed to fetch replication state: {e}", + } + print(f"\n[pipeline] WARNING: Cannot fetch replication state: {e}") + + # --- Get changesets in window --- + try: + changesets = _get_changesets_in_window( + 
min_age=min_age, + max_age=max_age, + limit=Config.CHANGESET_LIMIT, + ) + except requests.RequestException as e: + result["status"] = "critical" + result["message"] = f"Failed to fetch changesets from OHM API: {e}" + return result + + if not changesets: + result["message"] = ( + f"No changesets found in the {min_age//60}-{max_age//60} minute window" + ) + print(f"[pipeline] {result['message']}") + return result + + print(f"[pipeline] Found {len(changesets)} changesets in " + f"{min_age//60}-{max_age//60}min window") + + # --- Connect to tiler DB --- + conn = None + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + result["status"] = "critical" + result["message"] = f"Cannot connect to tiler DB: {e}" + print(f"[pipeline] ERROR: Cannot connect to tiler DB: {e}") + return result + + # --- Check each changeset through the pipeline --- + problems = [] + + for cs in changesets: + print(f"\n[pipeline] === Changeset {cs['id']} === " + f"(closed_at={cs['closed_at']}, age={cs['age_minutes']}min)") + print(f" URL: {_ohm_base()}/changeset/{cs['id']}") + + cs_result = { + "changeset_id": cs["id"], + "changeset_url": f"{_ohm_base()}/changeset/{cs['id']}", + "closed_at": cs["closed_at"], + "age_minutes": cs["age_minutes"], + "replication": {}, + "tiler_db": {}, + } + + # Step 1: replication + if repl_seq is not None: + repl_check = _check_replication_covers(cs, repl_seq, repl_ts) + cs_result["replication"] = repl_check + print(f" [replication] {repl_check['status'].upper()}: {repl_check['message']}") + + if repl_check["status"] != "ok": + problems.append( + f"Changeset {cs['id']}: replication not covering" + ) + else: + cs_result["replication"] = {"status": "unknown", "message": "Replication state unavailable"} + print(f" [replication] UNKNOWN: Replication state unavailable") + + # Step 2: tiler DB + db_check = 
_check_elements_in_db(conn, cs["id"]) + cs_result["tiler_db"] = db_check + print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") + + if db_check["status"] != "ok": + problems.append(f"Changeset {cs['id']}: {db_check['message']}") + + result["details"]["changesets"].append(cs_result) + + conn.close() + + # --- Overall status --- + if any(cs.get("replication", {}).get("status") == "critical" + or cs.get("tiler_db", {}).get("status") in ("warning", "critical") + for cs in result["details"]["changesets"]): + result["status"] = "warning" + result["message"] = f"Issues found: {'; '.join(problems[:5])}" + else: + result["message"] = ( + f"All {len(changesets)} changesets in {min_age//60}-{max_age//60}min " + f"window passed pipeline check" + ) + + print(f"\n[pipeline] Result: {result['status'].upper()} — {result['message']}") + return result + + +# --------------------------------------------------------------------------- +# On-demand single changeset check +# --------------------------------------------------------------------------- + +def check_single_changeset(changeset_id): + """Evaluate a single changeset through the full pipeline (on-demand).""" + now = datetime.now(timezone.utc) + result = { + "name": "pipeline", + "changeset_id": changeset_id, + "changeset_url": f"{_ohm_base()}/changeset/{changeset_id}", + "status": "ok", + "message": "", + "details": {"replication": {}, "tiler_db": {}}, + "checked_at": now.isoformat(), + } + + # Get changeset info + try: + url = f"{Config.OHM_API_BASE}/changeset/{changeset_id}" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + resp = requests.get(url, headers=headers, timeout=30) + resp.raise_for_status() + root = ET.fromstring(resp.content) + cs_elem = root.find("changeset") + closed_at = cs_elem.attrib.get("closed_at", "") if cs_elem is not None else "" + except Exception: + closed_at = "" + + print(f"\n[pipeline] === Changeset {changeset_id} (on-demand) ===") + print(f" URL: 
{_ohm_base()}/changeset/{changeset_id}") + if closed_at: + print(f" closed_at: {closed_at}") + + # Step 1: replication + try: + resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) + resp.raise_for_status() + repl_seq, repl_ts = _parse_replication_state(resp.text) + + if closed_at and repl_ts: + closed_dt = _parse_timestamp(closed_at) + cs_data = {"closed_at": closed_at, "closed_dt": closed_dt} + repl_check = _check_replication_covers(cs_data, repl_seq, repl_ts) + else: + repl_check = { + "status": "ok" if repl_ts else "warning", + "message": f"Replication seq={repl_seq}, ts={repl_ts.isoformat() if repl_ts else 'unknown'}", + "replication_sequence": repl_seq, + "replication_timestamp": repl_ts.isoformat() if repl_ts else None, + } + + result["details"]["replication"] = repl_check + print(f" [replication] {repl_check['status'].upper()}: {repl_check['message']}") + except requests.RequestException as e: + result["details"]["replication"] = { + "status": "critical", + "message": f"Failed to fetch replication state: {e}", + } + print(f" [replication] CRITICAL: Cannot fetch replication state: {e}") + + # Step 2: tiler DB + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + result["status"] = "critical" + result["message"] = f"Cannot connect to tiler DB: {e}" + result["details"]["tiler_db"] = {"status": "critical", "message": str(e)} + return result + + db_check = _check_elements_in_db(conn, changeset_id) + conn.close() + result["details"]["tiler_db"] = db_check + print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") + + # Overall + problems = [] + repl_status = result["details"]["replication"].get("status", "ok") + if repl_status == "critical": + problems.append("Replication not covering this changeset") + if db_check["status"] != "ok": + problems.append(db_check["message"]) 
+ + if problems: + result["status"] = "warning" + result["message"] = "; ".join(problems) + else: + result["message"] = ( + f"Changeset {changeset_id} passed full pipeline check " + f"({len(db_check.get('elements', []))} elements verified)" + ) + + print(f" [result] {result['status'].upper()}: {result['message']}") + return result diff --git a/images/tiler-pipeline-monitor/checks/mv_freshness.py b/images/tiler-pipeline-monitor/checks/mv_freshness.py new file mode 100644 index 000000000..35d5d4d6b --- /dev/null +++ b/images/tiler-pipeline-monitor/checks/mv_freshness.py @@ -0,0 +1,179 @@ +"""Check 3: Materialized view freshness monitor. + +Queries pg_stat_user_tables to check when materialized views were last +auto-analyzed/auto-vacuumed (proxy for last refresh), and also checks +if the views exist and have rows. +""" + +from datetime import datetime, timezone + +import psycopg2 + +from config import Config + +# Key materialized views grouped by expected refresh interval. +# group_name -> (max_stale_seconds, [view_names]) +MV_GROUPS = { + "admin_boundaries_lines": ( + 300, # expect refresh every ~60s + buffer + [ + "mv_admin_boundaries_lines_z4_5", + "mv_admin_boundaries_lines_z6_7", + "mv_admin_boundaries_lines_z8_9", + "mv_admin_boundaries_lines_z10_11", + "mv_admin_boundaries_lines_z12_13", + "mv_admin_boundaries_lines_z14_15", + "mv_admin_boundaries_lines_z16_20", + ], + ), + "water": ( + 600, # expect refresh every ~180s + buffer + [ + "mv_water_lines_z10_11", + "mv_water_lines_z12_13", + "mv_water_lines_z14_15", + "mv_water_lines_z16_20", + "mv_water_areas_z6_7", + "mv_water_areas_z8_9", + "mv_water_areas_z10_11", + "mv_water_areas_z12_13", + "mv_water_areas_z14_15", + "mv_water_areas_z16_20", + ], + ), + "transport": ( + 600, + [ + "mv_transport_lines_z8_9", + "mv_transport_lines_z10_11", + "mv_transport_lines_z12_13", + "mv_transport_lines_z14_15", + "mv_transport_lines_z16_20", + ], + ), +} + + +def check_mv_freshness(): + """Check that key materialized 
views exist and are being refreshed.""" + result = { + "name": "mv_freshness", + "status": "ok", + "message": "", + "details": {"groups": {}}, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + result["status"] = "critical" + result["message"] = f"Cannot connect to tiler DB: {e}" + return result + + cur = conn.cursor() + + # Get list of existing materialized views + cur.execute("SELECT matviewname FROM pg_matviews WHERE schemaname = 'public'") + existing_mvs = {row[0] for row in cur.fetchall()} + + # Check row counts and last analyze times for MVs via pg_stat_user_tables. + # REFRESH MATERIALIZED VIEW triggers auto-analyze, so last_autoanalyze + # is a good proxy for "last refreshed". + cur.execute(""" + SELECT relname, n_live_tup, last_autoanalyze, last_analyze + FROM pg_stat_user_tables + WHERE schemaname = 'public' + AND relname LIKE 'mv_%%' + """) + mv_stats = {} + for row in cur.fetchall(): + name, n_rows, last_autoanalyze, last_analyze = row + # Use whichever is more recent + last_refreshed = max( + filter(None, [last_autoanalyze, last_analyze]), + default=None, + ) + mv_stats[name] = { + "n_rows": n_rows, + "last_refreshed": last_refreshed, + } + + cur.close() + conn.close() + + missing_views = [] + stale_views = [] + empty_views = [] + now = datetime.now(timezone.utc) + + for group_name, (max_stale, views) in MV_GROUPS.items(): + group_result = {"views": [], "status": "ok"} + + for view_name in views: + view_info = {"name": view_name, "status": "ok"} + + if view_name not in existing_mvs: + view_info["status"] = "critical" + view_info["message"] = "View does not exist" + missing_views.append(view_name) + elif view_name in mv_stats: + stats = mv_stats[view_name] + view_info["n_rows"] = stats["n_rows"] + + if stats["n_rows"] == 0: 
+ view_info["status"] = "warning" + view_info["message"] = "View is empty (0 rows)" + empty_views.append(view_name) + + if stats["last_refreshed"]: + last_ref = stats["last_refreshed"] + if last_ref.tzinfo is None: + last_ref = last_ref.replace(tzinfo=timezone.utc) + age_seconds = (now - last_ref).total_seconds() + view_info["last_refreshed"] = last_ref.isoformat() + view_info["age_seconds"] = round(age_seconds) + + if age_seconds > max_stale: + view_info["status"] = "warning" + view_info["message"] = ( + f"Stale: last refreshed {round(age_seconds / 60, 1)} min ago " + f"(threshold: {max_stale // 60} min)" + ) + stale_views.append(view_name) + else: + view_info["last_refreshed"] = None + view_info["message"] = "No analyze timestamp available" + else: + view_info["message"] = "No stats available" + + group_result["views"].append(view_info) + + if any(v["status"] == "critical" for v in group_result["views"]): + group_result["status"] = "critical" + elif any(v["status"] == "warning" for v in group_result["views"]): + group_result["status"] = "warning" + + result["details"]["groups"][group_name] = group_result + + # Overall status + if missing_views: + result["status"] = "critical" + result["message"] = f"Missing views: {', '.join(missing_views[:5])}" + elif stale_views: + result["status"] = "warning" + result["message"] = f"Stale views: {', '.join(stale_views[:5])}" + elif empty_views: + result["status"] = "warning" + result["message"] = f"Empty views: {', '.join(empty_views[:5])}" + else: + total = sum(len(v) for _, v in MV_GROUPS.values()) + result["message"] = f"All {total} monitored materialized views are healthy" + + return result diff --git a/images/tiler-pipeline-monitor/checks/replication_lag.py b/images/tiler-pipeline-monitor/checks/replication_lag.py new file mode 100644 index 000000000..c15cb2cef --- /dev/null +++ b/images/tiler-pipeline-monitor/checks/replication_lag.py @@ -0,0 +1,89 @@ +"""Check 1: Minute replication lag monitor. 
+ +Compares the latest replication sequence number available on S3 +against the last sequence number processed by imposm (from the tiler DB +or the replication state endpoint). +""" + +import time +from datetime import datetime, timezone + +import requests + +from config import Config + + +def _parse_state(text): + """Parse an imposm/osm replication state.txt and return sequence + timestamp.""" + data = {} + for line in text.strip().splitlines(): + if "=" in line: + key, _, value = line.partition("=") + data[key.strip()] = value.strip() + seq = int(data.get("sequenceNumber", 0)) + ts_raw = data.get("timestamp", "") + # Format: 2026-03-13T12\:05\:02Z (escaped colons in java properties) + ts_raw = ts_raw.replace("\\:", ":") + try: + ts = datetime.fromisoformat(ts_raw.replace("Z", "+00:00")) + except ValueError: + ts = None + return seq, ts + + +def check_replication_lag(): + """Return a dict with the replication lag check result.""" + result = { + "name": "replication_lag", + "status": "ok", + "message": "", + "details": {}, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + try: + # Get latest available replication state from S3 + resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) + resp.raise_for_status() + remote_seq, remote_ts = _parse_state(resp.text) + + result["details"]["remote_sequence"] = remote_seq + result["details"]["remote_timestamp"] = remote_ts.isoformat() if remote_ts else None + + # Get imposm's last processed state + # The imposm diff dir stores last.state.txt - we query it via the same + # base URL pattern but from the local imposm state endpoint. + # In Docker, we can check the DB for the latest sequence via the + # osm_replication_status table if available, or fall back to comparing + # timestamps of recent data. + # + # For now: compare remote timestamp against current time. + # If remote_ts is stale, replication source itself is behind. + # A more precise check reads imposm's last.state.txt from the shared volume. 
+ + if remote_ts: + lag_seconds = (datetime.now(timezone.utc) - remote_ts).total_seconds() + result["details"]["lag_seconds"] = round(lag_seconds) + result["details"]["lag_minutes"] = round(lag_seconds / 60, 1) + + if lag_seconds > Config.REPLICATION_LAG_THRESHOLD: + result["status"] = "critical" + result["message"] = ( + f"Replication lag is {round(lag_seconds / 60, 1)} minutes " + f"(threshold: {Config.REPLICATION_LAG_THRESHOLD // 60} min). " + f"Last replication timestamp: {remote_ts.isoformat()}" + ) + else: + result["message"] = ( + f"Replication is up to date. Lag: {round(lag_seconds / 60, 1)} min, " + f"sequence: {remote_seq}" + ) + else: + result["status"] = "warning" + result["message"] = "Could not parse replication timestamp" + + except requests.RequestException as e: + result["status"] = "critical" + result["message"] = f"Failed to fetch replication state: {e}" + + return result diff --git a/images/tiler-pipeline-monitor/config.py b/images/tiler-pipeline-monitor/config.py new file mode 100644 index 000000000..89b698b87 --- /dev/null +++ b/images/tiler-pipeline-monitor/config.py @@ -0,0 +1,46 @@ +import os + + +class Config: + # PostgreSQL (tiler DB) + POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost") + POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", 5432)) + POSTGRES_DB = os.getenv("POSTGRES_DB", "tiler") + POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres") + POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "") + + # Replication + REPLICATION_STATE_URL = os.getenv( + "REPLICATION_STATE_URL", + "https://s3.amazonaws.com/planet.openhistoricalmap.org/replication/minute/state.txt", + ) + OHM_API_BASE = os.getenv("OHM_API_BASE", "https://www.openhistoricalmap.org/api/0.6") + + # How often to run the pipeline check (seconds) + CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 3600)) # 1 hour + + # OHM changeset age window (seconds) + # Only check changesets closed at least CHANGESET_MIN_AGE ago + # and at most CHANGESET_MAX_AGE ago. 
+    # Example: min=3600 max=10800 → changesets closed between 1 and 3 hours ago
+    CHANGESET_MIN_AGE = int(os.getenv("CHANGESET_MIN_AGE", 10800))  # 3 hours
+    CHANGESET_MAX_AGE = int(os.getenv("CHANGESET_MAX_AGE", 14400))  # 4 hours
+
+    # Max number of changesets to check per run
+    CHANGESET_LIMIT = int(os.getenv("CHANGESET_LIMIT", 30))
+
+    # Verbose logging
+    VERBOSE_LOGGING = os.getenv("VERBOSE_LOGGING", "false").lower() == "true"
+
+    # Alerting (optional)
+    SLACK_WEBHOOK_URL = os.getenv("SLACK_WEBHOOK_URL", "")
+
+    # Server
+    MONITOR_PORT = int(os.getenv("MONITOR_PORT", 8001))
+
+    @staticmethod
+    def get_db_dsn():
+        return (
+            f"postgresql://{Config.POSTGRES_USER}:{Config.POSTGRES_PASSWORD}"
+            f"@{Config.POSTGRES_HOST}:{Config.POSTGRES_PORT}/{Config.POSTGRES_DB}"
+        )
diff --git a/images/tiler-pipeline-monitor/monitor.py b/images/tiler-pipeline-monitor/monitor.py
new file mode 100644
index 000000000..5a5b88a9d
--- /dev/null
+++ b/images/tiler-pipeline-monitor/monitor.py
@@ -0,0 +1,151 @@
+"""Vtile pipeline monitor.
+
+Runs periodic changeset-centric checks and exposes results via a FastAPI HTTP
+server. Optionally sends Slack alerts when checks fail.
+""" + +import logging +import threading +import time +from datetime import datetime, timezone + +import requests +import uvicorn +from fastapi import FastAPI +from fastapi.responses import JSONResponse + +from checks.imposm_import import check_pipeline, check_single_changeset +from config import Config + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) +logger = logging.getLogger(__name__) + +# Store latest check result +_latest_result = None +_lock = threading.Lock() + +app = FastAPI(title="OHM Vtile Pipeline Monitor") + + +# --------------------------------------------------------------------------- +# Slack alerting +# --------------------------------------------------------------------------- + +def _send_slack_alert(check_result): + """Send a Slack notification when a check is not ok.""" + if not Config.SLACK_WEBHOOK_URL: + return + status_emoji = {"ok": ":white_check_mark:", "warning": ":warning:", "critical": ":rotating_light:"} + emoji = status_emoji.get(check_result["status"], ":question:") + text = f"{emoji} *{check_result['name']}* — {check_result['status'].upper()}\n{check_result['message']}" + try: + requests.post( + Config.SLACK_WEBHOOK_URL, + json={"text": text}, + timeout=10, + ) + except requests.RequestException as e: + logger.error(f"Failed to send Slack alert: {e}") + + +# --------------------------------------------------------------------------- +# Background scheduler +# --------------------------------------------------------------------------- + +def _run_check(): + """Run the pipeline check and update stored result.""" + try: + logger.info("=============> Running pipeline check") + result = check_pipeline() + logger.info(f"Pipeline check: {result['status']} — {result['message']}") + + with _lock: + prev = _latest_result + globals()["_latest_result"] = result + + # Alert on state transitions to non-ok + if result["status"] != "ok": + if prev is None or prev["status"] == "ok": + 
_send_slack_alert(result) + + except Exception as e: + logger.exception(f"Pipeline check raised an exception: {e}") + with _lock: + globals()["_latest_result"] = { + "name": "pipeline", + "status": "critical", + "message": f"Check failed with exception: {e}", + "details": {}, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + +def _scheduler(): + """Background loop that runs checks at the configured interval.""" + logger.info(f"Pipeline monitor starting. Check interval: {Config.CHECK_INTERVAL}s") + time.sleep(10) + + while True: + _run_check() + time.sleep(Config.CHECK_INTERVAL) + + +# --------------------------------------------------------------------------- +# HTTP endpoints +# --------------------------------------------------------------------------- + +@app.get("/health") +def health(): + """Overall health: returns 200 if ok, 503 otherwise.""" + with _lock: + result = _latest_result + + if result is None: + return JSONResponse( + content={"status": "starting", "message": "No checks have run yet"}, + status_code=200, + ) + + status_code = 200 if result["status"] == "ok" else 503 + return JSONResponse( + content={ + "status": result["status"], + "message": result["message"], + "checked_at": result["checked_at"], + }, + status_code=status_code, + ) + + +@app.get("/checks") +def all_checks(): + """Return full details for the latest pipeline check.""" + with _lock: + result = _latest_result + if result is None: + return JSONResponse(content={"status": "starting"}) + return JSONResponse(content=result) + + +@app.get("/changeset/{changeset_id}") +def evaluate_changeset(changeset_id: int): + """Evaluate a specific changeset through the full pipeline (on-demand).""" + result = check_single_changeset(changeset_id) + status_code = 200 if result["status"] == "ok" else 503 + return JSONResponse(content=result, status_code=status_code) + + +# --------------------------------------------------------------------------- +# Entrypoint +# 
--------------------------------------------------------------------------- + +if __name__ == "__main__": + # Start background scheduler + t = threading.Thread(target=_scheduler, daemon=True) + t.start() + + # Start HTTP server + uvicorn.run(app, host="0.0.0.0", port=Config.MONITOR_PORT) diff --git a/images/tiler-pipeline-monitor/requirements.txt b/images/tiler-pipeline-monitor/requirements.txt new file mode 100644 index 000000000..6bc97657c --- /dev/null +++ b/images/tiler-pipeline-monitor/requirements.txt @@ -0,0 +1,4 @@ +fastapi +uvicorn +requests +psycopg2-binary diff --git a/images/tiler-pipeline-monitor/tables_config.json b/images/tiler-pipeline-monitor/tables_config.json new file mode 100644 index 000000000..cbfd662a7 --- /dev/null +++ b/images/tiler-pipeline-monitor/tables_config.json @@ -0,0 +1,270 @@ +{ + "relation_tables": [ + "osm_admin_relation_members", + "osm_transport_multilines", + "osm_street_multilines", + "osm_route_multilines", + "osm_communication_multilines" + ], + "way_tables": [ + "osm_water_lines", + "osm_transport_lines", + "osm_route_lines", + "osm_other_lines", + "osm_communication_lines", + "osm_landuse_lines", + "osm_admin_lines", + "osm_buildings", + "osm_water_areas", + "osm_amenity_areas", + "osm_other_areas", + "osm_transport_areas", + "osm_landuse_areas", + "osm_place_areas", + "osm_admin_areas", + "osm_amenity_points", + "osm_buildings_points", + "osm_transport_points", + "osm_other_points", + "osm_place_points", + "osm_landuse_points" + ], + "_comment_tag_to_tables": "Mapping from OSM tag keys to imposm tables, derived from tiler-imposm/config/layers/*.json", + "tag_to_tables": { + "railway": [ + "osm_transport_lines", + "osm_transport_areas", + "osm_transport_points", + "osm_transport_multilines" + ], + "highway": [ + "osm_transport_lines", + "osm_transport_areas", + "osm_transport_points", + "osm_transport_multilines", + "osm_street_multilines" + ], + "aeroway": [ + "osm_transport_lines", + "osm_transport_areas", + 
"osm_transport_points" + ], + "route": [ + "osm_transport_lines", + "osm_transport_areas", + "osm_transport_points", + "osm_route_lines", + "osm_route_multilines" + ], + "waterway": [ + "osm_water_lines", + "osm_water_areas" + ], + "building": [ + "osm_buildings", + "osm_buildings_points" + ], + "amenity": [ + "osm_amenity_areas", + "osm_amenity_points" + ], + "boundary": [ + "osm_admin_lines", + "osm_admin_areas", + "osm_admin_relation_members" + ], + "landuse": [ + "osm_landuse_areas", + "osm_landuse_lines", + "osm_landuse_points", + "osm_water_areas" + ], + "leisure": [ + "osm_landuse_areas", + "osm_landuse_lines", + "osm_landuse_points" + ], + "natural": [ + "osm_landuse_areas", + "osm_landuse_lines", + "osm_landuse_points", + "osm_water_lines", + "osm_water_areas" + ], + "place": [ + "osm_place_areas", + "osm_place_points" + ], + "barrier": [ + "osm_other_areas", + "osm_other_lines", + "osm_other_points", + "osm_water_lines" + ], + "historic": [ + "osm_other_areas", + "osm_other_lines", + "osm_other_points" + ], + "man_made": [ + "osm_other_areas", + "osm_other_lines", + "osm_other_points" + ], + "power": [ + "osm_other_areas", + "osm_other_lines", + "osm_other_points" + ], + "military": [ + "osm_other_areas", + "osm_other_lines", + "osm_other_points" + ], + "communication": [ + "osm_communication_lines", + "osm_communication_multilines" + ], + "tourism": [ + "osm_other_points" + ], + "shop": [ + "osm_other_points" + ], + "craft": [ + "osm_other_points" + ] + }, + "_comment": "Only check the lowest and highest zoom views (extremes) per table to reduce queries", + "table_to_views": { + "osm_admin_lines": [ + "mv_admin_boundaries_lines_z0_2", + "mv_admin_boundaries_lines_z16_20", + "mv_admin_maritime_lines_z0_5_v2", + "mv_admin_maritime_lines_z10_15" + ], + "osm_admin_relation_members": [ + "mv_relation_members_boundaries", + "mv_admin_boundaries_lines_z0_2", + "mv_admin_boundaries_lines_z16_20" + ], + "osm_admin_areas": [ + "mv_admin_boundaries_areas_z0_2", + 
"mv_admin_boundaries_areas_z16_20", + "mv_admin_boundaries_centroids_z0_2", + "mv_admin_boundaries_centroids_z16_20", + "mv_non_admin_boundaries_areas_z0_2", + "mv_non_admin_boundaries_areas_z16_20", + "mv_non_admin_boundaries_centroids_z0_2", + "mv_non_admin_boundaries_centroids_z16_20" + ], + "osm_water_lines": [ + "mv_water_lines_z8_9", + "mv_water_lines_z16_20" + ], + "osm_water_areas": [ + "mv_water_areas_z0_2", + "mv_water_areas_z16_20", + "mv_water_areas_centroids_z8_9", + "mv_water_areas_centroids_z16_20" + ], + "osm_transport_lines": [ + "mv_transport_lines_z5", + "mv_transport_lines_z16_20" + ], + "osm_transport_multilines": [ + "mv_transport_lines_z5", + "mv_transport_lines_z16_20" + ], + "osm_street_multilines": [ + "mv_transport_lines_z5", + "mv_transport_lines_z16_20" + ], + "osm_transport_areas": [ + "mv_transport_areas_z10_12", + "mv_transport_areas_z16_20", + "mv_transport_points_centroids_z10_12", + "mv_transport_points_centroids_z16_20" + ], + "osm_transport_points": [ + "mv_transport_points", + "mv_transport_points_centroids_z16_20" + ], + "osm_route_lines": [ + "mv_routes_normalized", + "mv_routes_indexed_z5", + "mv_routes_indexed_z16_20" + ], + "osm_route_multilines": [ + "mv_routes_normalized", + "mv_routes_indexed_z5", + "mv_routes_indexed_z16_20" + ], + "osm_communication_lines": [ + "mv_communication_z10_12", + "mv_communication_z16_20" + ], + "osm_communication_multilines": [ + "mv_communication_z10_12", + "mv_communication_z16_20" + ], + "osm_landuse_areas": [ + "mv_landuse_areas_z6_7", + "mv_landuse_areas_z16_20", + "mv_landuse_points_centroids_z6_7", + "mv_landuse_points_centroids_z16_20" + ], + "osm_landuse_lines": [ + "mv_landuse_lines_z14_15", + "mv_landuse_lines_z16_20" + ], + "osm_landuse_points": [ + "mv_landuse_points", + "mv_landuse_points_centroids_z6_7", + "mv_landuse_points_centroids_z16_20" + ], + "osm_buildings": [ + "mv_buildings_areas_z14_15", + "mv_buildings_areas_z16_20", + "mv_buildings_points_centroids_z14_15", + 
"mv_buildings_points_centroids_z16_20" + ], + "osm_buildings_points": [ + "mv_buildings_points", + "mv_buildings_points_centroids_z16_20" + ], + "osm_amenity_areas": [ + "mv_amenity_areas_z14_15", + "mv_amenity_areas_z16_20", + "mv_amenity_points_centroids_z14_15", + "mv_amenity_points_centroids_z16_20" + ], + "osm_amenity_points": [ + "mv_amenity_points", + "mv_amenity_points_centroids_z16_20" + ], + "osm_other_areas": [ + "mv_other_areas_z8_9", + "mv_other_areas_z16_20", + "mv_other_points_centroids_z8_9", + "mv_other_points_centroids_z16_20" + ], + "osm_other_lines": [ + "mv_other_lines_z14_15", + "mv_other_lines_z16_20" + ], + "osm_other_points": [ + "mv_other_points", + "mv_other_points_centroids_z16_20" + ], + "osm_place_areas": [ + "mv_place_areas_z14_20", + "mv_place_points_centroids_z0_2", + "mv_place_points_centroids_z11_20" + ], + "osm_place_points": [ + "mv_place_points_centroids_z0_2", + "mv_place_points_centroids_z11_20" + ] + } +} From 775134043b402704822884645a3ec612d122f1cb Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 16 Mar 2026 19:50:32 -0500 Subject: [PATCH 03/17] Update scripts --- .../checks/imposm_import.py | 289 +++++++++++------- .../checks/tile_cache.py | 159 ++++++++++ images/tiler-pipeline-monitor/config.py | 26 ++ .../tiler-pipeline-monitor/requirements.txt | 2 + .../tiler-pipeline-monitor/tables_config.json | 106 ------- 5 files changed, 364 insertions(+), 218 deletions(-) create mode 100644 images/tiler-pipeline-monitor/checks/tile_cache.py diff --git a/images/tiler-pipeline-monitor/checks/imposm_import.py b/images/tiler-pipeline-monitor/checks/imposm_import.py index cc981ae17..f90ed5186 100644 --- a/images/tiler-pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-pipeline-monitor/checks/imposm_import.py @@ -3,10 +3,12 @@ For each changeset in the 1-2 hour window: 1. Check if minute replication covers it (replication timestamp >= closed_at) 2. 
Check if its way/relation elements exist in the tiler DB with the correct version + 3. For a random sample: verify materialized views + S3 tile cache """ import json import os +import random import xml.etree.ElementTree as ET from datetime import datetime, timezone, timedelta @@ -202,121 +204,129 @@ def _get_changeset_elements(changeset_id): RELATION_TABLES = _tables_config["relation_tables"] WAY_TABLES = _tables_config["way_tables"] TABLE_TO_VIEWS = _tables_config["table_to_views"] -TAG_TO_TABLES = _tables_config.get("tag_to_tables", {}) -def _resolve_candidate_tables(elem): - """Determine candidate tables based on element type and tags. - - Uses tag_to_tables mapping from imposm config to narrow the search - to only the tables where the element could exist, instead of searching all. - Falls back to all tables for the element type if no tags match. - """ - type_tables = RELATION_TABLES if elem["type"] == "relation" else WAY_TABLES - tags = elem.get("tags", {}) - - if not tags or not TAG_TO_TABLES: - return type_tables - - # Collect tables that match any of the element's tag keys - matched_tables = set() - for tag_key in tags: - for table in TAG_TO_TABLES.get(tag_key, []): - matched_tables.add(table) - - # Intersect with type tables (relations vs ways) to respect element type - filtered = [t for t in type_tables if t in matched_tables] - - if not filtered: - # No tag matched any known mapping — fall back to all tables for safety - return type_tables - - return filtered +def _build_union_query(tables, search_id): + """Build a UNION ALL query to search osm_id across multiple tables in 1 round-trip.""" + parts = [] + for table in tables: + parts.append( + f"SELECT '{table}' AS tbl, tags->'version' AS version " + f"FROM {table} WHERE osm_id = {int(search_id)} LIMIT 1" + ) + return " UNION ALL ".join(parts) -def _check_element_in_db(conn, elem): - """Check if an element exists in tiler DB tables (osm_*) and views (mv_*).""" +def _check_element_in_tables(conn, elem): + 
"""Check if an element exists in tiler DB tables using a single UNION ALL query.""" osm_id = elem["osm_id"] - # Imposm stores relations with negative IDs search_id = -osm_id if elem["type"] == "relation" else osm_id + candidate_tables = RELATION_TABLES if elem["type"] == "relation" else WAY_TABLES cur = conn.cursor() - # --- Resolve candidate tables based on element tags --- - candidate_tables = _resolve_candidate_tables(elem) - - # Filter to only tables that actually exist + # Get existing tables (cached per connection would be ideal, but simple first) cur.execute(""" SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_name LIKE 'osm_%%' - ORDER BY table_name """) existing_tables = {row[0] for row in cur.fetchall()} tables = [t for t in candidate_tables if t in existing_tables] + if not tables: + cur.close() + return { + "type": elem["type"], + "osm_id": osm_id, + "action": elem["action"], + "expected_version": elem["version"], + "found_in_tables": [], + "found_in_views": [], + "version_match": None, + "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", + } + + # Single UNION ALL query across all candidate tables + query = _build_union_query(tables, search_id) found_in_tables = [] version_match = None - for table in tables: - try: - quoted = psycopg2.extensions.quote_ident(table, cur) - cur.execute( - f"SELECT tags->'version' FROM {quoted} WHERE osm_id = %s LIMIT 1", - (search_id,), - ) - row = cur.fetchone() - if row is not None: - db_version = row[0] - found_in_tables.append(table) - if elem["version"] is not None and db_version is not None: - try: - version_match = int(db_version) >= elem["version"] - except (ValueError, TypeError): - version_match = None - except Exception: - conn.rollback() - - # --- Search in mv_* views related to candidate tables --- + try: + cur.execute(query) + for row in cur.fetchall(): + found_in_tables.append(row[0]) + db_version = row[1] + if elem["version"] is not None and db_version is not 
None: + try: + version_match = int(db_version) >= elem["version"] + except (ValueError, TypeError): + version_match = None + except Exception: + conn.rollback() + + cur.close() + + return { + "type": elem["type"], + "osm_id": osm_id, + "action": elem["action"], + "expected_version": elem["version"], + "found_in_tables": found_in_tables, + "found_in_views": [], + "version_match": version_match, + "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", + } + + +def _check_element_in_views(conn, check): + """Check if an element exists in materialized views using a single UNION ALL query.""" + osm_id = check["osm_id"] + search_id = -osm_id if check["type"] == "relation" else osm_id + + # Collect all candidate views from the tables where the element was found + found_tables = check["found_in_tables"] candidate_views = set() - for table in candidate_tables: + for table in found_tables: for v in TABLE_TO_VIEWS.get(table, []): candidate_views.add(v) - # Filter to only views that exist + if not candidate_views: + return check + + cur = conn.cursor() + + # Filter to existing views cur.execute(""" SELECT matviewname FROM pg_matviews WHERE schemaname = 'public' AND matviewname LIKE 'mv_%%' """) existing_views = {row[0] for row in cur.fetchall()} - views_to_check = [v for v in candidate_views if v in existing_views] + views = sorted(v for v in candidate_views if v in existing_views) + + if not views: + cur.close() + return check + + # Single UNION ALL query across all candidate views + parts = [] + for view in views: + parts.append( + f"SELECT '{view}' AS vw FROM {view} " + f"WHERE osm_id = {int(search_id)} LIMIT 1" + ) + query = " UNION ALL ".join(parts) found_in_views = [] - - for view in sorted(views_to_check): - try: - quoted = psycopg2.extensions.quote_ident(view, cur) - cur.execute( - f"SELECT 1 FROM {quoted} WHERE osm_id = %s LIMIT 1", - (search_id,), - ) - if cur.fetchone() is not None: - found_in_views.append(view) - except Exception: - conn.rollback() + try: + 
cur.execute(query) + for row in cur.fetchall(): + found_in_views.append(row[0]) + except Exception: + conn.rollback() cur.close() - - return { - "type": elem["type"], - "osm_id": osm_id, - "action": elem["action"], - "expected_version": elem["version"], - "found_in_tables": found_in_tables, - "found_in_views": found_in_views, - "version_match": version_match, - "searched_tables": candidate_tables, - "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", - } + check["found_in_views"] = found_in_views + return check def _is_element_deleted(elem): @@ -337,8 +347,14 @@ def _is_element_deleted(elem): return False -def _check_elements_in_db(conn, changeset_id): - """Check all elements of a changeset in the tiler DB.""" +def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None): + """Check all elements of a changeset in the tiler DB. + + - ALL elements: verified in osm_* tables (fast, tag-filtered) + - SAMPLE elements: full check → tables + views + S3 tile cache + """ + from checks.tile_cache import check_tile_cache_for_element + try: elements = _get_changeset_elements(changeset_id) except requests.RequestException as e: @@ -355,59 +371,96 @@ def _check_elements_in_db(conn, changeset_id): "elements": [], } + # Filter out deletes + active_elements = [] + for elem in elements: + if elem["action"] == "delete": + print(f" SKIP {elem['type']}/{elem['osm_id']} (action=delete)") + else: + active_elements.append(elem) + + if not active_elements: + return { + "status": "ok", + "message": "All elements in this changeset are deletes", + "elements": [], + } + + # Select random sample for full pipeline check (tables + views + S3) + sample_size = min(Config.FULL_CHECK_SAMPLE_SIZE, len(active_elements)) + sample_indices = set(random.sample(range(len(active_elements)), sample_size)) + + print(f" [tiler_db] Checking {len(active_elements)} elements " + f"(full pipeline check on {sample_size} sampled)") + missing = [] mismatches = [] checked = [] + tile_cache_results = [] - 
print(f" [tiler_db] Checking {len(elements)} way/relation elements") + for idx, elem in enumerate(active_elements): + is_sample = idx in sample_indices + sample_label = " [SAMPLE]" if is_sample else "" - for elem in elements: - if elem["action"] == "delete": - print(f" SKIP {elem['type']}/{elem['osm_id']} (action=delete)") - continue + # Step 1: Check tables — single UNION ALL query (ALL elements) + check = _check_element_in_tables(conn, elem) + + # Step 2: Check views — single UNION ALL query (SAMPLE only) + if is_sample and check["found_in_tables"]: + check = _check_element_in_views(conn, check) - check = _check_element_in_db(conn, elem) checked.append(check) tables = check["found_in_tables"] views = check["found_in_views"] - # Show which tags were used to resolve candidate tables - tag_keys = [k for k in elem.get("tags", {}) if k in TAG_TO_TABLES] - searched = check.get("searched_tables", []) - - if tables or views: + if tables: icon = "OK" if check["version_match"] is not False else "VERSION_MISMATCH" - print(f" {icon} {elem['type']}/{elem['osm_id']} v{elem['version']} " + print(f" {icon}{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " f"({elem['action']}) version_match={check['version_match']}") - if tag_keys: - print(f" matched tags: {tag_keys} -> searched: {searched}") - if tables: - print(f" tables: {tables}") + print(f" tables: {tables}") if views: print(f" views: {views}") print(f" {check['url']}") + + # Step 3: Check S3 tile cache (SAMPLE only) + if is_sample and changeset_closed_at and Config.S3_BUCKET_CACHE_TILER: + try: + tile_result = check_tile_cache_for_element( + conn, check, changeset_closed_at + ) + tile_cache_results.append(tile_result) + cache_status = tile_result.get("cache", {}).get("status", "unknown") + tile_info = tile_result.get("tile", {}) + if cache_status == "stale": + print(f" [S3 CACHE] STALE tile z{tile_info.get('z')}/{tile_info.get('x')}/{tile_info.get('y')}") + elif cache_status == "ok": + print(f" [S3 CACHE] 
OK tile z{tile_info.get('z')}/{tile_info.get('x')}/{tile_info.get('y')}") + elif cache_status == "skipped": + print(f" [S3 CACHE] skipped: {tile_result.get('cache', {}).get('message', '')}") + except Exception as e: + print(f" [S3 CACHE] error: {e}") else: - # Check if the element was deleted in a later changeset + # Not found — check if deleted in a later changeset if _is_element_deleted(elem): print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " f"({elem['action']}) -> deleted in a later changeset") print(f" {check['url']}") check["deleted"] = True continue - print(f" MISSING {elem['type']}/{elem['osm_id']} v{elem['version']} " - f"({elem['action']}) -> NOT in tables or views") - if tag_keys: - print(f" matched tags: {tag_keys} -> searched: {searched}") - else: - print(f" no matching tags found, searched all: {searched}") + + print(f" MISSING{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}) -> NOT in tables") print(f" {check['url']}") - if not tables and not views and not check.get("deleted"): + if not tables and not check.get("deleted"): missing.append(f"{elem['type']}/{elem['osm_id']}") elif check["version_match"] is False: mismatches.append(f"{elem['type']}/{elem['osm_id']} expected v{elem['version']}") + # Build status message + stale_tiles = [r for r in tile_cache_results if r.get("cache", {}).get("status") == "stale"] + if missing: status = "warning" msg = f"Missing from tiler DB: {', '.join(missing)}" @@ -416,11 +469,23 @@ def _check_elements_in_db(conn, changeset_id): elif mismatches: status = "warning" msg = f"Version mismatches: {', '.join(mismatches)}" + elif stale_tiles: + status = "warning" + stale_ids = [f"{r['type']}/{r['osm_id']}" for r in stale_tiles] + msg = (f"All {len(checked)} elements in tables, " + f"but S3 tile cache stale for: {', '.join(stale_ids)}") else: status = "ok" msg = f"All {len(checked)} elements verified in tiler DB" + if tile_cache_results: + msg += f" (S3 cache OK for 
{len(tile_cache_results)} sampled)" - return {"status": status, "message": msg, "elements": checked} + return { + "status": status, + "message": msg, + "elements": checked, + "tile_cache": tile_cache_results, + } # --------------------------------------------------------------------------- @@ -543,7 +608,7 @@ def check_pipeline(): print(f" [replication] UNKNOWN: Replication state unavailable") # Step 2: tiler DB - db_check = _check_elements_in_db(conn, cs["id"]) + db_check = _check_elements_in_db(conn, cs["id"], cs["closed_at"]) cs_result["tiler_db"] = db_check print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") @@ -646,7 +711,7 @@ def check_single_changeset(changeset_id): result["details"]["tiler_db"] = {"status": "critical", "message": str(e)} return result - db_check = _check_elements_in_db(conn, changeset_id) + db_check = _check_elements_in_db(conn, changeset_id, closed_at or None) conn.close() result["details"]["tiler_db"] = db_check print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") diff --git a/images/tiler-pipeline-monitor/checks/tile_cache.py b/images/tiler-pipeline-monitor/checks/tile_cache.py new file mode 100644 index 000000000..9d21eb457 --- /dev/null +++ b/images/tiler-pipeline-monitor/checks/tile_cache.py @@ -0,0 +1,159 @@ +"""Pipeline check: verify tile cache in S3 is up-to-date. + +For a sampled element, check if the cached tile in S3 was modified +after the changeset closed_at. If the tile is stale, the cache purge +(SQS → tiler-cache) may have failed. 
+""" + +import mercantile +import psycopg2.extensions +from datetime import datetime, timezone + +from config import Config + + +def _get_element_centroid(conn, elem): + """Get the centroid (lon, lat) of an element from the tiler DB.""" + osm_id = elem["osm_id"] + search_id = -osm_id if elem["type"] == "relation" else osm_id + + # Search in the tables where it was found + found_tables = elem.get("found_in_tables", []) + if not found_tables: + return None + + cur = conn.cursor() + for table in found_tables: + try: + quoted = psycopg2.extensions.quote_ident(table, cur) + cur.execute( + f"SELECT ST_X(ST_Centroid(ST_Transform(geometry, 4326))), " + f"ST_Y(ST_Centroid(ST_Transform(geometry, 4326))) " + f"FROM {quoted} WHERE osm_id = %s LIMIT 1", + (search_id,), + ) + row = cur.fetchone() + if row and row[0] is not None: + cur.close() + return {"lon": row[0], "lat": row[1]} + except Exception: + conn.rollback() + + cur.close() + return None + + +def _get_tile_for_point(lon, lat, zoom): + """Convert lon/lat to tile z/x/y.""" + tile = mercantile.tile(lon, lat, zoom) + return {"z": tile.z, "x": tile.x, "y": tile.y} + + +def _check_tile_in_s3(tile, changeset_closed_at): + """Check if a cached tile in S3 is stale (older than changeset). + + Returns dict with status and details for each S3 path. 
+ """ + if not Config.S3_BUCKET_CACHE_TILER: + return { + "status": "skipped", + "message": "S3_BUCKET_CACHE_TILER not configured", + } + + s3 = Config.get_s3_client() + bucket = Config.S3_BUCKET_CACHE_TILER + z, x, y = tile["z"], tile["x"], tile["y"] + + results = [] + stale_paths = [] + + for path_prefix in Config.S3_BUCKET_PATH_FILES: + key = f"{path_prefix}/{z}/{x}/{y}.pbf" + try: + resp = s3.head_object(Bucket=bucket, Key=key) + last_modified = resp["LastModified"] + + # Parse changeset closed_at + closed_dt = datetime.fromisoformat( + changeset_closed_at.replace("Z", "+00:00") + ) + + is_stale = last_modified < closed_dt + result = { + "path": key, + "last_modified": last_modified.isoformat(), + "is_stale": is_stale, + } + results.append(result) + if is_stale: + stale_paths.append(key) + + except s3.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + # Tile not in cache — not stale, tegola will generate on demand + results.append({ + "path": key, + "last_modified": None, + "is_stale": False, + "note": "not cached (tegola generates on demand)", + }) + else: + results.append({ + "path": key, + "error": str(e), + }) + + if stale_paths: + return { + "status": "stale", + "message": f"Tile cache is stale for: {', '.join(stale_paths)}", + "tile": tile, + "details": results, + } + else: + return { + "status": "ok", + "message": "Tile cache is up-to-date or not cached", + "tile": tile, + "details": results, + } + + +def check_tile_cache_for_element(conn, elem_check, changeset_closed_at): + """Full tile cache verification for a single element. 
+ + Args: + conn: DB connection + elem_check: result dict from _check_element_in_db (with found_in_tables) + changeset_closed_at: ISO timestamp string + + Returns: + dict with tile cache check results + """ + osm_id = elem_check["osm_id"] + elem_type = elem_check["type"] + zoom = Config.TILE_CHECK_ZOOM + + # Step 1: get geometry from DB + centroid = _get_element_centroid(conn, elem_check) + if not centroid: + return { + "osm_id": osm_id, + "type": elem_type, + "status": "skipped", + "message": "Could not get geometry from DB", + } + + # Step 2: calculate tile + tile = _get_tile_for_point(centroid["lon"], centroid["lat"], zoom) + + # Step 3: check S3 cache + cache_result = _check_tile_in_s3(tile, changeset_closed_at) + + return { + "osm_id": osm_id, + "type": elem_type, + "centroid": centroid, + "tile": tile, + "cache": cache_result, + } diff --git a/images/tiler-pipeline-monitor/config.py b/images/tiler-pipeline-monitor/config.py index 89b698b87..95667fac9 100644 --- a/images/tiler-pipeline-monitor/config.py +++ b/images/tiler-pipeline-monitor/config.py @@ -38,6 +38,32 @@ class Config: # Server MONITOR_PORT = int(os.getenv("MONITOR_PORT", 8001)) + # S3 tile cache verification + S3_BUCKET_CACHE_TILER = os.getenv("S3_BUCKET_CACHE_TILER", "") + S3_BUCKET_PATH_FILES = os.getenv("S3_BUCKET_PATH_FILES", "mnt/data/ohm,mnt/data/ohm_admin,mnt/data/ohm_other_boundaries").split(",") + TILER_CACHE_AWS_ACCESS_KEY_ID = os.getenv("TILER_CACHE_AWS_ACCESS_KEY_ID", "") + TILER_CACHE_AWS_SECRET_ACCESS_KEY = os.getenv("TILER_CACHE_AWS_SECRET_ACCESS_KEY", "") + TILER_CACHE_AWS_ENDPOINT = os.getenv("TILER_CACHE_AWS_ENDPOINT", "https://s3.amazonaws.com") + TILER_CACHE_REGION = os.getenv("TILER_CACHE_REGION", "us-east-1") + TILER_CACHE_CLOUD_INFRASTRUCTURE = os.getenv("TILER_CACHE_CLOUD_INFRASTRUCTURE", "aws") + # Zoom level to verify tile cache (use high zoom for precise check) + TILE_CHECK_ZOOM = int(os.getenv("TILE_CHECK_ZOOM", 16)) + # Number of random elements to do full pipeline 
check (tables + views + S3) + FULL_CHECK_SAMPLE_SIZE = int(os.getenv("FULL_CHECK_SAMPLE_SIZE", 2)) + + @staticmethod + def get_s3_client(): + import boto3 + if Config.TILER_CACHE_CLOUD_INFRASTRUCTURE == "hetzner": + return boto3.client( + "s3", + aws_access_key_id=Config.TILER_CACHE_AWS_ACCESS_KEY_ID, + aws_secret_access_key=Config.TILER_CACHE_AWS_SECRET_ACCESS_KEY, + endpoint_url=Config.TILER_CACHE_AWS_ENDPOINT, + region_name=Config.TILER_CACHE_REGION, + ) + return boto3.client("s3") + @staticmethod def get_db_dsn(): return ( diff --git a/images/tiler-pipeline-monitor/requirements.txt b/images/tiler-pipeline-monitor/requirements.txt index 6bc97657c..1360cc74e 100644 --- a/images/tiler-pipeline-monitor/requirements.txt +++ b/images/tiler-pipeline-monitor/requirements.txt @@ -2,3 +2,5 @@ fastapi uvicorn requests psycopg2-binary +mercantile +boto3 diff --git a/images/tiler-pipeline-monitor/tables_config.json b/images/tiler-pipeline-monitor/tables_config.json index cbfd662a7..4fd9e4e24 100644 --- a/images/tiler-pipeline-monitor/tables_config.json +++ b/images/tiler-pipeline-monitor/tables_config.json @@ -29,112 +29,6 @@ "osm_place_points", "osm_landuse_points" ], - "_comment_tag_to_tables": "Mapping from OSM tag keys to imposm tables, derived from tiler-imposm/config/layers/*.json", - "tag_to_tables": { - "railway": [ - "osm_transport_lines", - "osm_transport_areas", - "osm_transport_points", - "osm_transport_multilines" - ], - "highway": [ - "osm_transport_lines", - "osm_transport_areas", - "osm_transport_points", - "osm_transport_multilines", - "osm_street_multilines" - ], - "aeroway": [ - "osm_transport_lines", - "osm_transport_areas", - "osm_transport_points" - ], - "route": [ - "osm_transport_lines", - "osm_transport_areas", - "osm_transport_points", - "osm_route_lines", - "osm_route_multilines" - ], - "waterway": [ - "osm_water_lines", - "osm_water_areas" - ], - "building": [ - "osm_buildings", - "osm_buildings_points" - ], - "amenity": [ - "osm_amenity_areas", 
- "osm_amenity_points" - ], - "boundary": [ - "osm_admin_lines", - "osm_admin_areas", - "osm_admin_relation_members" - ], - "landuse": [ - "osm_landuse_areas", - "osm_landuse_lines", - "osm_landuse_points", - "osm_water_areas" - ], - "leisure": [ - "osm_landuse_areas", - "osm_landuse_lines", - "osm_landuse_points" - ], - "natural": [ - "osm_landuse_areas", - "osm_landuse_lines", - "osm_landuse_points", - "osm_water_lines", - "osm_water_areas" - ], - "place": [ - "osm_place_areas", - "osm_place_points" - ], - "barrier": [ - "osm_other_areas", - "osm_other_lines", - "osm_other_points", - "osm_water_lines" - ], - "historic": [ - "osm_other_areas", - "osm_other_lines", - "osm_other_points" - ], - "man_made": [ - "osm_other_areas", - "osm_other_lines", - "osm_other_points" - ], - "power": [ - "osm_other_areas", - "osm_other_lines", - "osm_other_points" - ], - "military": [ - "osm_other_areas", - "osm_other_lines", - "osm_other_points" - ], - "communication": [ - "osm_communication_lines", - "osm_communication_multilines" - ], - "tourism": [ - "osm_other_points" - ], - "shop": [ - "osm_other_points" - ], - "craft": [ - "osm_other_points" - ] - }, "_comment": "Only check the lowest and highest zoom views (extremes) per table to reduce queries", "table_to_views": { "osm_admin_lines": [ From d8192f1510ebd08770c6cfbd73c5daa6f42eca08 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Fri, 20 Mar 2026 15:50:30 -0500 Subject: [PATCH 04/17] Refectore code to check languages and tiler pipeline --- compose/tiler.yml | 37 +- hetzner/traefik/traefik.template.yml | 13 + images/tiler-monitor/Dockerfile | 46 +- images/tiler-monitor/entrypoint.sh | 13 + .../monitor_languages.sh | 7 +- .../pipeline-monitor/checks/__init__.py | 0 .../pipeline-monitor/checks/imposm_import.py | 960 ++++++++++++++++++ .../pipeline-monitor/checks/mv_freshness.py | 179 ++++ .../checks/replication_lag.py | 89 ++ .../pipeline-monitor/checks/tile_cache.py | 159 +++ .../tiler-monitor/pipeline-monitor/config.py | 97 ++ 
.../tiler-monitor/pipeline-monitor/monitor.py | 234 +++++ .../pipeline-monitor/requirements.txt | 6 + .../pipeline-monitor/retry_store.py | 378 +++++++ .../pipeline-monitor/static/dashboard.html | 507 +++++++++ .../pipeline-monitor/tables_config.json | 94 ++ images/tiler-pipeline-monitor/config.py | 6 +- 17 files changed, 2795 insertions(+), 30 deletions(-) create mode 100644 images/tiler-monitor/entrypoint.sh rename images/tiler-monitor/{ => language-monitor}/monitor_languages.sh (90%) create mode 100644 images/tiler-monitor/pipeline-monitor/checks/__init__.py create mode 100644 images/tiler-monitor/pipeline-monitor/checks/imposm_import.py create mode 100644 images/tiler-monitor/pipeline-monitor/checks/mv_freshness.py create mode 100644 images/tiler-monitor/pipeline-monitor/checks/replication_lag.py create mode 100644 images/tiler-monitor/pipeline-monitor/checks/tile_cache.py create mode 100644 images/tiler-monitor/pipeline-monitor/config.py create mode 100644 images/tiler-monitor/pipeline-monitor/monitor.py create mode 100644 images/tiler-monitor/pipeline-monitor/requirements.txt create mode 100644 images/tiler-monitor/pipeline-monitor/retry_store.py create mode 100644 images/tiler-monitor/pipeline-monitor/static/dashboard.html create mode 100644 images/tiler-monitor/pipeline-monitor/tables_config.json diff --git a/compose/tiler.yml b/compose/tiler.yml index 10ac3158e..ca194dda9 100644 --- a/compose/tiler.yml +++ b/compose/tiler.yml @@ -71,21 +71,24 @@ services: # - ohm_network - # tiler-monitor: - # image: rub21/tiler-monitor:v1 - # build: - # context: ../images/tiler-monitor - # dockerfile: Dockerfile - # volumes: - # - /var/run/docker.sock:/var/run/docker.sock - # - ../images/tiler-monitor:/app - # - ../hetzner:/app/hetzner - # environment: - # - DOCKER_CONFIG_ENVIRONMENT=staging - # env_file: - # - ../envs/.env.tiler - # stdin_open: true - # tty: true + tiler-monitor: + image: rub21/tiler-monitor:v2 + build: + context: ../images/tiler-monitor + dockerfile: 
Dockerfile + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ../hetzner:/app/hetzner + - tiler_monitor_data:/data + ports: + - "8001:8001" + environment: + - TILER_MONITORING_DOCKER_CONFIG_ENVIRONMENT=staging + env_file: + - ../envs/.env.tiler + restart: always + networks: + - ohm_network networks: ohm_network: @@ -99,3 +102,7 @@ volumes: tiler_imposm_data: driver: local name: tiler_imposm + + tiler_monitor_data: + driver: local + name: tiler_monitor diff --git a/hetzner/traefik/traefik.template.yml b/hetzner/traefik/traefik.template.yml index b2f64c918..b10905f5f 100644 --- a/hetzner/traefik/traefik.template.yml +++ b/hetzner/traefik/traefik.template.yml @@ -161,6 +161,14 @@ http: middlewares: - secure-headers + tiler-monitoring-router: + rule: Host(`tiler-monitoring.{{OHM_DOMAIN}}`) + entryPoints: + - port-web + service: tiler_monitor + middlewares: + - secure-headers + services: tiler_server: loadBalancer: @@ -207,6 +215,11 @@ http: servers: - url: http://cadvisor:8080 + tiler_monitor: + loadBalancer: + servers: + - url: http://tiler-monitor:8001 + providers: file: filename: /etc/traefik/traefik.yml diff --git a/images/tiler-monitor/Dockerfile b/images/tiler-monitor/Dockerfile index 0aa466bdb..ca68d3274 100644 --- a/images/tiler-monitor/Dockerfile +++ b/images/tiler-monitor/Dockerfile @@ -1,14 +1,42 @@ -FROM docker:cli +FROM python:3.12-slim -RUN apk add --no-cache \ - bash \ - curl \ - postgresql-client \ - docker-cli-compose +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + bash \ + postgresql-client && \ + rm -rf /var/lib/apt/lists/* + +# Install Docker CLI from official static binaries +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then DOCKER_ARCH="x86_64"; else DOCKER_ARCH="aarch64"; fi && \ + curl -fsSL "https://download.docker.com/linux/static/stable/${DOCKER_ARCH}/docker-27.5.1.tgz" | \ + tar xz --strip-components=1 -C /usr/local/bin docker/docker + +# Install Docker Compose plugin 
+RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then COMPOSE_ARCH="x86_64"; else COMPOSE_ARCH="aarch64"; fi && \ + mkdir -p /usr/local/lib/docker/cli-plugins && \ + curl -fsSL "https://github.com/docker/compose/releases/download/v2.32.4/docker-compose-linux-${COMPOSE_ARCH}" \ + -o /usr/local/lib/docker/cli-plugins/docker-compose && \ + chmod +x /usr/local/lib/docker/cli-plugins/docker-compose WORKDIR /app -COPY monitor_languages.sh . -RUN chmod +x monitor_languages.sh +# Install Python dependencies for pipeline monitor +COPY pipeline-monitor/requirements.txt /app/pipeline-monitor/requirements.txt +RUN pip install --no-cache-dir -r /app/pipeline-monitor/requirements.txt + +# Copy application code +COPY language-monitor/ /app/language-monitor/ +COPY pipeline-monitor/ /app/pipeline-monitor/ +COPY entrypoint.sh /app/entrypoint.sh + +RUN chmod +x /app/entrypoint.sh /app/language-monitor/monitor_languages.sh + +EXPOSE 8001 + +HEALTHCHECK --interval=30s --timeout=10s --retries=3 --start-period=30s \ + CMD curl -f http://localhost:8001/health || exit 1 -CMD ["bash", "monitor_languages.sh"] +CMD ["/app/entrypoint.sh"] diff --git a/images/tiler-monitor/entrypoint.sh b/images/tiler-monitor/entrypoint.sh new file mode 100644 index 000000000..f1520d1c3 --- /dev/null +++ b/images/tiler-monitor/entrypoint.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +echo "$(date +'%Y-%m-%d %H:%M:%S') - Starting tiler-monitor (combined)" + +# Start language monitor in background +echo "$(date +'%Y-%m-%d %H:%M:%S') - Starting language monitor in background..." +bash /app/language-monitor/monitor_languages.sh & + +# Start pipeline monitor in foreground +echo "$(date +'%Y-%m-%d %H:%M:%S') - Starting pipeline monitor (FastAPI on port 8001)..." 
+cd /app/pipeline-monitor +exec python monitor.py diff --git a/images/tiler-monitor/monitor_languages.sh b/images/tiler-monitor/language-monitor/monitor_languages.sh similarity index 90% rename from images/tiler-monitor/monitor_languages.sh rename to images/tiler-monitor/language-monitor/monitor_languages.sh index 84ae99136..978df24a3 100755 --- a/images/tiler-monitor/monitor_languages.sh +++ b/images/tiler-monitor/language-monitor/monitor_languages.sh @@ -11,9 +11,10 @@ log_message() { PG_CONNECTION="postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" -NIM_NUMBER_LANGUAGES="${NIM_NUMBER_LANGUAGES:-5}" # Default to 5 languages -FORCE_LANGUAGES_GENERATION="${FORCE_LANGUAGES_GENERATION:-false}" -EVALUATION_INTERVAL="${EVALUATION_INTERVAL:-3600}" # Default to 1 hour +NIM_NUMBER_LANGUAGES="${TILER_MONITORING_NIM_NUMBER_LANGUAGES:-5}" # Default to 5 languages +FORCE_LANGUAGES_GENERATION="${TILER_MONITORING_FORCE_LANGUAGES_GENERATION:-false}" +EVALUATION_INTERVAL="${TILER_MONITORING_EVALUATION_INTERVAL:-3600}" # Default to 1 hour +DOCKER_CONFIG_ENVIRONMENT="${TILER_MONITORING_DOCKER_CONFIG_ENVIRONMENT:-staging}" log_message "Configuration Summary:" log_message " Environment: $DOCKER_CONFIG_ENVIRONMENT" diff --git a/images/tiler-monitor/pipeline-monitor/checks/__init__.py b/images/tiler-monitor/pipeline-monitor/checks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py new file mode 100644 index 000000000..2518fe12c --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -0,0 +1,960 @@ +"""Pipeline check: changeset-centric verification. + +For each changeset in the 1-2 hour window: + 1. Check if minute replication covers it (replication timestamp >= closed_at) + 2. Check if its way/relation elements exist in the tiler DB with the correct version + 3. 
For a random sample: verify materialized views + S3 tile cache +""" + +import json +import logging +import os +import random +import xml.etree.ElementTree as ET +from datetime import datetime, timezone, timedelta + +import psycopg2 +import requests + +from config import Config +import retry_store + +logger = logging.getLogger(__name__) + +# Load table/view mapping from JSON config +_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tables_config.json") +with open(_config_path) as f: + _tables_config = json.load(f) + +OHM_BASE = None # lazily computed + + +def _ohm_base(): + global OHM_BASE + if OHM_BASE is None: + OHM_BASE = Config.OHM_API_BASE.replace("/api/0.6", "") + return OHM_BASE + + +def _parse_timestamp(ts_str): + """Parse an ISO timestamp string to a timezone-aware datetime.""" + ts_str = ts_str.replace("Z", "+00:00") + return datetime.fromisoformat(ts_str) + + +def _relative_age(ts_str): + """Return a human-readable relative age string like '4h ago' or '25m ago'.""" + try: + dt = _parse_timestamp(ts_str) + delta = datetime.now(timezone.utc) - dt + total_seconds = int(delta.total_seconds()) + if total_seconds < 60: + return f"{total_seconds}s ago" + minutes = total_seconds // 60 + if minutes < 60: + return f"{minutes}m ago" + hours = minutes // 60 + remaining_min = minutes % 60 + if hours < 24: + return f"{hours}h{remaining_min}m ago" if remaining_min else f"{hours}h ago" + days = hours // 24 + remaining_hours = hours % 24 + return f"{days}d{remaining_hours}h ago" if remaining_hours else f"{days}d ago" + except Exception: + return "" + + +# --------------------------------------------------------------------------- +# Step 0: get changesets in the age window +# --------------------------------------------------------------------------- + +def _get_changesets_in_window(min_age, max_age, limit=10): + """Fetch closed changesets whose age is between min_age and max_age seconds. 
+ + Fetches recent changesets and filters locally by age window. + """ + now = datetime.now(timezone.utc) + min_closed = now - timedelta(seconds=max_age) # oldest allowed + max_closed = now - timedelta(seconds=min_age) # newest allowed + + # Fetch enough to find some in the window + fetch_limit = 100 + url = f"{Config.OHM_API_BASE}/changesets" + params = {"limit": fetch_limit, "closed": "true"} + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + + print(f"[pipeline] Fetching changesets: {url}?limit={fetch_limit}&closed=true") + print(f" Looking for changesets closed between " + f"{min_closed.strftime('%Y-%m-%dT%H:%M:%SZ')} and " + f"{max_closed.strftime('%Y-%m-%dT%H:%M:%SZ')} " + f"(age {min_age//60}-{max_age//60} min)") + + resp = requests.get(url, params=params, headers=headers, timeout=30) + resp.raise_for_status() + + root = ET.fromstring(resp.content) + changesets = [] + skipped_young = 0 + skipped_old = 0 + + for cs in root.findall("changeset"): + cs_id = int(cs.attrib["id"]) + closed_at = cs.attrib.get("closed_at", "") + if not closed_at: + continue + try: + closed_dt = _parse_timestamp(closed_at) + except (ValueError, TypeError): + continue + + age_minutes = (now - closed_dt).total_seconds() / 60 + + if closed_dt > max_closed: + skipped_young += 1 + continue + elif closed_dt < min_closed: + skipped_old += 1 + # Changesets are ordered by newest first, so once we hit old ones, stop + break + else: + changesets.append({ + "id": cs_id, + "closed_at": closed_at, + "closed_dt": closed_dt, + "age_minutes": round(age_minutes, 1), + }) + + if len(changesets) >= limit: + break + + print(f" Fetched {len(root.findall('changeset'))} changesets from API") + print(f" Skipped: {skipped_young} too young (<{min_age//60}min), " + f"{skipped_old} too old (>{max_age//60}min)") + print(f" Found {len(changesets)} changesets in window:") + for cs in changesets: + print(f" changeset {cs['id']}: closed_at={cs['closed_at']} " + f"(age={cs['age_minutes']}min)") + + return 
changesets + + +# --------------------------------------------------------------------------- +# Step 1: replication check +# --------------------------------------------------------------------------- + +def _parse_replication_state(text): + """Parse state.txt and return (sequence, timestamp).""" + data = {} + for line in text.strip().splitlines(): + if "=" in line: + key, _, value = line.partition("=") + data[key.strip()] = value.strip() + seq = int(data.get("sequenceNumber", 0)) + ts_raw = data.get("timestamp", "").replace("\\:", ":") + try: + ts = datetime.fromisoformat(ts_raw.replace("Z", "+00:00")) + except ValueError: + ts = None + return seq, ts + + +def _check_replication_covers(changeset, repl_seq, repl_ts): + """Check if the replication state covers this changeset.""" + if repl_ts is None: + return { + "status": "warning", + "message": "Cannot parse replication timestamp", + } + + closed_dt = changeset["closed_dt"] + if repl_ts >= closed_dt: + return { + "status": "ok", + "message": (f"Replication covers this changeset " + f"(repl_ts={repl_ts.isoformat()} >= closed_at={changeset['closed_at']})"), + "replication_sequence": repl_seq, + "replication_timestamp": repl_ts.isoformat(), + } + else: + lag = (closed_dt - repl_ts).total_seconds() + return { + "status": "critical", + "message": (f"Replication does NOT cover this changeset. 
" + f"Replication is {round(lag/60, 1)}min behind " + f"(repl_ts={repl_ts.isoformat()} < closed_at={changeset['closed_at']})"), + "replication_sequence": repl_seq, + "replication_timestamp": repl_ts.isoformat(), + } + + +# --------------------------------------------------------------------------- +# Step 2: tiler DB check +# --------------------------------------------------------------------------- + +def _get_changeset_elements(changeset_id): + """Download changeset diff and extract way/relation elements with versions.""" + url = f"{Config.OHM_API_BASE}/changeset/{changeset_id}/download" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + resp = requests.get(url, headers=headers, timeout=30) + resp.raise_for_status() + + root = ET.fromstring(resp.content) + elements = [] + + for action in root: # create, modify, delete + action_type = action.tag + for elem in action: + osm_id = elem.attrib.get("id") + version = elem.attrib.get("version") + elem_type = elem.tag + if osm_id and elem_type in ("way", "relation"): + # Extract tags to determine which imposm table this element belongs to + tags = {} + for tag in elem.findall("tag"): + k = tag.attrib.get("k") + v = tag.attrib.get("v") + if k and v: + tags[k] = v + timestamp = elem.attrib.get("timestamp", "") + elements.append({ + "type": elem_type, + "osm_id": int(osm_id), + "version": int(version) if version else None, + "action": action_type, + "tags": tags, + "timestamp": timestamp, + }) + return elements + + + +# Loaded from tables_config.json +TAG_TO_CHECK = _tables_config["tag_to_check"] + +# Split config keys into simple tags ("highway") and key=value tags ("type=street") +_SIMPLE_TAGS = {} +_KV_TAGS = {} +for key, val in TAG_TO_CHECK.items(): + if "=" in key: + _KV_TAGS[key] = val + else: + _SIMPLE_TAGS[key] = val + + +def _matching_entries(elem): + """Return matching tag_to_check entries for this element's tags.""" + tags = elem.get("tags", {}) + entries = [] + # Simple tags: match if tag key exists (e.g. 
"highway") + for tag_key in tags: + if tag_key in _SIMPLE_TAGS: + entries.append(_SIMPLE_TAGS[tag_key]) + # Key=value tags: match if tag key AND value match (e.g. "type=street") + for kv, entry in _KV_TAGS.items(): + k, v = kv.split("=", 1) + if tags.get(k) == v: + entries.append(entry) + return entries + + +def _has_mappable_tags(elem): + """Return True if the element has at least one tag that imposm imports.""" + return len(_matching_entries(elem)) > 0 + + +def _get_candidate_tables(elem): + """Return the specific tables where this element should exist based on its tags.""" + tables = set() + for entry in _matching_entries(elem): + tables.update(entry["tables"]) + return list(tables) + + +def _get_candidate_views(elem): + """Return the specific views where this element should exist based on its tags. + + Returns a list of (view_name, column, id_mode) tuples. + id_mode is 'members' for views that store member way IDs (positive), + or 'standard' for views that store osm_id (negative for relations). 
+ """ + views = {} + for entry in _matching_entries(elem): + col = entry.get("view_column", "osm_id") + id_mode = entry.get("view_id_mode", "standard") + for v in entry["views"]: + views[v] = (col, id_mode) + return [(v, col, mode) for v, (col, mode) in views.items()] + + +def _build_union_query(tables, search_id): + """Build a UNION ALL query to search osm_id across multiple tables in 1 round-trip.""" + parts = [] + for table in tables: + parts.append( + f"(SELECT '{table}' AS tbl " + f"FROM {table} WHERE osm_id = {int(search_id)} LIMIT 1)" + ) + return " UNION ALL ".join(parts) + + +def _check_element_in_tables(conn, elem): + """Check if an element exists in tiler DB tables using a single UNION ALL query.""" + osm_id = elem["osm_id"] + search_id = -osm_id if elem["type"] == "relation" else osm_id + candidate_tables = _get_candidate_tables(elem) + + cur = conn.cursor() + + # Get existing tables (cached per connection would be ideal, but simple first) + cur.execute(""" + SELECT table_name FROM information_schema.tables + WHERE table_schema = 'public' AND table_name LIKE 'osm_%%' + """) + existing_tables = {row[0] for row in cur.fetchall()} + tables = [t for t in candidate_tables if t in existing_tables] + + if not tables: + cur.close() + return { + "type": elem["type"], + "osm_id": osm_id, + "action": elem["action"], + "found_in_tables": [], + "found_in_views": [], + "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", + } + + # Single UNION ALL query across all candidate tables + query = _build_union_query(tables, search_id) + found_in_tables = [] + + try: + cur.execute(query) + for row in cur.fetchall(): + found_in_tables.append(row[0]) + except Exception: + conn.rollback() + + cur.close() + + return { + "type": elem["type"], + "osm_id": osm_id, + "action": elem["action"], + "found_in_tables": found_in_tables, + "found_in_views": [], + "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", + } + + +def _check_element_in_views(conn, elem, check): + """Check 
if an element exists in materialized views using a single UNION ALL query.""" + osm_id = check["osm_id"] + is_relation = check["type"] == "relation" + + candidate_views = _get_candidate_views(elem) + + if not candidate_views: + return check + + cur = conn.cursor() + + # Filter to existing views + cur.execute(""" + SELECT matviewname FROM pg_matviews + WHERE schemaname = 'public' AND matviewname LIKE 'mv_%%' + """) + existing_views = {row[0] for row in cur.fetchall()} + + view_info = [(v, col, mode) for v, col, mode in candidate_views if v in existing_views] + missing_views = [v for v, _, _ in candidate_views if v not in existing_views] + if missing_views: + logger.debug(f"Views not found in DB for {check['type']}/{osm_id}: {missing_views}") + + if not view_info: + cur.close() + return check + + # For 'members' mode views (routes): the view stores member way IDs, + # so we need to find which way IDs belong to this relation. + # For 'standard' mode: use osm_id (negative for relations). + member_way_ids = None + + # Build UNION ALL query, grouping by search strategy + parts = [] + for view, col, id_mode in sorted(view_info): + if id_mode == "members" and is_relation: + # Fetch member way IDs from the route table if not already done + if member_way_ids is None: + member_way_ids = _get_relation_member_ids(conn, osm_id) + if member_way_ids: + ids_list = ", ".join(str(mid) for mid in member_way_ids) + parts.append( + f"(SELECT '{view}' AS vw FROM {view} " + f"WHERE {col} IN ({ids_list}) LIMIT 1)" + ) + else: + search_id = -osm_id if is_relation else osm_id + parts.append( + f"(SELECT '{view}' AS vw FROM {view} " + f"WHERE {col} = {int(search_id)} LIMIT 1)" + ) + + found_in_views = [] + if parts: + query = " UNION ALL ".join(parts) + try: + cur.execute(query) + for row in cur.fetchall(): + found_in_views.append(row[0]) + except Exception as e: + logger.warning(f"View query failed for {check['type']}/{osm_id} in " + f"{[v for v,_,_ in view_info]}: {e}") + conn.rollback() + 
+ cur.close() + check["found_in_views"] = found_in_views + return check + + +def _get_relation_member_ids(conn, relation_osm_id): + """Get member way IDs for a relation from osm_route_multilines.""" + cur = conn.cursor() + try: + cur.execute(""" + SELECT DISTINCT member + FROM osm_route_multilines + WHERE osm_id = %s + """, (-relation_osm_id,)) + ids = [row[0] for row in cur.fetchall()] + return ids + except Exception as e: + logger.debug(f"Could not get members for relation {relation_osm_id}: {e}") + conn.rollback() + return [] + finally: + cur.close() + + +def _is_element_deleted(elem): + """Check if an element has been deleted in OHM (visible=false or 410 Gone).""" + url = f"{Config.OHM_API_BASE}/{elem['type']}/{elem['osm_id']}" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + try: + resp = requests.get(url, headers=headers, timeout=15) + if resp.status_code == 410: + return True + if resp.status_code == 200: + root = ET.fromstring(resp.content) + el = root.find(elem["type"]) + if el is not None and el.attrib.get("visible") == "false": + return True + return False + except Exception: + return False + + +def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None): + """Check all elements of a changeset in the tiler DB. 
+ + - ALL elements: verified in osm_* tables (fast, tag-filtered) + - SAMPLE elements: full check → tables + views + S3 tile cache + """ + from checks.tile_cache import check_tile_cache_for_element + + try: + elements = _get_changeset_elements(changeset_id) + except requests.RequestException as e: + return { + "status": "critical", + "message": f"Failed to download changeset diff: {e}", + "elements": [], + } + + if not elements: + return { + "status": "ok", + "message": "No way/relation elements in this changeset", + "elements": [], + } + + # Filter elements: skip those without mappable tags (silently) + checkable_elements = [] + for elem in elements: + if not _has_mappable_tags(elem): + continue + checkable_elements.append(elem) + + if not checkable_elements: + return { + "status": "ok", + "message": "No importable elements in this changeset", + "elements": [], + } + + # Select random sample for full pipeline check (tables + views + S3) + # Only sample from create/modify elements + import math + create_modify = [e for e in checkable_elements if e["action"] != "delete"] + sample_size = max(1, math.ceil(len(create_modify) * Config.FULL_CHECK_SAMPLE_PCT / 100)) + sample_size = min(sample_size, len(create_modify)) + sample_ids = set() + if create_modify: + sample_ids = {e["osm_id"] for e in random.sample(create_modify, sample_size)} + + print(f" [tiler_db] Checking {len(checkable_elements)} elements " + f"(full pipeline check on {sample_size}/{len(create_modify)} = {Config.FULL_CHECK_SAMPLE_PCT}% sampled)") + + missing = [] + not_deleted = [] + checked = [] + tile_cache_results = [] + + for elem in checkable_elements: + is_sample = elem["osm_id"] in sample_ids + sample_label = " [SAMPLE]" if is_sample else "" + ts_info = f" created={elem['timestamp']} ({_relative_age(elem['timestamp'])})" if elem.get("timestamp") else "" + + # Step 1: Check tables + check = _check_element_in_tables(conn, elem) + tables = check["found_in_tables"] + + if elem["action"] == "delete": + # 
DELETE: element should NOT be in the DB + if tables: + print(f" NOT_DELETED{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"(delete){ts_info} -> still in tables: {tables}") + print(f" {check['url']}") + not_deleted.append(f"{elem['type']}/{elem['osm_id']}") + else: + print(f" OK{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"(delete){ts_info} -> correctly removed") + checked.append(check) + continue + + # CREATE / MODIFY: element should be in the DB + # Step 2: Check views + if tables: + check = _check_element_in_views(conn, elem, check) + + checked.append(check) + views = check["found_in_views"] + + if tables: + print(f" OK{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}){ts_info}") + print(f" tables: {tables}") + print(f" views: {views}") + print(f" {check['url']}") + + # Step 3: Check S3 tile cache (SAMPLE only) + if is_sample and changeset_closed_at and Config.S3_BUCKET_CACHE_TILER: + try: + tile_result = check_tile_cache_for_element( + conn, check, changeset_closed_at + ) + tile_cache_results.append(tile_result) + cache_status = tile_result.get("cache", {}).get("status", "unknown") + tile_info = tile_result.get("tile", {}) + if cache_status == "stale": + print(f" [S3 CACHE] STALE tile z{tile_info.get('z')}/{tile_info.get('x')}/{tile_info.get('y')}") + elif cache_status == "ok": + print(f" [S3 CACHE] OK tile z{tile_info.get('z')}/{tile_info.get('x')}/{tile_info.get('y')}") + elif cache_status == "skipped": + print(f" [S3 CACHE] skipped: {tile_result.get('cache', {}).get('message', '')}") + except Exception as e: + print(f" [S3 CACHE] error: {e}") + else: + # Not found — check if deleted in a later changeset + if _is_element_deleted(elem): + print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}){ts_info} -> deleted in a later changeset") + print(f" {check['url']}") + check["deleted"] = True + continue + + print(f" MISSING{sample_label} 
{elem['type']}/{elem['osm_id']} v{elem['version']} " + f"({elem['action']}){ts_info} -> NOT in tables, queued for retry") + print(f" {check['url']}") + retry_store.add_missing( + changeset_id, elem["type"], elem["osm_id"], Config.MAX_RETRIES, + version=elem.get("version", 0), action=elem.get("action", ""), + ) + missing.append(f"{elem['type']}/{elem['osm_id']}") + + # Build status message + stale_tiles = [r for r in tile_cache_results if r.get("cache", {}).get("status") == "stale"] + + problems_parts = [] + # Missing elements are queued for retry, only warn about other issues + if missing: + problems_parts.append(f"Queued for retry: {', '.join(missing)}") + if not_deleted: + problems_parts.append(f"Not deleted from tiler DB: {', '.join(not_deleted)}") + + if not_deleted: + status = "warning" + msg = ". ".join(problems_parts) + elif missing: + status = "retry_pending" + msg = ". ".join(problems_parts) + elif stale_tiles: + status = "warning" + stale_ids = [f"{r['type']}/{r['osm_id']}" for r in stale_tiles] + msg = (f"All {len(checked)} elements in tables, " + f"but S3 tile cache stale for: {', '.join(stale_ids)}") + else: + status = "ok" + msg = f"All {len(checked)} elements verified in tiler DB" + if tile_cache_results: + msg += f" (S3 cache OK for {len(tile_cache_results)} sampled)" + + return { + "status": status, + "message": msg, + "elements": checked, + "tile_cache": tile_cache_results, + } + + +# --------------------------------------------------------------------------- +# Main pipeline check (scheduled) +# --------------------------------------------------------------------------- + +def check_pipeline(): + """Check the full pipeline for changesets in the 1-2 hour age window. + + For each changeset: + 1. Is it covered by minute replication? + 2. Are its elements in the tiler DB? 
+ """ + now = datetime.now(timezone.utc) + min_age = Config.CHANGESET_MIN_AGE + max_age = Config.CHANGESET_MAX_AGE + + result = { + "name": "pipeline", + "status": "ok", + "message": "", + "details": { + "window": f"{min_age//60}-{max_age//60} minutes", + "replication": {}, + "changesets": [], + }, + "checked_at": now.isoformat(), + } + + # --- Fetch replication state --- + repl_seq, repl_ts = None, None + try: + resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) + resp.raise_for_status() + repl_seq, repl_ts = _parse_replication_state(resp.text) + result["details"]["replication"] = { + "status": "ok", + "sequence": repl_seq, + "timestamp": repl_ts.isoformat() if repl_ts else None, + } + if repl_ts: + lag_min = (now - repl_ts).total_seconds() / 60 + result["details"]["replication"]["lag_minutes"] = round(lag_min, 1) + print(f"\n[pipeline] Replication state: seq={repl_seq}, " + f"ts={repl_ts.isoformat()}, lag={lag_min:.1f}min") + except requests.RequestException as e: + result["details"]["replication"] = { + "status": "critical", + "message": f"Failed to fetch replication state: {e}", + } + print(f"\n[pipeline] WARNING: Cannot fetch replication state: {e}") + + # --- Get changesets in window --- + try: + changesets = _get_changesets_in_window( + min_age=min_age, + max_age=max_age, + limit=Config.CHANGESET_LIMIT, + ) + except requests.RequestException as e: + result["status"] = "critical" + result["message"] = f"Failed to fetch changesets from OHM API: {e}" + return result + + if not changesets: + result["message"] = ( + f"No changesets found in the {min_age//60}-{max_age//60} minute window" + ) + print(f"[pipeline] {result['message']}") + return result + + print(f"[pipeline] Found {len(changesets)} changesets in " + f"{min_age//60}-{max_age//60}min window") + + # --- Connect to tiler DB --- + conn = None + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + 
password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + result["status"] = "critical" + result["message"] = f"Cannot connect to tiler DB: {e}" + print(f"[pipeline] ERROR: Cannot connect to tiler DB: {e}") + return result + + # --- Check each changeset through the pipeline --- + problems = [] + skipped = 0 + + for cs in changesets: + # Skip changesets already checked with status OK + if retry_store.is_changeset_passed(cs["id"]): + skipped += 1 + continue + + print(f"\n[pipeline] === Changeset {cs['id']} === " + f"(closed_at={cs['closed_at']}, age={cs['age_minutes']}min)") + print(f" URL: {_ohm_base()}/changeset/{cs['id']}") + + cs_result = { + "changeset_id": cs["id"], + "changeset_url": f"{_ohm_base()}/changeset/{cs['id']}", + "closed_at": cs["closed_at"], + "age_minutes": cs["age_minutes"], + "replication": {}, + "tiler_db": {}, + } + + # Step 1: replication + if repl_seq is not None: + repl_check = _check_replication_covers(cs, repl_seq, repl_ts) + cs_result["replication"] = repl_check + print(f" [replication] {repl_check['status'].upper()}: {repl_check['message']}") + + if repl_check["status"] != "ok": + problems.append( + f"Changeset {cs['id']}: replication not covering" + ) + else: + cs_result["replication"] = {"status": "unknown", "message": "Replication state unavailable"} + print(f" [replication] UNKNOWN: Replication state unavailable") + + # Step 2: tiler DB + db_check = _check_elements_in_db(conn, cs["id"], cs["closed_at"]) + cs_result["tiler_db"] = db_check + print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") + + if db_check["status"] not in ("ok", "retry_pending"): + problems.append(f"Changeset {cs['id']}: {db_check['message']}") + + result["details"]["changesets"].append(cs_result) + + # Log to history + elements = db_check.get("elements", []) + missing_count = len([e for e in elements if not e.get("found_in_tables")]) + retry_store.log_changeset_check( + changeset_id=cs["id"], + status=db_check["status"], + 
total_elements=len(elements), + missing_count=missing_count, + ok_count=len(elements) - missing_count, + message=db_check["message"], + closed_at=cs.get("closed_at", ""), + elements=elements, + ) + + # --- Recheck pending and failed retries --- + retryable = retry_store.get_pending() + retry_store.get_failed() + newly_failed = [] + + if retryable: + print(f"\n[pipeline] Rechecking {len(retryable)} retries (pending + failed)...") + + for entry in retryable: + cs_id = entry["changeset_id"] + etype = entry["element_type"] + oid = entry["osm_id"] + retry_num = entry["retry_count"] + 1 + prev_status = entry["status"] + + # Check if the element is now in the DB + check = _check_element_in_tables(conn, {"type": etype, "osm_id": oid, "action": "modify"}) + if check["found_in_tables"]: + print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " + f"-> found in tables after {retry_num} retries") + retry_store.mark_resolved(cs_id, etype, oid) + elif prev_status == "failed": + # Already failed, keep checking but don't increment + print(f" [retry] STILL MISSING {etype}/{oid} (changeset {cs_id}) " + f"-> failed, still monitoring") + else: + new_status = retry_store.increment_retry(cs_id, etype, oid) + if new_status == "failed": + print(f" [retry] FAILED {etype}/{oid} (changeset {cs_id}) " + f"-> still missing after {retry_num}/{Config.MAX_RETRIES} retries") + newly_failed.append({ + "type": etype, "osm_id": oid, "changeset_id": cs_id, + }) + else: + print(f" [retry] PENDING {etype}/{oid} (changeset {cs_id}) " + f"-> retry {retry_num}/{Config.MAX_RETRIES}") + + conn.close() + + # --- Overall status --- + retry_summary = retry_store.summary() + result["details"]["retries"] = retry_summary + + if newly_failed: + failed_summary = "; ".join( + f"{f['type']}/{f['osm_id']} (changeset {f['changeset_id']})" + for f in newly_failed + ) + problems.append(f"Failed after {Config.MAX_RETRIES} retries: {failed_summary}") + + has_cs_issues = any( + cs.get("replication", {}).get("status") == 
"critical" + or cs.get("tiler_db", {}).get("status") in ("warning", "critical") + for cs in result["details"]["changesets"] + ) + + # Include failed details for Slack alerting + failed_count = retry_summary.get("failed", 0) + result["details"]["newly_failed"] = newly_failed + result["details"]["total_failed"] = failed_count + + if newly_failed: + result["status"] = "critical" + failed_labels = "; ".join( + f"{f['type']}/{f['osm_id']}" for f in newly_failed[:5] + ) + result["message"] = f"Elements missing after all retries: {failed_labels}" + elif failed_count > 0: + result["status"] = "critical" + result["message"] = f"{failed_count} elements still missing after all retries" + elif has_cs_issues: + result["status"] = "warning" + result["message"] = f"Issues found: {'; '.join(problems[:5])}" + else: + pending_count = retry_summary.get("pending", 0) + checked_count = len(changesets) - skipped + msg = ( + f"{checked_count} new changesets checked" + ) + if skipped: + msg += f", {skipped} already passed (skipped)" + if pending_count: + msg += f", {pending_count} elements pending retry" + result["message"] = msg + + if skipped: + print(f"[pipeline] Skipped {skipped} changesets already passed OK") + print(f"\n[pipeline] Result: {result['status'].upper()} — {result['message']}") + if retry_summary: + print(f"[pipeline] Retry store: {retry_summary}") + return result + + +# --------------------------------------------------------------------------- +# On-demand single changeset check +# --------------------------------------------------------------------------- + +def check_single_changeset(changeset_id): + """Evaluate a single changeset through the full pipeline (on-demand).""" + now = datetime.now(timezone.utc) + result = { + "name": "pipeline", + "changeset_id": changeset_id, + "changeset_url": f"{_ohm_base()}/changeset/{changeset_id}", + "status": "ok", + "message": "", + "details": {"replication": {}, "tiler_db": {}}, + "checked_at": now.isoformat(), + } + + # Get 
changeset info + try: + url = f"{Config.OHM_API_BASE}/changeset/{changeset_id}" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + resp = requests.get(url, headers=headers, timeout=30) + resp.raise_for_status() + root = ET.fromstring(resp.content) + cs_elem = root.find("changeset") + closed_at = cs_elem.attrib.get("closed_at", "") if cs_elem is not None else "" + except Exception: + closed_at = "" + + print(f"\n[pipeline] === Changeset {changeset_id} (on-demand) ===") + print(f" URL: {_ohm_base()}/changeset/{changeset_id}") + if closed_at: + print(f" closed_at: {closed_at}") + + # Step 1: replication + try: + resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) + resp.raise_for_status() + repl_seq, repl_ts = _parse_replication_state(resp.text) + + if closed_at and repl_ts: + closed_dt = _parse_timestamp(closed_at) + cs_data = {"closed_at": closed_at, "closed_dt": closed_dt} + repl_check = _check_replication_covers(cs_data, repl_seq, repl_ts) + else: + repl_check = { + "status": "ok" if repl_ts else "warning", + "message": f"Replication seq={repl_seq}, ts={repl_ts.isoformat() if repl_ts else 'unknown'}", + "replication_sequence": repl_seq, + "replication_timestamp": repl_ts.isoformat() if repl_ts else None, + } + + result["details"]["replication"] = repl_check + print(f" [replication] {repl_check['status'].upper()}: {repl_check['message']}") + except requests.RequestException as e: + result["details"]["replication"] = { + "status": "critical", + "message": f"Failed to fetch replication state: {e}", + } + print(f" [replication] CRITICAL: Cannot fetch replication state: {e}") + + # Step 2: tiler DB + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + result["status"] = "critical" + result["message"] = f"Cannot connect to tiler DB: {e}" + result["details"]["tiler_db"] = {"status": 
"critical", "message": str(e)} + return result + + db_check = _check_elements_in_db(conn, changeset_id, closed_at or None) + conn.close() + result["details"]["tiler_db"] = db_check + print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") + + # Overall + problems = [] + repl_status = result["details"]["replication"].get("status", "ok") + if repl_status == "critical": + problems.append("Replication not covering this changeset") + if db_check["status"] != "ok": + problems.append(db_check["message"]) + + if problems: + result["status"] = "warning" + result["message"] = "; ".join(problems) + else: + result["message"] = ( + f"Changeset {changeset_id} passed full pipeline check " + f"({len(db_check.get('elements', []))} elements verified)" + ) + + print(f" [result] {result['status'].upper()}: {result['message']}") + return result diff --git a/images/tiler-monitor/pipeline-monitor/checks/mv_freshness.py b/images/tiler-monitor/pipeline-monitor/checks/mv_freshness.py new file mode 100644 index 000000000..35d5d4d6b --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/checks/mv_freshness.py @@ -0,0 +1,179 @@ +"""Check 3: Materialized view freshness monitor. + +Queries pg_stat_user_tables to check when materialized views were last +auto-analyzed/auto-vacuumed (proxy for last refresh), and also checks +if the views exist and have rows. +""" + +from datetime import datetime, timezone + +import psycopg2 + +from config import Config + +# Key materialized views grouped by expected refresh interval. 
+# group_name -> (max_stale_seconds, [view_names]) +MV_GROUPS = { + "admin_boundaries_lines": ( + 300, # expect refresh every ~60s + buffer + [ + "mv_admin_boundaries_lines_z4_5", + "mv_admin_boundaries_lines_z6_7", + "mv_admin_boundaries_lines_z8_9", + "mv_admin_boundaries_lines_z10_11", + "mv_admin_boundaries_lines_z12_13", + "mv_admin_boundaries_lines_z14_15", + "mv_admin_boundaries_lines_z16_20", + ], + ), + "water": ( + 600, # expect refresh every ~180s + buffer + [ + "mv_water_lines_z10_11", + "mv_water_lines_z12_13", + "mv_water_lines_z14_15", + "mv_water_lines_z16_20", + "mv_water_areas_z6_7", + "mv_water_areas_z8_9", + "mv_water_areas_z10_11", + "mv_water_areas_z12_13", + "mv_water_areas_z14_15", + "mv_water_areas_z16_20", + ], + ), + "transport": ( + 600, + [ + "mv_transport_lines_z8_9", + "mv_transport_lines_z10_11", + "mv_transport_lines_z12_13", + "mv_transport_lines_z14_15", + "mv_transport_lines_z16_20", + ], + ), +} + + +def check_mv_freshness(): + """Check that key materialized views exist and are being refreshed.""" + result = { + "name": "mv_freshness", + "status": "ok", + "message": "", + "details": {"groups": {}}, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + result["status"] = "critical" + result["message"] = f"Cannot connect to tiler DB: {e}" + return result + + cur = conn.cursor() + + # Get list of existing materialized views + cur.execute("SELECT matviewname FROM pg_matviews WHERE schemaname = 'public'") + existing_mvs = {row[0] for row in cur.fetchall()} + + # Check row counts and last analyze times for MVs via pg_stat_user_tables. + # REFRESH MATERIALIZED VIEW triggers auto-analyze, so last_autoanalyze + # is a good proxy for "last refreshed". 
+ cur.execute(""" + SELECT relname, n_live_tup, last_autoanalyze, last_analyze + FROM pg_stat_user_tables + WHERE schemaname = 'public' + AND relname LIKE 'mv_%%' + """) + mv_stats = {} + for row in cur.fetchall(): + name, n_rows, last_autoanalyze, last_analyze = row + # Use whichever is more recent + last_refreshed = max( + filter(None, [last_autoanalyze, last_analyze]), + default=None, + ) + mv_stats[name] = { + "n_rows": n_rows, + "last_refreshed": last_refreshed, + } + + cur.close() + conn.close() + + missing_views = [] + stale_views = [] + empty_views = [] + now = datetime.now(timezone.utc) + + for group_name, (max_stale, views) in MV_GROUPS.items(): + group_result = {"views": [], "status": "ok"} + + for view_name in views: + view_info = {"name": view_name, "status": "ok"} + + if view_name not in existing_mvs: + view_info["status"] = "critical" + view_info["message"] = "View does not exist" + missing_views.append(view_name) + elif view_name in mv_stats: + stats = mv_stats[view_name] + view_info["n_rows"] = stats["n_rows"] + + if stats["n_rows"] == 0: + view_info["status"] = "warning" + view_info["message"] = "View is empty (0 rows)" + empty_views.append(view_name) + + if stats["last_refreshed"]: + last_ref = stats["last_refreshed"] + if last_ref.tzinfo is None: + last_ref = last_ref.replace(tzinfo=timezone.utc) + age_seconds = (now - last_ref).total_seconds() + view_info["last_refreshed"] = last_ref.isoformat() + view_info["age_seconds"] = round(age_seconds) + + if age_seconds > max_stale: + view_info["status"] = "warning" + view_info["message"] = ( + f"Stale: last refreshed {round(age_seconds / 60, 1)} min ago " + f"(threshold: {max_stale // 60} min)" + ) + stale_views.append(view_name) + else: + view_info["last_refreshed"] = None + view_info["message"] = "No analyze timestamp available" + else: + view_info["message"] = "No stats available" + + group_result["views"].append(view_info) + + if any(v["status"] == "critical" for v in group_result["views"]): + 
group_result["status"] = "critical" + elif any(v["status"] == "warning" for v in group_result["views"]): + group_result["status"] = "warning" + + result["details"]["groups"][group_name] = group_result + + # Overall status + if missing_views: + result["status"] = "critical" + result["message"] = f"Missing views: {', '.join(missing_views[:5])}" + elif stale_views: + result["status"] = "warning" + result["message"] = f"Stale views: {', '.join(stale_views[:5])}" + elif empty_views: + result["status"] = "warning" + result["message"] = f"Empty views: {', '.join(empty_views[:5])}" + else: + total = sum(len(v) for _, v in MV_GROUPS.values()) + result["message"] = f"All {total} monitored materialized views are healthy" + + return result diff --git a/images/tiler-monitor/pipeline-monitor/checks/replication_lag.py b/images/tiler-monitor/pipeline-monitor/checks/replication_lag.py new file mode 100644 index 000000000..c15cb2cef --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/checks/replication_lag.py @@ -0,0 +1,89 @@ +"""Check 1: Minute replication lag monitor. + +Compares the latest replication sequence number available on S3 +against the last sequence number processed by imposm (from the tiler DB +or the replication state endpoint). 
+""" + +import time +from datetime import datetime, timezone + +import requests + +from config import Config + + +def _parse_state(text): + """Parse an imposm/osm replication state.txt and return sequence + timestamp.""" + data = {} + for line in text.strip().splitlines(): + if "=" in line: + key, _, value = line.partition("=") + data[key.strip()] = value.strip() + seq = int(data.get("sequenceNumber", 0)) + ts_raw = data.get("timestamp", "") + # Format: 2026-03-13T12\:05\:02Z (escaped colons in java properties) + ts_raw = ts_raw.replace("\\:", ":") + try: + ts = datetime.fromisoformat(ts_raw.replace("Z", "+00:00")) + except ValueError: + ts = None + return seq, ts + + +def check_replication_lag(): + """Return a dict with the replication lag check result.""" + result = { + "name": "replication_lag", + "status": "ok", + "message": "", + "details": {}, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + try: + # Get latest available replication state from S3 + resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) + resp.raise_for_status() + remote_seq, remote_ts = _parse_state(resp.text) + + result["details"]["remote_sequence"] = remote_seq + result["details"]["remote_timestamp"] = remote_ts.isoformat() if remote_ts else None + + # Get imposm's last processed state + # The imposm diff dir stores last.state.txt - we query it via the same + # base URL pattern but from the local imposm state endpoint. + # In Docker, we can check the DB for the latest sequence via the + # osm_replication_status table if available, or fall back to comparing + # timestamps of recent data. + # + # For now: compare remote timestamp against current time. + # If remote_ts is stale, replication source itself is behind. + # A more precise check reads imposm's last.state.txt from the shared volume. 
+ + if remote_ts: + lag_seconds = (datetime.now(timezone.utc) - remote_ts).total_seconds() + result["details"]["lag_seconds"] = round(lag_seconds) + result["details"]["lag_minutes"] = round(lag_seconds / 60, 1) + + if lag_seconds > Config.REPLICATION_LAG_THRESHOLD: + result["status"] = "critical" + result["message"] = ( + f"Replication lag is {round(lag_seconds / 60, 1)} minutes " + f"(threshold: {Config.REPLICATION_LAG_THRESHOLD // 60} min). " + f"Last replication timestamp: {remote_ts.isoformat()}" + ) + else: + result["message"] = ( + f"Replication is up to date. Lag: {round(lag_seconds / 60, 1)} min, " + f"sequence: {remote_seq}" + ) + else: + result["status"] = "warning" + result["message"] = "Could not parse replication timestamp" + + except requests.RequestException as e: + result["status"] = "critical" + result["message"] = f"Failed to fetch replication state: {e}" + + return result diff --git a/images/tiler-monitor/pipeline-monitor/checks/tile_cache.py b/images/tiler-monitor/pipeline-monitor/checks/tile_cache.py new file mode 100644 index 000000000..9d21eb457 --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/checks/tile_cache.py @@ -0,0 +1,159 @@ +"""Pipeline check: verify tile cache in S3 is up-to-date. + +For a sampled element, check if the cached tile in S3 was modified +after the changeset closed_at. If the tile is stale, the cache purge +(SQS → tiler-cache) may have failed. 
+""" + +import mercantile +import psycopg2.extensions +from datetime import datetime, timezone + +from config import Config + + +def _get_element_centroid(conn, elem): + """Get the centroid (lon, lat) of an element from the tiler DB.""" + osm_id = elem["osm_id"] + search_id = -osm_id if elem["type"] == "relation" else osm_id + + # Search in the tables where it was found + found_tables = elem.get("found_in_tables", []) + if not found_tables: + return None + + cur = conn.cursor() + for table in found_tables: + try: + quoted = psycopg2.extensions.quote_ident(table, cur) + cur.execute( + f"SELECT ST_X(ST_Centroid(ST_Transform(geometry, 4326))), " + f"ST_Y(ST_Centroid(ST_Transform(geometry, 4326))) " + f"FROM {quoted} WHERE osm_id = %s LIMIT 1", + (search_id,), + ) + row = cur.fetchone() + if row and row[0] is not None: + cur.close() + return {"lon": row[0], "lat": row[1]} + except Exception: + conn.rollback() + + cur.close() + return None + + +def _get_tile_for_point(lon, lat, zoom): + """Convert lon/lat to tile z/x/y.""" + tile = mercantile.tile(lon, lat, zoom) + return {"z": tile.z, "x": tile.x, "y": tile.y} + + +def _check_tile_in_s3(tile, changeset_closed_at): + """Check if a cached tile in S3 is stale (older than changeset). + + Returns dict with status and details for each S3 path. 
+ """ + if not Config.S3_BUCKET_CACHE_TILER: + return { + "status": "skipped", + "message": "S3_BUCKET_CACHE_TILER not configured", + } + + s3 = Config.get_s3_client() + bucket = Config.S3_BUCKET_CACHE_TILER + z, x, y = tile["z"], tile["x"], tile["y"] + + results = [] + stale_paths = [] + + for path_prefix in Config.S3_BUCKET_PATH_FILES: + key = f"{path_prefix}/{z}/{x}/{y}.pbf" + try: + resp = s3.head_object(Bucket=bucket, Key=key) + last_modified = resp["LastModified"] + + # Parse changeset closed_at + closed_dt = datetime.fromisoformat( + changeset_closed_at.replace("Z", "+00:00") + ) + + is_stale = last_modified < closed_dt + result = { + "path": key, + "last_modified": last_modified.isoformat(), + "is_stale": is_stale, + } + results.append(result) + if is_stale: + stale_paths.append(key) + + except s3.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + # Tile not in cache — not stale, tegola will generate on demand + results.append({ + "path": key, + "last_modified": None, + "is_stale": False, + "note": "not cached (tegola generates on demand)", + }) + else: + results.append({ + "path": key, + "error": str(e), + }) + + if stale_paths: + return { + "status": "stale", + "message": f"Tile cache is stale for: {', '.join(stale_paths)}", + "tile": tile, + "details": results, + } + else: + return { + "status": "ok", + "message": "Tile cache is up-to-date or not cached", + "tile": tile, + "details": results, + } + + +def check_tile_cache_for_element(conn, elem_check, changeset_closed_at): + """Full tile cache verification for a single element. 
+ + Args: + conn: DB connection + elem_check: result dict from _check_element_in_db (with found_in_tables) + changeset_closed_at: ISO timestamp string + + Returns: + dict with tile cache check results + """ + osm_id = elem_check["osm_id"] + elem_type = elem_check["type"] + zoom = Config.TILE_CHECK_ZOOM + + # Step 1: get geometry from DB + centroid = _get_element_centroid(conn, elem_check) + if not centroid: + return { + "osm_id": osm_id, + "type": elem_type, + "status": "skipped", + "message": "Could not get geometry from DB", + } + + # Step 2: calculate tile + tile = _get_tile_for_point(centroid["lon"], centroid["lat"], zoom) + + # Step 3: check S3 cache + cache_result = _check_tile_in_s3(tile, changeset_closed_at) + + return { + "osm_id": osm_id, + "type": elem_type, + "centroid": centroid, + "tile": tile, + "cache": cache_result, + } diff --git a/images/tiler-monitor/pipeline-monitor/config.py b/images/tiler-monitor/pipeline-monitor/config.py new file mode 100644 index 000000000..41be49de1 --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/config.py @@ -0,0 +1,97 @@ +import os +import re + + +def _parse_duration(value, default): + """Parse human-readable duration (e.g. 
'1h', '30m', '1.5h', '2h30m', '3600') to seconds.""" + raw = os.getenv(value, "") + if not raw: + return default + # If it's just a number, treat as seconds + try: + return int(float(raw)) + except ValueError: + pass + total = 0 + for amount, unit in re.findall(r"(\d+\.?\d*)\s*(h|m|s)", raw.lower()): + amount = float(amount) + if unit == "h": + total += amount * 3600 + elif unit == "m": + total += amount * 60 + elif unit == "s": + total += amount + return int(total) if total > 0 else default + + +class Config: + # PostgreSQL (tiler DB) + POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost") + POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", 5432)) + POSTGRES_DB = os.getenv("POSTGRES_DB", "tiler") + POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres") + POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "") + + # Replication + REPLICATION_STATE_URL = os.getenv( + "REPLICATION_STATE_URL", + "https://s3.amazonaws.com/planet.openhistoricalmap.org/replication/minute/state.txt", + ) + OHM_API_BASE = os.getenv("OHM_API_BASE", "https://www.openhistoricalmap.org/api/0.6") + + # How often to run the pipeline check (e.g. "1h", "30m", "3600") + CHECK_INTERVAL = _parse_duration("TILER_MONITORING_CHECK_INTERVAL", 3600) + + # OHM changeset age window (e.g. 
"1h", "2h30m", "3600") + CHANGESET_MIN_AGE = _parse_duration("TILER_MONITORING_CHANGESET_MIN_AGE", 10800) + CHANGESET_MAX_AGE = _parse_duration("TILER_MONITORING_CHANGESET_MAX_AGE", 14400) + + # Max number of changesets to check per run + CHANGESET_LIMIT = int(os.getenv("CHANGESET_LIMIT", 30)) + + # Retry: how many times to recheck a missing element before alerting + MAX_RETRIES = int(os.getenv("TILER_MONITORING_MAX_RETRIES", 3)) + + # Verbose logging + VERBOSE_LOGGING = os.getenv("VERBOSE_LOGGING", "false").lower() == "true" + + # Alerting (optional) + SLACK_WEBHOOK_URL = os.getenv("TILER_MONITORING_SLACK_WEBHOOK_URL", "") + + # Server + MONITOR_PORT = int(os.getenv("MONITOR_PORT", 8001)) + MONITOR_BASE_URL = os.getenv("TILER_MONITORING_BASE_URL", "") + + # S3 tile cache verification + S3_BUCKET_CACHE_TILER = os.getenv("S3_BUCKET_CACHE_TILER", "") + S3_BUCKET_PATH_FILES = os.getenv("S3_BUCKET_PATH_FILES", "mnt/data/ohm,mnt/data/ohm_admin,mnt/data/ohm_other_boundaries").split(",") + TILER_CACHE_AWS_ACCESS_KEY_ID = os.getenv("TILER_CACHE_AWS_ACCESS_KEY_ID", "") + TILER_CACHE_AWS_SECRET_ACCESS_KEY = os.getenv("TILER_CACHE_AWS_SECRET_ACCESS_KEY", "") + TILER_CACHE_AWS_ENDPOINT = os.getenv("TILER_CACHE_AWS_ENDPOINT", "https://s3.amazonaws.com") + TILER_CACHE_REGION = os.getenv("TILER_CACHE_REGION", "us-east-1") + TILER_CACHE_CLOUD_INFRASTRUCTURE = os.getenv("TILER_CACHE_CLOUD_INFRASTRUCTURE", "aws") + # Zoom level to verify tile cache (use high zoom for precise check) + TILE_CHECK_ZOOM = int(os.getenv("TILE_CHECK_ZOOM", 16)) + # Percentage of elements to do full pipeline check (tables + views + S3) + # e.g. 
25 = 25% of elements, minimum 1 + FULL_CHECK_SAMPLE_PCT = int(os.getenv("TILER_MONITORING_FULL_CHECK_SAMPLE_PCT", 25)) + + @staticmethod + def get_s3_client(): + import boto3 + if Config.TILER_CACHE_CLOUD_INFRASTRUCTURE == "hetzner": + return boto3.client( + "s3", + aws_access_key_id=Config.TILER_CACHE_AWS_ACCESS_KEY_ID, + aws_secret_access_key=Config.TILER_CACHE_AWS_SECRET_ACCESS_KEY, + endpoint_url=Config.TILER_CACHE_AWS_ENDPOINT, + region_name=Config.TILER_CACHE_REGION, + ) + return boto3.client("s3") + + @staticmethod + def get_db_dsn(): + return ( + f"postgresql://{Config.POSTGRES_USER}:{Config.POSTGRES_PASSWORD}" + f"@{Config.POSTGRES_HOST}:{Config.POSTGRES_PORT}/{Config.POSTGRES_DB}" + ) diff --git a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py new file mode 100644 index 000000000..090311324 --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -0,0 +1,234 @@ +"""Vtile pipeline monitor. + +Runs periodic changeset-centric checks and exposes results via a FastAPI HTTP +server. Optionally sends Slack alerts when checks fail. 
+""" + +import logging +import os +import threading +import time +from datetime import datetime, timezone + +import requests +import uvicorn +from fastapi import FastAPI +from fastapi.responses import HTMLResponse, JSONResponse + +from checks.imposm_import import check_pipeline, check_single_changeset +from config import Config +import retry_store + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) +logger = logging.getLogger(__name__) + +# Store latest check result +_latest_result = None +_lock = threading.Lock() + +app = FastAPI(title="OHM Vtile Pipeline Monitor") + + +# --------------------------------------------------------------------------- +# Slack alerting +# --------------------------------------------------------------------------- + +def _send_slack_alert(check_result): + """Send a Slack notification when a check is not ok.""" + if not Config.SLACK_WEBHOOK_URL: + return + status_emoji = {"ok": ":white_check_mark:", "warning": ":warning:", "critical": ":rotating_light:"} + emoji = status_emoji.get(check_result["status"], ":question:") + ohm = "https://www.openhistoricalmap.org" + + lines = [f"{emoji} *OHM Tiler Pipeline Monitor* — {check_result['status'].upper()}"] + lines.append(check_result["message"]) + + # Add failed element details with links + newly_failed = check_result.get("details", {}).get("newly_failed", []) + if newly_failed: + lines.append("") + lines.append("*Elements missing after all retries:*") + for f in newly_failed[:10]: + cs_url = f"{ohm}/changeset/{f['changeset_id']}" + elem_url = f"{ohm}/{f['type']}/{f['osm_id']}" + lines.append(f" • <{elem_url}|{f['type']}/{f['osm_id']}> in <{cs_url}|changeset {f['changeset_id']}>") + + # Add dashboard link + if Config.MONITOR_BASE_URL: + lines.append("") + lines.append(f":mag: <{Config.MONITOR_BASE_URL}|Open Dashboard> · " + f"<{Config.MONITOR_BASE_URL}/retries|View Retries>") + + # Add changeset-level issues + changesets = 
check_result.get("details", {}).get("changesets", []) + cs_issues = [cs for cs in changesets + if cs.get("tiler_db", {}).get("status") not in ("ok", "retry_pending", None)] + if cs_issues and not newly_failed: + lines.append("") + for cs in cs_issues[:5]: + cs_url = f"{ohm}/changeset/{cs['changeset_id']}" + msg = cs.get("tiler_db", {}).get("message", "") + lines.append(f" • <{cs_url}|Changeset {cs['changeset_id']}>: {msg}") + + text = "\n".join(lines) + try: + requests.post( + Config.SLACK_WEBHOOK_URL, + json={"text": text}, + timeout=10, + ) + except requests.RequestException as e: + logger.error(f"Failed to send Slack alert: {e}") + + +# --------------------------------------------------------------------------- +# Background scheduler +# --------------------------------------------------------------------------- + +def _run_check(): + """Run the pipeline check and update stored result.""" + try: + logger.info("=============> Running pipeline check") + result = check_pipeline() + logger.info(f"Pipeline check: {result['status']} — {result['message']}") + + with _lock: + prev = _latest_result + globals()["_latest_result"] = result + + # Alert on state changes or new failures + newly_failed = result.get("details", {}).get("newly_failed", []) + if newly_failed: + # New elements just exhausted retries — always alert + _send_slack_alert(result) + elif result["status"] == "warning": + if prev is None or prev["status"] == "ok": + _send_slack_alert(result) + elif result["status"] == "ok" and prev and prev["status"] in ("critical", "warning"): + # Recovered — send ok notification + _send_slack_alert(result) + + except Exception as e: + logger.exception(f"Pipeline check raised an exception: {e}") + with _lock: + globals()["_latest_result"] = { + "name": "pipeline", + "status": "critical", + "message": f"Check failed with exception: {e}", + "details": {}, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + +def _scheduler(): + """Background loop that runs checks 
at the configured interval.""" + logger.info(f"Pipeline monitor starting. Check interval: {Config.CHECK_INTERVAL}s") + time.sleep(10) + + while True: + _run_check() + time.sleep(Config.CHECK_INTERVAL) + + +# --------------------------------------------------------------------------- +# HTTP endpoints +# --------------------------------------------------------------------------- + +_STATIC_DIR = os.path.join(os.path.dirname(__file__), "static") + + +@app.get("/", response_class=HTMLResponse) +def dashboard(): + """Serve the monitoring dashboard.""" + html_path = os.path.join(_STATIC_DIR, "dashboard.html") + with open(html_path) as f: + return HTMLResponse(content=f.read()) + + +@app.get("/health") +def health(): + """Overall health: returns 200 if ok, 503 otherwise.""" + with _lock: + result = _latest_result + + if result is None: + return JSONResponse( + content={"status": "starting", "message": "No checks have run yet"}, + status_code=200, + ) + + status_code = 200 if result["status"] == "ok" else 503 + return JSONResponse( + content={ + "status": result["status"], + "message": result["message"], + "checked_at": result["checked_at"], + }, + status_code=status_code, + ) + + +@app.get("/checks") +def all_checks(): + """Return full details for the latest pipeline check.""" + with _lock: + result = _latest_result + if result is None: + return JSONResponse(content={"status": "starting"}) + return JSONResponse(content=result) + + +@app.get("/changeset/{changeset_id}") +def evaluate_changeset(changeset_id: int): + """Evaluate a specific changeset through the full pipeline (on-demand).""" + result = check_single_changeset(changeset_id) + status_code = 200 if result["status"] == "ok" else 503 + return JSONResponse(content=result, status_code=status_code) + + +@app.get("/retries") +def retries(): + """Return current retry state with full details for debugging.""" + all_entries = retry_store.get_all_details() + pending = [e for e in all_entries if e["status"] == "pending"] + 
failed = [e for e in all_entries if e["status"] == "failed"] + return JSONResponse(content={ + "summary": retry_store.summary(), + "total": len(all_entries), + "pending": pending, + "failed": failed, + }) + + +@app.get("/history") +def history(page: int = 1, per_page: int = 20): + """Paginated history of all changesets checked. + + Example: /history?page=1&per_page=10 + """ + data = retry_store.get_changeset_history(page=page, per_page=per_page) + return JSONResponse(content=data) + + +@app.get("/history/{history_id}/elements") +def history_elements(history_id: int): + """Return all elements checked for a specific history entry.""" + elements = retry_store.get_changeset_elements(history_id) + return JSONResponse(content={"history_id": history_id, "elements": elements}) + + +# --------------------------------------------------------------------------- +# Entrypoint +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + # Start background scheduler + t = threading.Thread(target=_scheduler, daemon=True) + t.start() + + # Start HTTP server + uvicorn.run(app, host="0.0.0.0", port=Config.MONITOR_PORT) diff --git a/images/tiler-monitor/pipeline-monitor/requirements.txt b/images/tiler-monitor/pipeline-monitor/requirements.txt new file mode 100644 index 000000000..1360cc74e --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/requirements.txt @@ -0,0 +1,6 @@ +fastapi +uvicorn +requests +psycopg2-binary +mercantile +boto3 diff --git a/images/tiler-monitor/pipeline-monitor/retry_store.py b/images/tiler-monitor/pipeline-monitor/retry_store.py new file mode 100644 index 000000000..181d1a1c8 --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/retry_store.py @@ -0,0 +1,378 @@ +"""SQLite-backed retry store for missing pipeline elements. + +Tracks elements that were not found in the tiler DB so they can be +rechecked on subsequent runs. 
After MAX_RETRIES consecutive failures +the element is marked as "failed" and an alert can be triggered. + +Uses a single shared connection with a threading lock to avoid +"database is locked" errors from concurrent access. +""" + +import logging +import sqlite3 +import os +import threading +from datetime import datetime, timezone + +logger = logging.getLogger(__name__) + +_DB_PATH = os.getenv("TILER_MONITORING_RETRY_DB", "/data/pipeline_retries.db") +_lock = threading.Lock() +_conn = None + + +def _get_conn(): + """Return the shared connection, creating it on first call.""" + global _conn + if _conn is None: + _conn = sqlite3.connect(_DB_PATH, check_same_thread=False) + _conn.row_factory = sqlite3.Row + _conn.execute("PRAGMA journal_mode=WAL") + _conn.execute("PRAGMA busy_timeout=5000") + _init_tables(_conn) + return _conn + + +def _init_tables(conn): + """Create all tables and indexes.""" + conn.execute(""" + CREATE TABLE IF NOT EXISTS pending_retries ( + changeset_id INTEGER NOT NULL, + element_type TEXT NOT NULL, + osm_id INTEGER NOT NULL, + version INTEGER NOT NULL DEFAULT 0, + action TEXT NOT NULL DEFAULT '', + retry_count INTEGER NOT NULL DEFAULT 0, + max_retries INTEGER NOT NULL, + first_seen TEXT NOT NULL, + last_checked TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + PRIMARY KEY (changeset_id, element_type, osm_id) + ) + """) + # Migrate: add columns if missing (for existing DBs) + for col, typedef in [("version", "INTEGER NOT NULL DEFAULT 0"), + ("action", "TEXT NOT NULL DEFAULT ''")]: + try: + conn.execute(f"ALTER TABLE pending_retries ADD COLUMN {col} {typedef}") + except sqlite3.OperationalError: + pass + + conn.execute(""" + CREATE TABLE IF NOT EXISTS changeset_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + changeset_id INTEGER NOT NULL, + closed_at TEXT NOT NULL DEFAULT '', + checked_at TEXT NOT NULL, + status TEXT NOT NULL, + total_elements INTEGER NOT NULL DEFAULT 0, + missing_count INTEGER NOT NULL DEFAULT 0, + ok_count INTEGER 
NOT NULL DEFAULT 0, + message TEXT NOT NULL DEFAULT '' + ) + """) + try: + conn.execute("ALTER TABLE changeset_history ADD COLUMN closed_at TEXT NOT NULL DEFAULT ''") + except sqlite3.OperationalError: + pass + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_history_checked_at + ON changeset_history(checked_at DESC) + """) + + conn.execute(""" + CREATE TABLE IF NOT EXISTS element_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + history_id INTEGER NOT NULL, + changeset_id INTEGER NOT NULL, + element_type TEXT NOT NULL, + osm_id INTEGER NOT NULL, + version INTEGER NOT NULL DEFAULT 0, + action TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL, + found_in_tables TEXT NOT NULL DEFAULT '', + found_in_views TEXT NOT NULL DEFAULT '', + checked_at TEXT NOT NULL, + FOREIGN KEY (history_id) REFERENCES changeset_history(id) + ) + """) + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_element_history_id + ON element_history(history_id) + """) + + conn.commit() + + +# --------------------------------------------------------------------------- +# Pending retries +# --------------------------------------------------------------------------- + +def add_missing(changeset_id: int, element_type: str, osm_id: int, + max_retries: int, version: int = 0, action: str = ""): + """Register a missing element for future retry. 
If it already exists, do nothing.""" + now = datetime.now(timezone.utc).isoformat() + with _lock: + conn = _get_conn() + conn.execute(""" + INSERT OR IGNORE INTO pending_retries + (changeset_id, element_type, osm_id, version, action, + retry_count, max_retries, first_seen, last_checked, status) + VALUES (?, ?, ?, ?, ?, 0, ?, ?, ?, 'pending') + """, (changeset_id, element_type, osm_id, version, action, max_retries, now, now)) + conn.commit() + + +def get_pending(): + """Return all elements with status='pending' that still need to be rechecked.""" + with _lock: + conn = _get_conn() + rows = conn.execute( + "SELECT * FROM pending_retries WHERE status = 'pending'" + ).fetchall() + return [dict(r) for r in rows] + + +def mark_resolved(changeset_id: int, element_type: str, osm_id: int): + """Element was found in a retry — remove it from pending.""" + with _lock: + conn = _get_conn() + conn.execute(""" + DELETE FROM pending_retries + WHERE changeset_id = ? AND element_type = ? AND osm_id = ? + """, (changeset_id, element_type, osm_id)) + conn.commit() + + +def increment_retry(changeset_id: int, element_type: str, osm_id: int): + """Bump retry_count. If it reaches max_retries, flip status to 'failed'. + + Returns the new status ('pending' or 'failed'). + """ + now = datetime.now(timezone.utc).isoformat() + with _lock: + conn = _get_conn() + conn.execute(""" + UPDATE pending_retries + SET retry_count = retry_count + 1, last_checked = ? + WHERE changeset_id = ? AND element_type = ? AND osm_id = ? + """, (now, changeset_id, element_type, osm_id)) + + row = conn.execute(""" + SELECT retry_count, max_retries FROM pending_retries + WHERE changeset_id = ? AND element_type = ? AND osm_id = ? + """, (changeset_id, element_type, osm_id)).fetchone() + + if row and row["retry_count"] >= row["max_retries"]: + conn.execute(""" + UPDATE pending_retries SET status = 'failed' + WHERE changeset_id = ? AND element_type = ? AND osm_id = ? 
+ """, (changeset_id, element_type, osm_id)) + conn.commit() + return "failed" + + conn.commit() + return "pending" + + +def get_failed(): + """Return all elements that exhausted their retries.""" + with _lock: + conn = _get_conn() + rows = conn.execute( + "SELECT * FROM pending_retries WHERE status = 'failed'" + ).fetchall() + return [dict(r) for r in rows] + + +def clear_failed(): + """Remove all failed entries (call after alerting).""" + with _lock: + conn = _get_conn() + conn.execute("DELETE FROM pending_retries WHERE status = 'failed'") + conn.commit() + + +def get_all_details(ohm_base="https://www.openhistoricalmap.org"): + """Return all entries enriched with URLs and human-readable times.""" + with _lock: + conn = _get_conn() + rows = conn.execute( + "SELECT * FROM pending_retries ORDER BY status, first_seen DESC" + ).fetchall() + + now = datetime.now(timezone.utc) + results = [] + for r in rows: + entry = dict(r) + entry["changeset_url"] = f"{ohm_base}/changeset/{r['changeset_id']}" + entry["element_url"] = f"{ohm_base}/{r['element_type']}/{r['osm_id']}" + if r["version"]: + entry["element_url"] += f"/history/{r['version']}" + try: + first = datetime.fromisoformat(r["first_seen"]) + entry["age"] = _human_duration((now - first).total_seconds()) + except Exception: + entry["age"] = "" + try: + last = datetime.fromisoformat(r["last_checked"]) + entry["last_checked_ago"] = _human_duration((now - last).total_seconds()) + except Exception: + entry["last_checked_ago"] = "" + entry["retries_remaining"] = max(0, r["max_retries"] - r["retry_count"]) + results.append(entry) + return results + + +# --------------------------------------------------------------------------- +# Changeset history +# --------------------------------------------------------------------------- + +def log_changeset_check(changeset_id: int, status: str, + total_elements: int, missing_count: int, + ok_count: int, message: str, + closed_at: str = "", elements: list = None): + """Record a 
changeset check and its elements in the history tables.""" + now = datetime.now(timezone.utc).isoformat() + with _lock: + conn = _get_conn() + cur = conn.execute(""" + INSERT INTO changeset_history + (changeset_id, closed_at, checked_at, status, total_elements, missing_count, ok_count, message) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, (changeset_id, closed_at or "", now, status, total_elements, missing_count, ok_count, message)) + history_id = cur.lastrowid + + if elements: + for elem in elements: + tables = ", ".join(elem.get("found_in_tables", [])) + views = ", ".join(elem.get("found_in_views", [])) + found = bool(elem.get("found_in_tables")) + deleted = elem.get("deleted", False) + if deleted: + elem_status = "skipped" + elif elem.get("action") == "delete": + elem_status = "ok" if not found else "not_deleted" + elif found: + elem_status = "ok" + else: + elem_status = "missing" + conn.execute(""" + INSERT INTO element_history + (history_id, changeset_id, element_type, osm_id, version, + action, status, found_in_tables, found_in_views, checked_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, (history_id, changeset_id, elem.get("type", ""), + elem.get("osm_id", 0), elem.get("version", 0), + elem.get("action", ""), elem_status, tables, views, now)) + conn.commit() + + +def get_changeset_history(page: int = 1, per_page: int = 20, + ohm_base: str = "https://www.openhistoricalmap.org"): + """Return paginated changeset check history with details.""" + with _lock: + conn = _get_conn() + total = conn.execute("SELECT COUNT(*) FROM changeset_history").fetchone()[0] + total_pages = max(1, (total + per_page - 1) // per_page) + offset = (page - 1) * per_page + + rows = conn.execute(""" + SELECT * FROM changeset_history + ORDER BY checked_at DESC + LIMIT ? OFFSET ? 
+ """, (per_page, offset)).fetchall() + + now = datetime.now(timezone.utc) + results = [] + for r in rows: + entry = dict(r) + entry["changeset_url"] = f"{ohm_base}/changeset/{r['changeset_id']}" + try: + checked = datetime.fromisoformat(r["checked_at"]) + entry["checked_ago"] = _human_duration((now - checked).total_seconds()) + except Exception: + entry["checked_ago"] = "" + if r["closed_at"]: + try: + closed = datetime.fromisoformat(r["closed_at"].replace("Z", "+00:00")) + entry["closed_ago"] = _human_duration((now - closed).total_seconds()) + except Exception: + entry["closed_ago"] = "" + else: + entry["closed_ago"] = "" + results.append(entry) + + return { + "page": page, + "per_page": per_page, + "total": total, + "total_pages": total_pages, + "changesets": results, + } + + +def get_changeset_elements(history_id: int, + ohm_base: str = "https://www.openhistoricalmap.org"): + """Return all elements checked for a specific history entry.""" + with _lock: + conn = _get_conn() + rows = conn.execute(""" + SELECT * FROM element_history + WHERE history_id = ? + ORDER BY status DESC, element_type, osm_id + """, (history_id,)).fetchall() + + results = [] + for r in rows: + entry = dict(r) + entry["element_url"] = f"{ohm_base}/{r['element_type']}/{r['osm_id']}" + if r["version"]: + entry["element_url"] += f"/history/{r['version']}" + entry["found_in_tables"] = r["found_in_tables"].split(", ") if r["found_in_tables"] else [] + entry["found_in_views"] = r["found_in_views"].split(", ") if r["found_in_views"] else [] + results.append(entry) + return results + + +def is_changeset_passed(changeset_id: int) -> bool: + """Return True if this changeset was already checked with status 'ok'.""" + with _lock: + conn = _get_conn() + row = conn.execute(""" + SELECT 1 FROM changeset_history + WHERE changeset_id = ? 
AND status = 'ok' + LIMIT 1 + """, (changeset_id,)).fetchone() + return row is not None + + +def summary(): + """Return counts by status for logging.""" + with _lock: + conn = _get_conn() + rows = conn.execute( + "SELECT status, COUNT(*) as cnt FROM pending_retries GROUP BY status" + ).fetchall() + return {r["status"]: r["cnt"] for r in rows} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _human_duration(seconds): + """Convert seconds to human-readable string like '2h 15m ago'.""" + seconds = int(seconds) + if seconds < 60: + return f"{seconds}s ago" + minutes = seconds // 60 + if minutes < 60: + return f"{minutes}m ago" + hours = minutes // 60 + remaining_min = minutes % 60 + if hours < 24: + return f"{hours}h {remaining_min}m ago" if remaining_min else f"{hours}h ago" + days = hours // 24 + remaining_hours = hours % 24 + return f"{days}d {remaining_hours}h ago" if remaining_hours else f"{days}d ago" diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html b/images/tiler-monitor/pipeline-monitor/static/dashboard.html new file mode 100644 index 000000000..7b0380f1e --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ -0,0 +1,507 @@ + + + + + +OHM Tiler Pipeline Monitor + + + + +
+ +

OHM Tiler Pipeline Monitor

+
+ +
+
Status
LOADING...
+
+ +
+ + + +
+ +
+ + + +
+ + +
+ + + + + + + +
ChangesetStatusElementsMissingReplicationAgeMessage
+ +
+ + +
+
+ +
+ + + + + + + +
ChangesetStatusTotalOKMissingClosedCheckedMessage
+ + +
+ + +
+ + + + + + +
ElementChangesetActionStatusRetriesFirst SeenLast Checked
+ +
+ + + + diff --git a/images/tiler-monitor/pipeline-monitor/tables_config.json b/images/tiler-monitor/pipeline-monitor/tables_config.json new file mode 100644 index 000000000..1613aca75 --- /dev/null +++ b/images/tiler-monitor/pipeline-monitor/tables_config.json @@ -0,0 +1,94 @@ +{ + "tag_to_check": { + "amenity": { + "tables": ["osm_amenity_areas", "osm_amenity_points"], + "views": ["mv_amenity_areas_z14_15", "mv_amenity_areas_z16_20", "mv_amenity_points", "mv_amenity_points_centroids_z14_15", "mv_amenity_points_centroids_z16_20"] + }, + "aeroway": { + "tables": ["osm_transport_lines", "osm_transport_areas", "osm_transport_points"], + "views": ["mv_transport_lines_z5", "mv_transport_lines_z16_20", "mv_transport_areas_z10_12", "mv_transport_areas_z16_20", "mv_transport_points", "mv_transport_points_centroids_z16_20"] + }, + "barrier": { + "tables": ["osm_other_areas", "osm_other_lines", "osm_other_points", "osm_water_lines"], + "views": ["mv_other_areas_z8_9", "mv_other_areas_z16_20", "mv_other_lines_z14_15", "mv_other_lines_z16_20", "mv_other_points", "mv_other_points_centroids_z16_20", "mv_water_lines_z8_9", "mv_water_lines_z16_20"] + }, + "boundary": { + "tables": ["osm_admin_areas", "osm_admin_lines", "osm_admin_relation_members"], + "views": ["mv_admin_boundaries_areas_z0_2", "mv_admin_boundaries_areas_z16_20", "mv_admin_boundaries_lines_z0_2", "mv_admin_boundaries_lines_z16_20", "mv_admin_boundaries_centroids_z0_2", "mv_admin_boundaries_centroids_z16_20", "mv_admin_maritime_lines_z0_5_v2", "mv_admin_maritime_lines_z10_15", "mv_non_admin_boundaries_areas_z0_2", "mv_non_admin_boundaries_areas_z16_20", "mv_non_admin_boundaries_centroids_z0_2", "mv_non_admin_boundaries_centroids_z16_20", "mv_relation_members_boundaries"] + }, + "building": { + "tables": ["osm_buildings", "osm_buildings_points"], + "views": ["mv_buildings_areas_z14_15", "mv_buildings_areas_z16_20", "mv_buildings_points", "mv_buildings_points_centroids_z14_15", 
"mv_buildings_points_centroids_z16_20"] + }, + "communication": { + "tables": ["osm_communication_lines", "osm_communication_multilines"], + "views": ["mv_communication_z10_12", "mv_communication_z16_20"] + }, + "craft": { + "tables": ["osm_other_points"], + "views": ["mv_other_points", "mv_other_points_centroids_z16_20"] + }, + "highway": { + "tables": ["osm_transport_lines", "osm_transport_areas", "osm_transport_points", "osm_transport_multilines"], + "views": ["mv_transport_lines_z5", "mv_transport_lines_z16_20", "mv_transport_areas_z10_12", "mv_transport_areas_z16_20", "mv_transport_points", "mv_transport_points_centroids_z10_12", "mv_transport_points_centroids_z16_20"] + }, + "historic": { + "tables": ["osm_other_areas", "osm_other_lines", "osm_other_points"], + "views": ["mv_other_areas_z8_9", "mv_other_areas_z16_20", "mv_other_lines_z14_15", "mv_other_lines_z16_20", "mv_other_points", "mv_other_points_centroids_z8_9", "mv_other_points_centroids_z16_20"] + }, + "landuse": { + "tables": ["osm_landuse_areas", "osm_landuse_lines", "osm_landuse_points", "osm_water_areas"], + "views": ["mv_landuse_areas_z6_7", "mv_landuse_areas_z16_20", "mv_landuse_lines_z14_15", "mv_landuse_lines_z16_20", "mv_landuse_points", "mv_landuse_points_centroids_z6_7", "mv_landuse_points_centroids_z16_20", "mv_water_areas_z0_2", "mv_water_areas_z16_20", "mv_water_areas_centroids_z8_9", "mv_water_areas_centroids_z16_20"] + }, + "leisure": { + "tables": ["osm_landuse_areas", "osm_landuse_lines", "osm_landuse_points"], + "views": ["mv_landuse_areas_z6_7", "mv_landuse_areas_z16_20", "mv_landuse_lines_z14_15", "mv_landuse_lines_z16_20", "mv_landuse_points", "mv_landuse_points_centroids_z6_7", "mv_landuse_points_centroids_z16_20"] + }, + "man_made": { + "tables": ["osm_other_areas", "osm_other_lines", "osm_other_points"], + "views": ["mv_other_areas_z8_9", "mv_other_areas_z16_20", "mv_other_lines_z14_15", "mv_other_lines_z16_20", "mv_other_points", "mv_other_points_centroids_z8_9", 
"mv_other_points_centroids_z16_20"] + }, + "military": { + "tables": ["osm_other_areas", "osm_other_lines", "osm_other_points"], + "views": ["mv_other_areas_z8_9", "mv_other_areas_z16_20", "mv_other_lines_z14_15", "mv_other_lines_z16_20", "mv_other_points", "mv_other_points_centroids_z8_9", "mv_other_points_centroids_z16_20"] + }, + "natural": { + "tables": ["osm_landuse_areas", "osm_landuse_lines", "osm_landuse_points", "osm_water_areas", "osm_water_lines"], + "views": ["mv_landuse_areas_z6_7", "mv_landuse_areas_z16_20", "mv_landuse_lines_z14_15", "mv_landuse_lines_z16_20", "mv_landuse_points", "mv_landuse_points_centroids_z6_7", "mv_landuse_points_centroids_z16_20", "mv_water_areas_z0_2", "mv_water_areas_z16_20", "mv_water_areas_centroids_z8_9", "mv_water_areas_centroids_z16_20", "mv_water_lines_z8_9", "mv_water_lines_z16_20"] + }, + "place": { + "tables": ["osm_place_areas", "osm_place_points"], + "views": ["mv_place_areas_z14_20", "mv_place_points_centroids_z0_2", "mv_place_points_centroids_z11_20"] + }, + "power": { + "tables": ["osm_other_areas", "osm_other_lines", "osm_other_points"], + "views": ["mv_other_areas_z8_9", "mv_other_areas_z16_20", "mv_other_lines_z14_15", "mv_other_lines_z16_20", "mv_other_points", "mv_other_points_centroids_z8_9", "mv_other_points_centroids_z16_20"] + }, + "railway": { + "tables": ["osm_transport_lines", "osm_transport_areas", "osm_transport_points", "osm_transport_multilines"], + "views": ["mv_transport_lines_z5", "mv_transport_lines_z16_20", "mv_transport_areas_z10_12", "mv_transport_areas_z16_20", "mv_transport_points", "mv_transport_points_centroids_z10_12", "mv_transport_points_centroids_z16_20"] + }, + "route": { + "tables": ["osm_route_lines", "osm_route_multilines"], + "views": ["mv_routes_indexed_z16_20"], + "view_column": "osm_id", + "view_id_mode": "members" + }, + "shop": { + "tables": ["osm_other_points"], + "views": ["mv_other_points", "mv_other_points_centroids_z16_20"] + }, + "tourism": { + "tables": 
["osm_other_points"], + "views": ["mv_other_points", "mv_other_points_centroids_z16_20"] + }, + "type=street": { + "tables": ["osm_street_multilines"], + "views": ["mv_transport_lines_z5", "mv_transport_lines_z16_20"] + }, + "waterway": { + "tables": ["osm_water_lines", "osm_water_areas"], + "views": ["mv_water_lines_z8_9", "mv_water_lines_z16_20", "mv_water_areas_z0_2", "mv_water_areas_z16_20", "mv_water_areas_centroids_z8_9", "mv_water_areas_centroids_z16_20"] + } + } +} diff --git a/images/tiler-pipeline-monitor/config.py b/images/tiler-pipeline-monitor/config.py index 95667fac9..756c72c6d 100644 --- a/images/tiler-pipeline-monitor/config.py +++ b/images/tiler-pipeline-monitor/config.py @@ -17,14 +17,14 @@ class Config: OHM_API_BASE = os.getenv("OHM_API_BASE", "https://www.openhistoricalmap.org/api/0.6") # How often to run the pipeline check (seconds) - CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 3600)) # 1 hour + CHECK_INTERVAL = int(os.getenv("TILER_MONITORING_CHECK_INTERVAL", 3600)) # 1 hour # OHM changeset age window (seconds) # Only check changesets closed at least CHANGESET_MIN_AGE ago # and at most CHANGESET_MAX_AGE ago. 
# Example: min=3600 max=10800 → changesets closed between 1 and 3 hours ago - CHANGESET_MIN_AGE = int(os.getenv("CHANGESET_MIN_AGE", 10800)) # 1 hour - CHANGESET_MAX_AGE = int(os.getenv("CHANGESET_MAX_AGE", 14400)) # 3 hours + CHANGESET_MIN_AGE = int(os.getenv("TILER_MONITORING_CHANGESET_MIN_AGE", 10800)) # 3 hours + CHANGESET_MAX_AGE = int(os.getenv("TILER_MONITORING_CHANGESET_MAX_AGE", 14400)) # 4 hours # Max number of changesets to check per run CHANGESET_LIMIT = int(os.getenv("CHANGESET_LIMIT", 30)) From 8a72a97ed0d4ea8a454709fef850ce737863fda3 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Fri, 20 Mar 2026 16:12:31 -0500 Subject: [PATCH 05/17] Update config to deploy in production --- hetzner/tiler/tiler.production.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hetzner/tiler/tiler.production.yml b/hetzner/tiler/tiler.production.yml index 7fb106ee7..b1e7b19b5 100644 --- a/hetzner/tiler/tiler.production.yml +++ b/hetzner/tiler/tiler.production.yml @@ -120,11 +120,12 @@ services: tiler_monitor: container_name: tiler_monitor - image: ghcr.io/openhistoricalmap/tiler-monitor:0.0.1-0.dev.git.2874.ha9bff68 + image: ghcr.io/openhistoricalmap/tiler-monitor:0.0.1-0.dev.git.3333.hd8192f15 volumes: - /var/run/docker.sock:/var/run/docker.sock - ../../images/tiler-monitor:/app - ../../hetzner:/app/hetzner + - tiler_monitor_data:/data environment: - DOCKER_CONFIG_ENVIRONMENT=production stdin_open: true @@ -133,7 +134,7 @@ services: - .env.tiler networks: - ohm_network - + volumes: tiler_pgdata: driver: local @@ -142,6 +143,10 @@ volumes: driver: local name: tiler_imposm_17_03 + tiler_monitor_data: + driver: local + name: tiler_monitor_data + networks: ohm_network: external: true From 83e17c3439b56e5b8406a0e2849666d2ea71d21b Mon Sep 17 00:00:00 2001 From: Rub21 Date: Fri, 20 Mar 2026 16:28:13 -0500 Subject: [PATCH 06/17] Fix permissions --- images/tiler-monitor/entrypoint.sh | 0 images/tiler-pipeline-monitor/Dockerfile | 19 -
.../tiler-pipeline-monitor/checks/__init__.py | 0 .../checks/imposm_import.py | 737 ------------------ .../checks/mv_freshness.py | 179 ----- .../checks/replication_lag.py | 89 --- .../checks/tile_cache.py | 159 ---- images/tiler-pipeline-monitor/config.py | 72 -- images/tiler-pipeline-monitor/monitor.py | 151 ---- .../tiler-pipeline-monitor/requirements.txt | 6 - .../tiler-pipeline-monitor/tables_config.json | 164 ---- 11 files changed, 1576 deletions(-) mode change 100644 => 100755 images/tiler-monitor/entrypoint.sh delete mode 100644 images/tiler-pipeline-monitor/Dockerfile delete mode 100644 images/tiler-pipeline-monitor/checks/__init__.py delete mode 100644 images/tiler-pipeline-monitor/checks/imposm_import.py delete mode 100644 images/tiler-pipeline-monitor/checks/mv_freshness.py delete mode 100644 images/tiler-pipeline-monitor/checks/replication_lag.py delete mode 100644 images/tiler-pipeline-monitor/checks/tile_cache.py delete mode 100644 images/tiler-pipeline-monitor/config.py delete mode 100644 images/tiler-pipeline-monitor/monitor.py delete mode 100644 images/tiler-pipeline-monitor/requirements.txt delete mode 100644 images/tiler-pipeline-monitor/tables_config.json diff --git a/images/tiler-monitor/entrypoint.sh b/images/tiler-monitor/entrypoint.sh old mode 100644 new mode 100755 diff --git a/images/tiler-pipeline-monitor/Dockerfile b/images/tiler-pipeline-monitor/Dockerfile deleted file mode 100644 index 51cf7f628..000000000 --- a/images/tiler-pipeline-monitor/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.12-slim - -RUN apt-get update && \ - apt-get install -y --no-install-recommends curl && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -COPY . . 
- -EXPOSE 8001 - -HEALTHCHECK --interval=30s --timeout=10s --retries=3 --start-period=30s \ - CMD curl -f http://localhost:8001/health || exit 1 - -CMD ["python", "monitor.py"] diff --git a/images/tiler-pipeline-monitor/checks/__init__.py b/images/tiler-pipeline-monitor/checks/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/images/tiler-pipeline-monitor/checks/imposm_import.py b/images/tiler-pipeline-monitor/checks/imposm_import.py deleted file mode 100644 index f90ed5186..000000000 --- a/images/tiler-pipeline-monitor/checks/imposm_import.py +++ /dev/null @@ -1,737 +0,0 @@ -"""Pipeline check: changeset-centric verification. - -For each changeset in the 1-2 hour window: - 1. Check if minute replication covers it (replication timestamp >= closed_at) - 2. Check if its way/relation elements exist in the tiler DB with the correct version - 3. For a random sample: verify materialized views + S3 tile cache -""" - -import json -import os -import random -import xml.etree.ElementTree as ET -from datetime import datetime, timezone, timedelta - -import psycopg2 -import requests - -from config import Config - -# Load table/view mapping from JSON config -_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tables_config.json") -with open(_config_path) as f: - _tables_config = json.load(f) - -OHM_BASE = None # lazily computed - - -def _ohm_base(): - global OHM_BASE - if OHM_BASE is None: - OHM_BASE = Config.OHM_API_BASE.replace("/api/0.6", "") - return OHM_BASE - - -def _parse_timestamp(ts_str): - """Parse an ISO timestamp string to a timezone-aware datetime.""" - ts_str = ts_str.replace("Z", "+00:00") - return datetime.fromisoformat(ts_str) - - -# --------------------------------------------------------------------------- -# Step 0: get changesets in the age window -# --------------------------------------------------------------------------- - -def _get_changesets_in_window(min_age, max_age, limit=10): - """Fetch closed 
changesets whose age is between min_age and max_age seconds. - - Fetches recent changesets and filters locally by age window. - """ - now = datetime.now(timezone.utc) - min_closed = now - timedelta(seconds=max_age) # oldest allowed - max_closed = now - timedelta(seconds=min_age) # newest allowed - - # Fetch enough to find some in the window - fetch_limit = 100 - url = f"{Config.OHM_API_BASE}/changesets" - params = {"limit": fetch_limit, "closed": "true"} - headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} - - print(f"[pipeline] Fetching changesets: {url}?limit={fetch_limit}&closed=true") - print(f" Looking for changesets closed between " - f"{min_closed.strftime('%Y-%m-%dT%H:%M:%SZ')} and " - f"{max_closed.strftime('%Y-%m-%dT%H:%M:%SZ')} " - f"(age {min_age//60}-{max_age//60} min)") - - resp = requests.get(url, params=params, headers=headers, timeout=30) - resp.raise_for_status() - - root = ET.fromstring(resp.content) - changesets = [] - skipped_young = 0 - skipped_old = 0 - - for cs in root.findall("changeset"): - cs_id = int(cs.attrib["id"]) - closed_at = cs.attrib.get("closed_at", "") - if not closed_at: - continue - try: - closed_dt = _parse_timestamp(closed_at) - except (ValueError, TypeError): - continue - - age_minutes = (now - closed_dt).total_seconds() / 60 - - if closed_dt > max_closed: - skipped_young += 1 - continue - elif closed_dt < min_closed: - skipped_old += 1 - # Changesets are ordered by newest first, so once we hit old ones, stop - break - else: - changesets.append({ - "id": cs_id, - "closed_at": closed_at, - "closed_dt": closed_dt, - "age_minutes": round(age_minutes, 1), - }) - - if len(changesets) >= limit: - break - - print(f" Fetched {len(root.findall('changeset'))} changesets from API") - print(f" Skipped: {skipped_young} too young (<{min_age//60}min), " - f"{skipped_old} too old (>{max_age//60}min)") - print(f" Found {len(changesets)} changesets in window:") - for cs in changesets: - print(f" changeset {cs['id']}: 
closed_at={cs['closed_at']} " - f"(age={cs['age_minutes']}min)") - - return changesets - - -# --------------------------------------------------------------------------- -# Step 1: replication check -# --------------------------------------------------------------------------- - -def _parse_replication_state(text): - """Parse state.txt and return (sequence, timestamp).""" - data = {} - for line in text.strip().splitlines(): - if "=" in line: - key, _, value = line.partition("=") - data[key.strip()] = value.strip() - seq = int(data.get("sequenceNumber", 0)) - ts_raw = data.get("timestamp", "").replace("\\:", ":") - try: - ts = datetime.fromisoformat(ts_raw.replace("Z", "+00:00")) - except ValueError: - ts = None - return seq, ts - - -def _check_replication_covers(changeset, repl_seq, repl_ts): - """Check if the replication state covers this changeset.""" - if repl_ts is None: - return { - "status": "warning", - "message": "Cannot parse replication timestamp", - } - - closed_dt = changeset["closed_dt"] - if repl_ts >= closed_dt: - return { - "status": "ok", - "message": (f"Replication covers this changeset " - f"(repl_ts={repl_ts.isoformat()} >= closed_at={changeset['closed_at']})"), - "replication_sequence": repl_seq, - "replication_timestamp": repl_ts.isoformat(), - } - else: - lag = (closed_dt - repl_ts).total_seconds() - return { - "status": "critical", - "message": (f"Replication does NOT cover this changeset. 
" - f"Replication is {round(lag/60, 1)}min behind " - f"(repl_ts={repl_ts.isoformat()} < closed_at={changeset['closed_at']})"), - "replication_sequence": repl_seq, - "replication_timestamp": repl_ts.isoformat(), - } - - -# --------------------------------------------------------------------------- -# Step 2: tiler DB check -# --------------------------------------------------------------------------- - -def _get_changeset_elements(changeset_id): - """Download changeset diff and extract way/relation elements with versions.""" - url = f"{Config.OHM_API_BASE}/changeset/{changeset_id}/download" - headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} - resp = requests.get(url, headers=headers, timeout=30) - resp.raise_for_status() - - root = ET.fromstring(resp.content) - elements = [] - - for action in root: # create, modify, delete - action_type = action.tag - for elem in action: - osm_id = elem.attrib.get("id") - version = elem.attrib.get("version") - elem_type = elem.tag - if osm_id and elem_type in ("way", "relation"): - # Extract tags to determine which imposm table this element belongs to - tags = {} - for tag in elem.findall("tag"): - k = tag.attrib.get("k") - v = tag.attrib.get("v") - if k and v: - tags[k] = v - elements.append({ - "type": elem_type, - "osm_id": int(osm_id), - "version": int(version) if version else None, - "action": action_type, - "tags": tags, - }) - return elements - - - -# Loaded from tables_config.json -RELATION_TABLES = _tables_config["relation_tables"] -WAY_TABLES = _tables_config["way_tables"] -TABLE_TO_VIEWS = _tables_config["table_to_views"] - - -def _build_union_query(tables, search_id): - """Build a UNION ALL query to search osm_id across multiple tables in 1 round-trip.""" - parts = [] - for table in tables: - parts.append( - f"SELECT '{table}' AS tbl, tags->'version' AS version " - f"FROM {table} WHERE osm_id = {int(search_id)} LIMIT 1" - ) - return " UNION ALL ".join(parts) - - -def _check_element_in_tables(conn, elem): - """Check 
if an element exists in tiler DB tables using a single UNION ALL query.""" - osm_id = elem["osm_id"] - search_id = -osm_id if elem["type"] == "relation" else osm_id - candidate_tables = RELATION_TABLES if elem["type"] == "relation" else WAY_TABLES - - cur = conn.cursor() - - # Get existing tables (cached per connection would be ideal, but simple first) - cur.execute(""" - SELECT table_name FROM information_schema.tables - WHERE table_schema = 'public' AND table_name LIKE 'osm_%%' - """) - existing_tables = {row[0] for row in cur.fetchall()} - tables = [t for t in candidate_tables if t in existing_tables] - - if not tables: - cur.close() - return { - "type": elem["type"], - "osm_id": osm_id, - "action": elem["action"], - "expected_version": elem["version"], - "found_in_tables": [], - "found_in_views": [], - "version_match": None, - "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", - } - - # Single UNION ALL query across all candidate tables - query = _build_union_query(tables, search_id) - found_in_tables = [] - version_match = None - - try: - cur.execute(query) - for row in cur.fetchall(): - found_in_tables.append(row[0]) - db_version = row[1] - if elem["version"] is not None and db_version is not None: - try: - version_match = int(db_version) >= elem["version"] - except (ValueError, TypeError): - version_match = None - except Exception: - conn.rollback() - - cur.close() - - return { - "type": elem["type"], - "osm_id": osm_id, - "action": elem["action"], - "expected_version": elem["version"], - "found_in_tables": found_in_tables, - "found_in_views": [], - "version_match": version_match, - "url": f"{_ohm_base()}/{elem['type']}/{elem['osm_id']}", - } - - -def _check_element_in_views(conn, check): - """Check if an element exists in materialized views using a single UNION ALL query.""" - osm_id = check["osm_id"] - search_id = -osm_id if check["type"] == "relation" else osm_id - - # Collect all candidate views from the tables where the element was found - 
found_tables = check["found_in_tables"] - candidate_views = set() - for table in found_tables: - for v in TABLE_TO_VIEWS.get(table, []): - candidate_views.add(v) - - if not candidate_views: - return check - - cur = conn.cursor() - - # Filter to existing views - cur.execute(""" - SELECT matviewname FROM pg_matviews - WHERE schemaname = 'public' AND matviewname LIKE 'mv_%%' - """) - existing_views = {row[0] for row in cur.fetchall()} - views = sorted(v for v in candidate_views if v in existing_views) - - if not views: - cur.close() - return check - - # Single UNION ALL query across all candidate views - parts = [] - for view in views: - parts.append( - f"SELECT '{view}' AS vw FROM {view} " - f"WHERE osm_id = {int(search_id)} LIMIT 1" - ) - query = " UNION ALL ".join(parts) - - found_in_views = [] - try: - cur.execute(query) - for row in cur.fetchall(): - found_in_views.append(row[0]) - except Exception: - conn.rollback() - - cur.close() - check["found_in_views"] = found_in_views - return check - - -def _is_element_deleted(elem): - """Check if an element has been deleted in OHM (visible=false or 410 Gone).""" - url = f"{Config.OHM_API_BASE}/{elem['type']}/{elem['osm_id']}" - headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} - try: - resp = requests.get(url, headers=headers, timeout=15) - if resp.status_code == 410: - return True - if resp.status_code == 200: - root = ET.fromstring(resp.content) - el = root.find(elem["type"]) - if el is not None and el.attrib.get("visible") == "false": - return True - return False - except Exception: - return False - - -def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None): - """Check all elements of a changeset in the tiler DB. 
- - - ALL elements: verified in osm_* tables (fast, tag-filtered) - - SAMPLE elements: full check → tables + views + S3 tile cache - """ - from checks.tile_cache import check_tile_cache_for_element - - try: - elements = _get_changeset_elements(changeset_id) - except requests.RequestException as e: - return { - "status": "critical", - "message": f"Failed to download changeset diff: {e}", - "elements": [], - } - - if not elements: - return { - "status": "ok", - "message": "No way/relation elements in this changeset", - "elements": [], - } - - # Filter out deletes - active_elements = [] - for elem in elements: - if elem["action"] == "delete": - print(f" SKIP {elem['type']}/{elem['osm_id']} (action=delete)") - else: - active_elements.append(elem) - - if not active_elements: - return { - "status": "ok", - "message": "All elements in this changeset are deletes", - "elements": [], - } - - # Select random sample for full pipeline check (tables + views + S3) - sample_size = min(Config.FULL_CHECK_SAMPLE_SIZE, len(active_elements)) - sample_indices = set(random.sample(range(len(active_elements)), sample_size)) - - print(f" [tiler_db] Checking {len(active_elements)} elements " - f"(full pipeline check on {sample_size} sampled)") - - missing = [] - mismatches = [] - checked = [] - tile_cache_results = [] - - for idx, elem in enumerate(active_elements): - is_sample = idx in sample_indices - sample_label = " [SAMPLE]" if is_sample else "" - - # Step 1: Check tables — single UNION ALL query (ALL elements) - check = _check_element_in_tables(conn, elem) - - # Step 2: Check views — single UNION ALL query (SAMPLE only) - if is_sample and check["found_in_tables"]: - check = _check_element_in_views(conn, check) - - checked.append(check) - - tables = check["found_in_tables"] - views = check["found_in_views"] - - if tables: - icon = "OK" if check["version_match"] is not False else "VERSION_MISMATCH" - print(f" {icon}{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " - 
f"({elem['action']}) version_match={check['version_match']}") - print(f" tables: {tables}") - if views: - print(f" views: {views}") - print(f" {check['url']}") - - # Step 3: Check S3 tile cache (SAMPLE only) - if is_sample and changeset_closed_at and Config.S3_BUCKET_CACHE_TILER: - try: - tile_result = check_tile_cache_for_element( - conn, check, changeset_closed_at - ) - tile_cache_results.append(tile_result) - cache_status = tile_result.get("cache", {}).get("status", "unknown") - tile_info = tile_result.get("tile", {}) - if cache_status == "stale": - print(f" [S3 CACHE] STALE tile z{tile_info.get('z')}/{tile_info.get('x')}/{tile_info.get('y')}") - elif cache_status == "ok": - print(f" [S3 CACHE] OK tile z{tile_info.get('z')}/{tile_info.get('x')}/{tile_info.get('y')}") - elif cache_status == "skipped": - print(f" [S3 CACHE] skipped: {tile_result.get('cache', {}).get('message', '')}") - except Exception as e: - print(f" [S3 CACHE] error: {e}") - else: - # Not found — check if deleted in a later changeset - if _is_element_deleted(elem): - print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " - f"({elem['action']}) -> deleted in a later changeset") - print(f" {check['url']}") - check["deleted"] = True - continue - - print(f" MISSING{sample_label} {elem['type']}/{elem['osm_id']} v{elem['version']} " - f"({elem['action']}) -> NOT in tables") - print(f" {check['url']}") - - if not tables and not check.get("deleted"): - missing.append(f"{elem['type']}/{elem['osm_id']}") - elif check["version_match"] is False: - mismatches.append(f"{elem['type']}/{elem['osm_id']} expected v{elem['version']}") - - # Build status message - stale_tiles = [r for r in tile_cache_results if r.get("cache", {}).get("status") == "stale"] - - if missing: - status = "warning" - msg = f"Missing from tiler DB: {', '.join(missing)}" - if mismatches: - msg += f". 
Version mismatches: {', '.join(mismatches)}" - elif mismatches: - status = "warning" - msg = f"Version mismatches: {', '.join(mismatches)}" - elif stale_tiles: - status = "warning" - stale_ids = [f"{r['type']}/{r['osm_id']}" for r in stale_tiles] - msg = (f"All {len(checked)} elements in tables, " - f"but S3 tile cache stale for: {', '.join(stale_ids)}") - else: - status = "ok" - msg = f"All {len(checked)} elements verified in tiler DB" - if tile_cache_results: - msg += f" (S3 cache OK for {len(tile_cache_results)} sampled)" - - return { - "status": status, - "message": msg, - "elements": checked, - "tile_cache": tile_cache_results, - } - - -# --------------------------------------------------------------------------- -# Main pipeline check (scheduled) -# --------------------------------------------------------------------------- - -def check_pipeline(): - """Check the full pipeline for changesets in the 1-2 hour age window. - - For each changeset: - 1. Is it covered by minute replication? - 2. Are its elements in the tiler DB? 
- """ - now = datetime.now(timezone.utc) - min_age = Config.CHANGESET_MIN_AGE - max_age = Config.CHANGESET_MAX_AGE - - result = { - "name": "pipeline", - "status": "ok", - "message": "", - "details": { - "window": f"{min_age//60}-{max_age//60} minutes", - "replication": {}, - "changesets": [], - }, - "checked_at": now.isoformat(), - } - - # --- Fetch replication state --- - repl_seq, repl_ts = None, None - try: - resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) - resp.raise_for_status() - repl_seq, repl_ts = _parse_replication_state(resp.text) - result["details"]["replication"] = { - "status": "ok", - "sequence": repl_seq, - "timestamp": repl_ts.isoformat() if repl_ts else None, - } - if repl_ts: - lag_min = (now - repl_ts).total_seconds() / 60 - result["details"]["replication"]["lag_minutes"] = round(lag_min, 1) - print(f"\n[pipeline] Replication state: seq={repl_seq}, " - f"ts={repl_ts.isoformat()}, lag={lag_min:.1f}min") - except requests.RequestException as e: - result["details"]["replication"] = { - "status": "critical", - "message": f"Failed to fetch replication state: {e}", - } - print(f"\n[pipeline] WARNING: Cannot fetch replication state: {e}") - - # --- Get changesets in window --- - try: - changesets = _get_changesets_in_window( - min_age=min_age, - max_age=max_age, - limit=Config.CHANGESET_LIMIT, - ) - except requests.RequestException as e: - result["status"] = "critical" - result["message"] = f"Failed to fetch changesets from OHM API: {e}" - return result - - if not changesets: - result["message"] = ( - f"No changesets found in the {min_age//60}-{max_age//60} minute window" - ) - print(f"[pipeline] {result['message']}") - return result - - print(f"[pipeline] Found {len(changesets)} changesets in " - f"{min_age//60}-{max_age//60}min window") - - # --- Connect to tiler DB --- - conn = None - try: - conn = psycopg2.connect( - host=Config.POSTGRES_HOST, - port=Config.POSTGRES_PORT, - dbname=Config.POSTGRES_DB, - user=Config.POSTGRES_USER, - 
password=Config.POSTGRES_PASSWORD, - ) - except psycopg2.Error as e: - result["status"] = "critical" - result["message"] = f"Cannot connect to tiler DB: {e}" - print(f"[pipeline] ERROR: Cannot connect to tiler DB: {e}") - return result - - # --- Check each changeset through the pipeline --- - problems = [] - - for cs in changesets: - print(f"\n[pipeline] === Changeset {cs['id']} === " - f"(closed_at={cs['closed_at']}, age={cs['age_minutes']}min)") - print(f" URL: {_ohm_base()}/changeset/{cs['id']}") - - cs_result = { - "changeset_id": cs["id"], - "changeset_url": f"{_ohm_base()}/changeset/{cs['id']}", - "closed_at": cs["closed_at"], - "age_minutes": cs["age_minutes"], - "replication": {}, - "tiler_db": {}, - } - - # Step 1: replication - if repl_seq is not None: - repl_check = _check_replication_covers(cs, repl_seq, repl_ts) - cs_result["replication"] = repl_check - print(f" [replication] {repl_check['status'].upper()}: {repl_check['message']}") - - if repl_check["status"] != "ok": - problems.append( - f"Changeset {cs['id']}: replication not covering" - ) - else: - cs_result["replication"] = {"status": "unknown", "message": "Replication state unavailable"} - print(f" [replication] UNKNOWN: Replication state unavailable") - - # Step 2: tiler DB - db_check = _check_elements_in_db(conn, cs["id"], cs["closed_at"]) - cs_result["tiler_db"] = db_check - print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") - - if db_check["status"] != "ok": - problems.append(f"Changeset {cs['id']}: {db_check['message']}") - - result["details"]["changesets"].append(cs_result) - - conn.close() - - # --- Overall status --- - if any(cs.get("replication", {}).get("status") == "critical" - or cs.get("tiler_db", {}).get("status") in ("warning", "critical") - for cs in result["details"]["changesets"]): - result["status"] = "warning" - result["message"] = f"Issues found: {'; '.join(problems[:5])}" - else: - result["message"] = ( - f"All {len(changesets)} changesets in 
{min_age//60}-{max_age//60}min " - f"window passed pipeline check" - ) - - print(f"\n[pipeline] Result: {result['status'].upper()} — {result['message']}") - return result - - -# --------------------------------------------------------------------------- -# On-demand single changeset check -# --------------------------------------------------------------------------- - -def check_single_changeset(changeset_id): - """Evaluate a single changeset through the full pipeline (on-demand).""" - now = datetime.now(timezone.utc) - result = { - "name": "pipeline", - "changeset_id": changeset_id, - "changeset_url": f"{_ohm_base()}/changeset/{changeset_id}", - "status": "ok", - "message": "", - "details": {"replication": {}, "tiler_db": {}}, - "checked_at": now.isoformat(), - } - - # Get changeset info - try: - url = f"{Config.OHM_API_BASE}/changeset/{changeset_id}" - headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} - resp = requests.get(url, headers=headers, timeout=30) - resp.raise_for_status() - root = ET.fromstring(resp.content) - cs_elem = root.find("changeset") - closed_at = cs_elem.attrib.get("closed_at", "") if cs_elem is not None else "" - except Exception: - closed_at = "" - - print(f"\n[pipeline] === Changeset {changeset_id} (on-demand) ===") - print(f" URL: {_ohm_base()}/changeset/{changeset_id}") - if closed_at: - print(f" closed_at: {closed_at}") - - # Step 1: replication - try: - resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) - resp.raise_for_status() - repl_seq, repl_ts = _parse_replication_state(resp.text) - - if closed_at and repl_ts: - closed_dt = _parse_timestamp(closed_at) - cs_data = {"closed_at": closed_at, "closed_dt": closed_dt} - repl_check = _check_replication_covers(cs_data, repl_seq, repl_ts) - else: - repl_check = { - "status": "ok" if repl_ts else "warning", - "message": f"Replication seq={repl_seq}, ts={repl_ts.isoformat() if repl_ts else 'unknown'}", - "replication_sequence": repl_seq, - "replication_timestamp": 
repl_ts.isoformat() if repl_ts else None, - } - - result["details"]["replication"] = repl_check - print(f" [replication] {repl_check['status'].upper()}: {repl_check['message']}") - except requests.RequestException as e: - result["details"]["replication"] = { - "status": "critical", - "message": f"Failed to fetch replication state: {e}", - } - print(f" [replication] CRITICAL: Cannot fetch replication state: {e}") - - # Step 2: tiler DB - try: - conn = psycopg2.connect( - host=Config.POSTGRES_HOST, - port=Config.POSTGRES_PORT, - dbname=Config.POSTGRES_DB, - user=Config.POSTGRES_USER, - password=Config.POSTGRES_PASSWORD, - ) - except psycopg2.Error as e: - result["status"] = "critical" - result["message"] = f"Cannot connect to tiler DB: {e}" - result["details"]["tiler_db"] = {"status": "critical", "message": str(e)} - return result - - db_check = _check_elements_in_db(conn, changeset_id, closed_at or None) - conn.close() - result["details"]["tiler_db"] = db_check - print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") - - # Overall - problems = [] - repl_status = result["details"]["replication"].get("status", "ok") - if repl_status == "critical": - problems.append("Replication not covering this changeset") - if db_check["status"] != "ok": - problems.append(db_check["message"]) - - if problems: - result["status"] = "warning" - result["message"] = "; ".join(problems) - else: - result["message"] = ( - f"Changeset {changeset_id} passed full pipeline check " - f"({len(db_check.get('elements', []))} elements verified)" - ) - - print(f" [result] {result['status'].upper()}: {result['message']}") - return result diff --git a/images/tiler-pipeline-monitor/checks/mv_freshness.py b/images/tiler-pipeline-monitor/checks/mv_freshness.py deleted file mode 100644 index 35d5d4d6b..000000000 --- a/images/tiler-pipeline-monitor/checks/mv_freshness.py +++ /dev/null @@ -1,179 +0,0 @@ -"""Check 3: Materialized view freshness monitor. 
- -Queries pg_stat_user_tables to check when materialized views were last -auto-analyzed/auto-vacuumed (proxy for last refresh), and also checks -if the views exist and have rows. -""" - -from datetime import datetime, timezone - -import psycopg2 - -from config import Config - -# Key materialized views grouped by expected refresh interval. -# group_name -> (max_stale_seconds, [view_names]) -MV_GROUPS = { - "admin_boundaries_lines": ( - 300, # expect refresh every ~60s + buffer - [ - "mv_admin_boundaries_lines_z4_5", - "mv_admin_boundaries_lines_z6_7", - "mv_admin_boundaries_lines_z8_9", - "mv_admin_boundaries_lines_z10_11", - "mv_admin_boundaries_lines_z12_13", - "mv_admin_boundaries_lines_z14_15", - "mv_admin_boundaries_lines_z16_20", - ], - ), - "water": ( - 600, # expect refresh every ~180s + buffer - [ - "mv_water_lines_z10_11", - "mv_water_lines_z12_13", - "mv_water_lines_z14_15", - "mv_water_lines_z16_20", - "mv_water_areas_z6_7", - "mv_water_areas_z8_9", - "mv_water_areas_z10_11", - "mv_water_areas_z12_13", - "mv_water_areas_z14_15", - "mv_water_areas_z16_20", - ], - ), - "transport": ( - 600, - [ - "mv_transport_lines_z8_9", - "mv_transport_lines_z10_11", - "mv_transport_lines_z12_13", - "mv_transport_lines_z14_15", - "mv_transport_lines_z16_20", - ], - ), -} - - -def check_mv_freshness(): - """Check that key materialized views exist and are being refreshed.""" - result = { - "name": "mv_freshness", - "status": "ok", - "message": "", - "details": {"groups": {}}, - "checked_at": datetime.now(timezone.utc).isoformat(), - } - - try: - conn = psycopg2.connect( - host=Config.POSTGRES_HOST, - port=Config.POSTGRES_PORT, - dbname=Config.POSTGRES_DB, - user=Config.POSTGRES_USER, - password=Config.POSTGRES_PASSWORD, - ) - except psycopg2.Error as e: - result["status"] = "critical" - result["message"] = f"Cannot connect to tiler DB: {e}" - return result - - cur = conn.cursor() - - # Get list of existing materialized views - cur.execute("SELECT matviewname FROM 
pg_matviews WHERE schemaname = 'public'") - existing_mvs = {row[0] for row in cur.fetchall()} - - # Check row counts and last analyze times for MVs via pg_stat_user_tables. - # REFRESH MATERIALIZED VIEW triggers auto-analyze, so last_autoanalyze - # is a good proxy for "last refreshed". - cur.execute(""" - SELECT relname, n_live_tup, last_autoanalyze, last_analyze - FROM pg_stat_user_tables - WHERE schemaname = 'public' - AND relname LIKE 'mv_%%' - """) - mv_stats = {} - for row in cur.fetchall(): - name, n_rows, last_autoanalyze, last_analyze = row - # Use whichever is more recent - last_refreshed = max( - filter(None, [last_autoanalyze, last_analyze]), - default=None, - ) - mv_stats[name] = { - "n_rows": n_rows, - "last_refreshed": last_refreshed, - } - - cur.close() - conn.close() - - missing_views = [] - stale_views = [] - empty_views = [] - now = datetime.now(timezone.utc) - - for group_name, (max_stale, views) in MV_GROUPS.items(): - group_result = {"views": [], "status": "ok"} - - for view_name in views: - view_info = {"name": view_name, "status": "ok"} - - if view_name not in existing_mvs: - view_info["status"] = "critical" - view_info["message"] = "View does not exist" - missing_views.append(view_name) - elif view_name in mv_stats: - stats = mv_stats[view_name] - view_info["n_rows"] = stats["n_rows"] - - if stats["n_rows"] == 0: - view_info["status"] = "warning" - view_info["message"] = "View is empty (0 rows)" - empty_views.append(view_name) - - if stats["last_refreshed"]: - last_ref = stats["last_refreshed"] - if last_ref.tzinfo is None: - last_ref = last_ref.replace(tzinfo=timezone.utc) - age_seconds = (now - last_ref).total_seconds() - view_info["last_refreshed"] = last_ref.isoformat() - view_info["age_seconds"] = round(age_seconds) - - if age_seconds > max_stale: - view_info["status"] = "warning" - view_info["message"] = ( - f"Stale: last refreshed {round(age_seconds / 60, 1)} min ago " - f"(threshold: {max_stale // 60} min)" - ) - 
stale_views.append(view_name) - else: - view_info["last_refreshed"] = None - view_info["message"] = "No analyze timestamp available" - else: - view_info["message"] = "No stats available" - - group_result["views"].append(view_info) - - if any(v["status"] == "critical" for v in group_result["views"]): - group_result["status"] = "critical" - elif any(v["status"] == "warning" for v in group_result["views"]): - group_result["status"] = "warning" - - result["details"]["groups"][group_name] = group_result - - # Overall status - if missing_views: - result["status"] = "critical" - result["message"] = f"Missing views: {', '.join(missing_views[:5])}" - elif stale_views: - result["status"] = "warning" - result["message"] = f"Stale views: {', '.join(stale_views[:5])}" - elif empty_views: - result["status"] = "warning" - result["message"] = f"Empty views: {', '.join(empty_views[:5])}" - else: - total = sum(len(v) for _, v in MV_GROUPS.values()) - result["message"] = f"All {total} monitored materialized views are healthy" - - return result diff --git a/images/tiler-pipeline-monitor/checks/replication_lag.py b/images/tiler-pipeline-monitor/checks/replication_lag.py deleted file mode 100644 index c15cb2cef..000000000 --- a/images/tiler-pipeline-monitor/checks/replication_lag.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Check 1: Minute replication lag monitor. - -Compares the latest replication sequence number available on S3 -against the last sequence number processed by imposm (from the tiler DB -or the replication state endpoint). 
-""" - -import time -from datetime import datetime, timezone - -import requests - -from config import Config - - -def _parse_state(text): - """Parse an imposm/osm replication state.txt and return sequence + timestamp.""" - data = {} - for line in text.strip().splitlines(): - if "=" in line: - key, _, value = line.partition("=") - data[key.strip()] = value.strip() - seq = int(data.get("sequenceNumber", 0)) - ts_raw = data.get("timestamp", "") - # Format: 2026-03-13T12\:05\:02Z (escaped colons in java properties) - ts_raw = ts_raw.replace("\\:", ":") - try: - ts = datetime.fromisoformat(ts_raw.replace("Z", "+00:00")) - except ValueError: - ts = None - return seq, ts - - -def check_replication_lag(): - """Return a dict with the replication lag check result.""" - result = { - "name": "replication_lag", - "status": "ok", - "message": "", - "details": {}, - "checked_at": datetime.now(timezone.utc).isoformat(), - } - - try: - # Get latest available replication state from S3 - resp = requests.get(Config.REPLICATION_STATE_URL, timeout=15) - resp.raise_for_status() - remote_seq, remote_ts = _parse_state(resp.text) - - result["details"]["remote_sequence"] = remote_seq - result["details"]["remote_timestamp"] = remote_ts.isoformat() if remote_ts else None - - # Get imposm's last processed state - # The imposm diff dir stores last.state.txt - we query it via the same - # base URL pattern but from the local imposm state endpoint. - # In Docker, we can check the DB for the latest sequence via the - # osm_replication_status table if available, or fall back to comparing - # timestamps of recent data. - # - # For now: compare remote timestamp against current time. - # If remote_ts is stale, replication source itself is behind. - # A more precise check reads imposm's last.state.txt from the shared volume. 
- - if remote_ts: - lag_seconds = (datetime.now(timezone.utc) - remote_ts).total_seconds() - result["details"]["lag_seconds"] = round(lag_seconds) - result["details"]["lag_minutes"] = round(lag_seconds / 60, 1) - - if lag_seconds > Config.REPLICATION_LAG_THRESHOLD: - result["status"] = "critical" - result["message"] = ( - f"Replication lag is {round(lag_seconds / 60, 1)} minutes " - f"(threshold: {Config.REPLICATION_LAG_THRESHOLD // 60} min). " - f"Last replication timestamp: {remote_ts.isoformat()}" - ) - else: - result["message"] = ( - f"Replication is up to date. Lag: {round(lag_seconds / 60, 1)} min, " - f"sequence: {remote_seq}" - ) - else: - result["status"] = "warning" - result["message"] = "Could not parse replication timestamp" - - except requests.RequestException as e: - result["status"] = "critical" - result["message"] = f"Failed to fetch replication state: {e}" - - return result diff --git a/images/tiler-pipeline-monitor/checks/tile_cache.py b/images/tiler-pipeline-monitor/checks/tile_cache.py deleted file mode 100644 index 9d21eb457..000000000 --- a/images/tiler-pipeline-monitor/checks/tile_cache.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Pipeline check: verify tile cache in S3 is up-to-date. - -For a sampled element, check if the cached tile in S3 was modified -after the changeset closed_at. If the tile is stale, the cache purge -(SQS → tiler-cache) may have failed. 
-""" - -import mercantile -import psycopg2.extensions -from datetime import datetime, timezone - -from config import Config - - -def _get_element_centroid(conn, elem): - """Get the centroid (lon, lat) of an element from the tiler DB.""" - osm_id = elem["osm_id"] - search_id = -osm_id if elem["type"] == "relation" else osm_id - - # Search in the tables where it was found - found_tables = elem.get("found_in_tables", []) - if not found_tables: - return None - - cur = conn.cursor() - for table in found_tables: - try: - quoted = psycopg2.extensions.quote_ident(table, cur) - cur.execute( - f"SELECT ST_X(ST_Centroid(ST_Transform(geometry, 4326))), " - f"ST_Y(ST_Centroid(ST_Transform(geometry, 4326))) " - f"FROM {quoted} WHERE osm_id = %s LIMIT 1", - (search_id,), - ) - row = cur.fetchone() - if row and row[0] is not None: - cur.close() - return {"lon": row[0], "lat": row[1]} - except Exception: - conn.rollback() - - cur.close() - return None - - -def _get_tile_for_point(lon, lat, zoom): - """Convert lon/lat to tile z/x/y.""" - tile = mercantile.tile(lon, lat, zoom) - return {"z": tile.z, "x": tile.x, "y": tile.y} - - -def _check_tile_in_s3(tile, changeset_closed_at): - """Check if a cached tile in S3 is stale (older than changeset). - - Returns dict with status and details for each S3 path. 
- """ - if not Config.S3_BUCKET_CACHE_TILER: - return { - "status": "skipped", - "message": "S3_BUCKET_CACHE_TILER not configured", - } - - s3 = Config.get_s3_client() - bucket = Config.S3_BUCKET_CACHE_TILER - z, x, y = tile["z"], tile["x"], tile["y"] - - results = [] - stale_paths = [] - - for path_prefix in Config.S3_BUCKET_PATH_FILES: - key = f"{path_prefix}/{z}/{x}/{y}.pbf" - try: - resp = s3.head_object(Bucket=bucket, Key=key) - last_modified = resp["LastModified"] - - # Parse changeset closed_at - closed_dt = datetime.fromisoformat( - changeset_closed_at.replace("Z", "+00:00") - ) - - is_stale = last_modified < closed_dt - result = { - "path": key, - "last_modified": last_modified.isoformat(), - "is_stale": is_stale, - } - results.append(result) - if is_stale: - stale_paths.append(key) - - except s3.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - # Tile not in cache — not stale, tegola will generate on demand - results.append({ - "path": key, - "last_modified": None, - "is_stale": False, - "note": "not cached (tegola generates on demand)", - }) - else: - results.append({ - "path": key, - "error": str(e), - }) - - if stale_paths: - return { - "status": "stale", - "message": f"Tile cache is stale for: {', '.join(stale_paths)}", - "tile": tile, - "details": results, - } - else: - return { - "status": "ok", - "message": "Tile cache is up-to-date or not cached", - "tile": tile, - "details": results, - } - - -def check_tile_cache_for_element(conn, elem_check, changeset_closed_at): - """Full tile cache verification for a single element. 
- - Args: - conn: DB connection - elem_check: result dict from _check_element_in_db (with found_in_tables) - changeset_closed_at: ISO timestamp string - - Returns: - dict with tile cache check results - """ - osm_id = elem_check["osm_id"] - elem_type = elem_check["type"] - zoom = Config.TILE_CHECK_ZOOM - - # Step 1: get geometry from DB - centroid = _get_element_centroid(conn, elem_check) - if not centroid: - return { - "osm_id": osm_id, - "type": elem_type, - "status": "skipped", - "message": "Could not get geometry from DB", - } - - # Step 2: calculate tile - tile = _get_tile_for_point(centroid["lon"], centroid["lat"], zoom) - - # Step 3: check S3 cache - cache_result = _check_tile_in_s3(tile, changeset_closed_at) - - return { - "osm_id": osm_id, - "type": elem_type, - "centroid": centroid, - "tile": tile, - "cache": cache_result, - } diff --git a/images/tiler-pipeline-monitor/config.py b/images/tiler-pipeline-monitor/config.py deleted file mode 100644 index 756c72c6d..000000000 --- a/images/tiler-pipeline-monitor/config.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - - -class Config: - # PostgreSQL (tiler DB) - POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost") - POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", 5432)) - POSTGRES_DB = os.getenv("POSTGRES_DB", "tiler") - POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres") - POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "") - - # Replication - REPLICATION_STATE_URL = os.getenv( - "REPLICATION_STATE_URL", - "https://s3.amazonaws.com/planet.openhistoricalmap.org/replication/minute/state.txt", - ) - OHM_API_BASE = os.getenv("OHM_API_BASE", "https://www.openhistoricalmap.org/api/0.6") - - # How often to run the pipeline check (seconds) - CHECK_INTERVAL = int(os.getenv("TILER_MONITORING_CHECK_INTERVAL", 3600)) # 1 hour - - # OHM changeset age window (seconds) - # Only check changesets closed at least CHANGESET_MIN_AGE ago - # and at most CHANGESET_MAX_AGE ago. 
- # Example: min=3600 max=10800 → changesets closed between 1 and 3 hours ago - CHANGESET_MIN_AGE = int(os.getenv("TILER_MONITORING_CHANGESET_MIN_AGE", 10800)) # 1 hour - CHANGESET_MAX_AGE = int(os.getenv("TILER_MONITORING_CHANGESET_MAX_AGE", 14400)) # 3 hours - - # Max number of changesets to check per run - CHANGESET_LIMIT = int(os.getenv("CHANGESET_LIMIT", 30)) - - # Verbose logging - VERBOSE_LOGGING = os.getenv("VERBOSE_LOGGING", "false").lower() == "true" - - # Alerting (optional) - SLACK_WEBHOOK_URL = os.getenv("SLACK_WEBHOOK_URL", "") - - # Server - MONITOR_PORT = int(os.getenv("MONITOR_PORT", 8001)) - - # S3 tile cache verification - S3_BUCKET_CACHE_TILER = os.getenv("S3_BUCKET_CACHE_TILER", "") - S3_BUCKET_PATH_FILES = os.getenv("S3_BUCKET_PATH_FILES", "mnt/data/ohm,mnt/data/ohm_admin,mnt/data/ohm_other_boundaries").split(",") - TILER_CACHE_AWS_ACCESS_KEY_ID = os.getenv("TILER_CACHE_AWS_ACCESS_KEY_ID", "") - TILER_CACHE_AWS_SECRET_ACCESS_KEY = os.getenv("TILER_CACHE_AWS_SECRET_ACCESS_KEY", "") - TILER_CACHE_AWS_ENDPOINT = os.getenv("TILER_CACHE_AWS_ENDPOINT", "https://s3.amazonaws.com") - TILER_CACHE_REGION = os.getenv("TILER_CACHE_REGION", "us-east-1") - TILER_CACHE_CLOUD_INFRASTRUCTURE = os.getenv("TILER_CACHE_CLOUD_INFRASTRUCTURE", "aws") - # Zoom level to verify tile cache (use high zoom for precise check) - TILE_CHECK_ZOOM = int(os.getenv("TILE_CHECK_ZOOM", 16)) - # Number of random elements to do full pipeline check (tables + views + S3) - FULL_CHECK_SAMPLE_SIZE = int(os.getenv("FULL_CHECK_SAMPLE_SIZE", 2)) - - @staticmethod - def get_s3_client(): - import boto3 - if Config.TILER_CACHE_CLOUD_INFRASTRUCTURE == "hetzner": - return boto3.client( - "s3", - aws_access_key_id=Config.TILER_CACHE_AWS_ACCESS_KEY_ID, - aws_secret_access_key=Config.TILER_CACHE_AWS_SECRET_ACCESS_KEY, - endpoint_url=Config.TILER_CACHE_AWS_ENDPOINT, - region_name=Config.TILER_CACHE_REGION, - ) - return boto3.client("s3") - - @staticmethod - def get_db_dsn(): - return ( - 
f"postgresql://{Config.POSTGRES_USER}:{Config.POSTGRES_PASSWORD}" - f"@{Config.POSTGRES_HOST}:{Config.POSTGRES_PORT}/{Config.POSTGRES_DB}" - ) diff --git a/images/tiler-pipeline-monitor/monitor.py b/images/tiler-pipeline-monitor/monitor.py deleted file mode 100644 index 5a5b88a9d..000000000 --- a/images/tiler-pipeline-monitor/monitor.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Vtile pipeline monitor. - -Runs periodic changeset-centric checks and exposes results via a FastAPI HTTP -server. Optionally sends Slack alerts when checks fail. -""" - -import logging -import threading -import time -from datetime import datetime, timezone - -import requests -import uvicorn -from fastapi import FastAPI -from fastapi.responses import JSONResponse - -from checks.imposm_import import check_pipeline, check_single_changeset -from config import Config - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", -) -logger = logging.getLogger(__name__) - -# Store latest check result -_latest_result = None -_lock = threading.Lock() - -app = FastAPI(title="OHM Vtile Pipeline Monitor") - - -# --------------------------------------------------------------------------- -# Slack alerting -# --------------------------------------------------------------------------- - -def _send_slack_alert(check_result): - """Send a Slack notification when a check is not ok.""" - if not Config.SLACK_WEBHOOK_URL: - return - status_emoji = {"ok": ":white_check_mark:", "warning": ":warning:", "critical": ":rotating_light:"} - emoji = status_emoji.get(check_result["status"], ":question:") - text = f"{emoji} *{check_result['name']}* — {check_result['status'].upper()}\n{check_result['message']}" - try: - requests.post( - Config.SLACK_WEBHOOK_URL, - json={"text": text}, - timeout=10, - ) - except requests.RequestException as e: - logger.error(f"Failed to send Slack alert: {e}") - - -# --------------------------------------------------------------------------- -# Background 
scheduler -# --------------------------------------------------------------------------- - -def _run_check(): - """Run the pipeline check and update stored result.""" - try: - logger.info("=============> Running pipeline check") - result = check_pipeline() - logger.info(f"Pipeline check: {result['status']} — {result['message']}") - - with _lock: - prev = _latest_result - globals()["_latest_result"] = result - - # Alert on state transitions to non-ok - if result["status"] != "ok": - if prev is None or prev["status"] == "ok": - _send_slack_alert(result) - - except Exception as e: - logger.exception(f"Pipeline check raised an exception: {e}") - with _lock: - globals()["_latest_result"] = { - "name": "pipeline", - "status": "critical", - "message": f"Check failed with exception: {e}", - "details": {}, - "checked_at": datetime.now(timezone.utc).isoformat(), - } - - -def _scheduler(): - """Background loop that runs checks at the configured interval.""" - logger.info(f"Pipeline monitor starting. 
Check interval: {Config.CHECK_INTERVAL}s") - time.sleep(10) - - while True: - _run_check() - time.sleep(Config.CHECK_INTERVAL) - - -# --------------------------------------------------------------------------- -# HTTP endpoints -# --------------------------------------------------------------------------- - -@app.get("/health") -def health(): - """Overall health: returns 200 if ok, 503 otherwise.""" - with _lock: - result = _latest_result - - if result is None: - return JSONResponse( - content={"status": "starting", "message": "No checks have run yet"}, - status_code=200, - ) - - status_code = 200 if result["status"] == "ok" else 503 - return JSONResponse( - content={ - "status": result["status"], - "message": result["message"], - "checked_at": result["checked_at"], - }, - status_code=status_code, - ) - - -@app.get("/checks") -def all_checks(): - """Return full details for the latest pipeline check.""" - with _lock: - result = _latest_result - if result is None: - return JSONResponse(content={"status": "starting"}) - return JSONResponse(content=result) - - -@app.get("/changeset/{changeset_id}") -def evaluate_changeset(changeset_id: int): - """Evaluate a specific changeset through the full pipeline (on-demand).""" - result = check_single_changeset(changeset_id) - status_code = 200 if result["status"] == "ok" else 503 - return JSONResponse(content=result, status_code=status_code) - - -# --------------------------------------------------------------------------- -# Entrypoint -# --------------------------------------------------------------------------- - -if __name__ == "__main__": - # Start background scheduler - t = threading.Thread(target=_scheduler, daemon=True) - t.start() - - # Start HTTP server - uvicorn.run(app, host="0.0.0.0", port=Config.MONITOR_PORT) diff --git a/images/tiler-pipeline-monitor/requirements.txt b/images/tiler-pipeline-monitor/requirements.txt deleted file mode 100644 index 1360cc74e..000000000 --- 
a/images/tiler-pipeline-monitor/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -fastapi -uvicorn -requests -psycopg2-binary -mercantile -boto3 diff --git a/images/tiler-pipeline-monitor/tables_config.json b/images/tiler-pipeline-monitor/tables_config.json deleted file mode 100644 index 4fd9e4e24..000000000 --- a/images/tiler-pipeline-monitor/tables_config.json +++ /dev/null @@ -1,164 +0,0 @@ -{ - "relation_tables": [ - "osm_admin_relation_members", - "osm_transport_multilines", - "osm_street_multilines", - "osm_route_multilines", - "osm_communication_multilines" - ], - "way_tables": [ - "osm_water_lines", - "osm_transport_lines", - "osm_route_lines", - "osm_other_lines", - "osm_communication_lines", - "osm_landuse_lines", - "osm_admin_lines", - "osm_buildings", - "osm_water_areas", - "osm_amenity_areas", - "osm_other_areas", - "osm_transport_areas", - "osm_landuse_areas", - "osm_place_areas", - "osm_admin_areas", - "osm_amenity_points", - "osm_buildings_points", - "osm_transport_points", - "osm_other_points", - "osm_place_points", - "osm_landuse_points" - ], - "_comment": "Only check the lowest and highest zoom views (extremes) per table to reduce queries", - "table_to_views": { - "osm_admin_lines": [ - "mv_admin_boundaries_lines_z0_2", - "mv_admin_boundaries_lines_z16_20", - "mv_admin_maritime_lines_z0_5_v2", - "mv_admin_maritime_lines_z10_15" - ], - "osm_admin_relation_members": [ - "mv_relation_members_boundaries", - "mv_admin_boundaries_lines_z0_2", - "mv_admin_boundaries_lines_z16_20" - ], - "osm_admin_areas": [ - "mv_admin_boundaries_areas_z0_2", - "mv_admin_boundaries_areas_z16_20", - "mv_admin_boundaries_centroids_z0_2", - "mv_admin_boundaries_centroids_z16_20", - "mv_non_admin_boundaries_areas_z0_2", - "mv_non_admin_boundaries_areas_z16_20", - "mv_non_admin_boundaries_centroids_z0_2", - "mv_non_admin_boundaries_centroids_z16_20" - ], - "osm_water_lines": [ - "mv_water_lines_z8_9", - "mv_water_lines_z16_20" - ], - "osm_water_areas": [ - 
"mv_water_areas_z0_2", - "mv_water_areas_z16_20", - "mv_water_areas_centroids_z8_9", - "mv_water_areas_centroids_z16_20" - ], - "osm_transport_lines": [ - "mv_transport_lines_z5", - "mv_transport_lines_z16_20" - ], - "osm_transport_multilines": [ - "mv_transport_lines_z5", - "mv_transport_lines_z16_20" - ], - "osm_street_multilines": [ - "mv_transport_lines_z5", - "mv_transport_lines_z16_20" - ], - "osm_transport_areas": [ - "mv_transport_areas_z10_12", - "mv_transport_areas_z16_20", - "mv_transport_points_centroids_z10_12", - "mv_transport_points_centroids_z16_20" - ], - "osm_transport_points": [ - "mv_transport_points", - "mv_transport_points_centroids_z16_20" - ], - "osm_route_lines": [ - "mv_routes_normalized", - "mv_routes_indexed_z5", - "mv_routes_indexed_z16_20" - ], - "osm_route_multilines": [ - "mv_routes_normalized", - "mv_routes_indexed_z5", - "mv_routes_indexed_z16_20" - ], - "osm_communication_lines": [ - "mv_communication_z10_12", - "mv_communication_z16_20" - ], - "osm_communication_multilines": [ - "mv_communication_z10_12", - "mv_communication_z16_20" - ], - "osm_landuse_areas": [ - "mv_landuse_areas_z6_7", - "mv_landuse_areas_z16_20", - "mv_landuse_points_centroids_z6_7", - "mv_landuse_points_centroids_z16_20" - ], - "osm_landuse_lines": [ - "mv_landuse_lines_z14_15", - "mv_landuse_lines_z16_20" - ], - "osm_landuse_points": [ - "mv_landuse_points", - "mv_landuse_points_centroids_z6_7", - "mv_landuse_points_centroids_z16_20" - ], - "osm_buildings": [ - "mv_buildings_areas_z14_15", - "mv_buildings_areas_z16_20", - "mv_buildings_points_centroids_z14_15", - "mv_buildings_points_centroids_z16_20" - ], - "osm_buildings_points": [ - "mv_buildings_points", - "mv_buildings_points_centroids_z16_20" - ], - "osm_amenity_areas": [ - "mv_amenity_areas_z14_15", - "mv_amenity_areas_z16_20", - "mv_amenity_points_centroids_z14_15", - "mv_amenity_points_centroids_z16_20" - ], - "osm_amenity_points": [ - "mv_amenity_points", - "mv_amenity_points_centroids_z16_20" - 
], - "osm_other_areas": [ - "mv_other_areas_z8_9", - "mv_other_areas_z16_20", - "mv_other_points_centroids_z8_9", - "mv_other_points_centroids_z16_20" - ], - "osm_other_lines": [ - "mv_other_lines_z14_15", - "mv_other_lines_z16_20" - ], - "osm_other_points": [ - "mv_other_points", - "mv_other_points_centroids_z16_20" - ], - "osm_place_areas": [ - "mv_place_areas_z14_20", - "mv_place_points_centroids_z0_2", - "mv_place_points_centroids_z11_20" - ], - "osm_place_points": [ - "mv_place_points_centroids_z0_2", - "mv_place_points_centroids_z11_20" - ] - } -} From acc77a6633938440d5a96a85653829fa2eee5ea1 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 21 Mar 2026 14:28:46 -0500 Subject: [PATCH 07/17] check in all tables in case not fund --- .../pipeline-monitor/checks/imposm_import.py | 71 ++++++++++++++++++- .../tiler-monitor/pipeline-monitor/monitor.py | 9 ++- .../pipeline-monitor/static/dashboard.html | 35 +++++++++ 3 files changed, 113 insertions(+), 2 deletions(-) diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 2518fe12c..3199e45a4 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -311,7 +311,13 @@ def _check_element_in_tables(conn, elem): WHERE table_schema = 'public' AND table_name LIKE 'osm_%%' """) existing_tables = {row[0] for row in cur.fetchall()} - tables = [t for t in candidate_tables if t in existing_tables] + + if candidate_tables: + # Normal path: filter to candidate tables that exist + tables = [t for t in candidate_tables if t in existing_tables] + else: + # Retry path: no tags available, search ALL osm_* tables + tables = sorted(existing_tables) if not tables: cur.close() @@ -958,3 +964,66 @@ def check_single_changeset(changeset_id): print(f" [result] {result['status'].upper()}: {result['message']}") return result + + +def recheck_retries(): + """Manually 
recheck all pending and failed retries against the tiler DB. + + Returns a summary of resolved, still-missing, and newly-failed elements. + """ + retryable = retry_store.get_pending() + retry_store.get_failed() + if not retryable: + return {"resolved": [], "still_missing": [], "newly_failed": [], "message": "No retries to check"} + + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + return {"error": f"Cannot connect to tiler DB: {e}"} + + resolved = [] + still_missing = [] + newly_failed = [] + + for entry in retryable: + cs_id = entry["changeset_id"] + etype = entry["element_type"] + oid = entry["osm_id"] + retry_num = entry["retry_count"] + 1 + prev_status = entry["status"] + + check = _check_element_in_tables(conn, {"type": etype, "osm_id": oid, "action": "modify"}) + if check["found_in_tables"]: + retry_store.mark_resolved(cs_id, etype, oid) + resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, + "found_in_tables": check["found_in_tables"]}) + elif prev_status == "failed": + still_missing.append({"type": etype, "osm_id": oid, "changeset_id": cs_id}) + else: + new_status = retry_store.increment_retry(cs_id, etype, oid) + if new_status == "failed": + newly_failed.append({"type": etype, "osm_id": oid, "changeset_id": cs_id}) + else: + still_missing.append({"type": etype, "osm_id": oid, "changeset_id": cs_id}) + + conn.close() + + msg_parts = [] + if resolved: + msg_parts.append(f"{len(resolved)} resolved") + if still_missing: + msg_parts.append(f"{len(still_missing)} still missing") + if newly_failed: + msg_parts.append(f"{len(newly_failed)} newly failed") + + return { + "resolved": resolved, + "still_missing": still_missing, + "newly_failed": newly_failed, + "message": ", ".join(msg_parts) if msg_parts else "No retries to check", + } diff --git 
a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py index 090311324..a3e538dcf 100644 --- a/images/tiler-monitor/pipeline-monitor/monitor.py +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -15,7 +15,7 @@ from fastapi import FastAPI from fastapi.responses import HTMLResponse, JSONResponse -from checks.imposm_import import check_pipeline, check_single_changeset +from checks.imposm_import import check_pipeline, check_single_changeset, recheck_retries from config import Config import retry_store @@ -190,6 +190,13 @@ def evaluate_changeset(changeset_id: int): return JSONResponse(content=result, status_code=status_code) +@app.post("/retries/recheck") +def retries_recheck(): + """Manually trigger a recheck of all pending and failed retries.""" + result = recheck_retries() + return JSONResponse(content=result) + + @app.get("/retries") def retries(): """Return current retry state with full details for debugging.""" diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html b/images/tiler-monitor/pipeline-monitor/static/dashboard.html index 7b0380f1e..1326cc8e0 100644 --- a/images/tiler-monitor/pipeline-monitor/static/dashboard.html +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ -169,6 +169,12 @@

OHM Tiler Pipeline Monitor

+
+ + +
@@ -439,6 +445,35 @@

OHM Tiler Pipeline Monitor

}); } +// ---- Recheck retries ---- +async function recheckRetries(btn) { + const resultEl = document.getElementById('recheckResult'); + btn.disabled = true; + btn.textContent = 'Rechecking...'; + resultEl.textContent = ''; + try { + const r = await fetch(`${API}/retries/recheck`, { method: 'POST' }); + const data = await r.json(); + if (data.error) { + resultEl.style.color = 'var(--red)'; + resultEl.textContent = data.error; + } else { + const parts = []; + if (data.resolved?.length) parts.push(`${data.resolved.length} resolved`); + if (data.still_missing?.length) parts.push(`${data.still_missing.length} still missing`); + if (data.newly_failed?.length) parts.push(`${data.newly_failed.length} newly failed`); + resultEl.style.color = data.resolved?.length ? 'var(--green)' : 'var(--muted)'; + resultEl.textContent = parts.length ? parts.join(', ') : 'No retries to check'; + } + await loadRetries(); + } catch (e) { + resultEl.style.color = 'var(--red)'; + resultEl.textContent = 'Failed to recheck'; + } + btn.disabled = false; + btn.textContent = 'Recheck Now'; +} + // ---- Retries ---- async function loadRetries() { try { From 93659397c8df6d5096426e765aedbb1a5033c29b Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 21 Mar 2026 14:39:50 -0500 Subject: [PATCH 08/17] Add a recheck option --- .../pipeline-monitor/checks/imposm_import.py | 52 +++++++++++++++++++ .../tiler-monitor/pipeline-monitor/monitor.py | 8 +++ .../pipeline-monitor/static/dashboard.html | 38 +++++++++++++- 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 3199e45a4..5f60b45e7 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -966,6 +966,58 @@ def check_single_changeset(changeset_id): return result +def recheck_single_element(element_type, osm_id): + """Manually 
recheck a single element in the tiler DB. + + Returns detailed info about where it was found or why it's missing. + """ + try: + conn = psycopg2.connect( + host=Config.POSTGRES_HOST, + port=Config.POSTGRES_PORT, + dbname=Config.POSTGRES_DB, + user=Config.POSTGRES_USER, + password=Config.POSTGRES_PASSWORD, + ) + except psycopg2.Error as e: + return {"status": "error", "message": f"Cannot connect to tiler DB: {e}"} + + elem = {"type": element_type, "osm_id": osm_id, "action": "modify"} + check = _check_element_in_tables(conn, elem) + + # Also check views if found in tables + if check["found_in_tables"]: + check = _check_element_in_views(conn, elem, check) + + conn.close() + + found = bool(check["found_in_tables"]) + + # If found, resolve all pending retries for this element + if found: + pending = retry_store.get_pending() + retry_store.get_failed() + for entry in pending: + if entry["element_type"] == element_type and entry["osm_id"] == osm_id: + retry_store.mark_resolved(entry["changeset_id"], element_type, osm_id) + + # Build detailed result + search_id = -osm_id if element_type == "relation" else osm_id + result = { + "status": "resolved" if found else "not_found", + "element_type": element_type, + "osm_id": osm_id, + "search_id": search_id, + "found_in_tables": check["found_in_tables"], + "found_in_views": check["found_in_views"], + "message": ( + f"Found in: {', '.join(check['found_in_tables'])}" + if found + else f"Not found in any osm_* table (searched with osm_id={search_id})" + ), + } + return result + + def recheck_retries(): """Manually recheck all pending and failed retries against the tiler DB. 
diff --git a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py index a3e538dcf..d6a87ef34 100644 --- a/images/tiler-monitor/pipeline-monitor/monitor.py +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -197,6 +197,14 @@ def retries_recheck(): return JSONResponse(content=result) +@app.post("/retries/recheck/{element_type}/{osm_id}") +def retries_recheck_single(element_type: str, osm_id: int): + """Manually recheck a single element in the tiler DB.""" + from checks.imposm_import import recheck_single_element + result = recheck_single_element(element_type, osm_id) + return JSONResponse(content=result) + + @app.get("/retries") def retries(): """Return current retry state with full details for debugging.""" diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html b/images/tiler-monitor/pipeline-monitor/static/dashboard.html index 1326cc8e0..0d4590cd5 100644 --- a/images/tiler-monitor/pipeline-monitor/static/dashboard.html +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ -178,7 +178,7 @@

OHM Tiler Pipeline Monitor

ElementChangesetActionStatus
- +
ElementChangesetActionStatusRetriesFirst SeenLast CheckedRetriesFirst SeenLast Checked
@@ -445,6 +445,34 @@

OHM Tiler Pipeline Monitor

}); } +// ---- Recheck single element ---- +async function recheckSingle(elementType, osmId, btn) { + const resultEl = document.getElementById(`recheck-result-${elementType}-${osmId}`); + btn.disabled = true; + resultEl.textContent = 'checking...'; + resultEl.style.color = 'var(--muted)'; + try { + const r = await fetch(`${API}/retries/recheck/${elementType}/${osmId}`, { method: 'POST' }); + const data = await r.json(); + if (data.status === 'resolved') { + resultEl.style.color = 'var(--green)'; + resultEl.textContent = `Found in: ${data.found_in_tables.join(', ')}`; + // Refresh the retries list after a short delay so user can see the result + setTimeout(() => loadRetries(), 2000); + } else if (data.status === 'error') { + resultEl.style.color = 'var(--red)'; + resultEl.textContent = data.message; + } else { + resultEl.style.color = 'var(--red)'; + resultEl.textContent = data.message; + } + } catch (e) { + resultEl.style.color = 'var(--red)'; + resultEl.textContent = 'Request failed'; + } + btn.disabled = false; +} + // ---- Recheck retries ---- async function recheckRetries(btn) { const resultEl = document.getElementById('recheckResult'); @@ -498,7 +526,7 @@

OHM Tiler Pipeline Monitor

} empty.style.display = 'none'; - tbody.innerHTML = all.map(e => ` + tbody.innerHTML = all.map(e => ` ${e.element_type}/${e.osm_id} ${e.version ? `v${e.version}` : ''} ${e.changeset_id} @@ -507,6 +535,12 @@

OHM Tiler Pipeline Monitor

${retryBar(e.retry_count, e.max_retries)} ${e.age} ${e.last_checked_ago} + + + + `).join(''); } catch (e) { console.error('Failed to load retries:', e); From 68c8edc60bb6ea775951007d591a6e170fa0ce3c Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 21 Mar 2026 14:56:20 -0500 Subject: [PATCH 09/17] update --- .../pipeline-monitor/checks/imposm_import.py | 30 +++++++++++++++---- .../pipeline-monitor/retry_store.py | 4 --- .../pipeline-monitor/static/dashboard.html | 24 +++++++-------- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 5f60b45e7..15ad5627d 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -983,6 +983,18 @@ def recheck_single_element(element_type, osm_id): return {"status": "error", "message": f"Cannot connect to tiler DB: {e}"} elem = {"type": element_type, "osm_id": osm_id, "action": "modify"} + search_id = -osm_id if element_type == "relation" else osm_id + + # Get all osm_* tables to report what was searched + cur = conn.cursor() + cur.execute(""" + SELECT table_name FROM information_schema.tables + WHERE table_schema = 'public' AND table_name LIKE 'osm_%%' + ORDER BY table_name + """) + all_tables = [row[0] for row in cur.fetchall()] + cur.close() + check = _check_element_in_tables(conn, elem) # Also check views if found in tables @@ -1001,7 +1013,16 @@ def recheck_single_element(element_type, osm_id): retry_store.mark_resolved(entry["changeset_id"], element_type, osm_id) # Build detailed result - search_id = -osm_id if element_type == "relation" else osm_id + if found: + message = f"Found in: {', '.join(check['found_in_tables'])}" + if check["found_in_views"]: + message += f" | Views: {', '.join(check['found_in_views'])}" + else: + message = ( + f"Not found in any of {len(all_tables)} osm_* tables " + 
f"(searched with osm_id={search_id})" + ) + result = { "status": "resolved" if found else "not_found", "element_type": element_type, @@ -1009,11 +1030,8 @@ def recheck_single_element(element_type, osm_id): "search_id": search_id, "found_in_tables": check["found_in_tables"], "found_in_views": check["found_in_views"], - "message": ( - f"Found in: {', '.join(check['found_in_tables'])}" - if found - else f"Not found in any osm_* table (searched with osm_id={search_id})" - ), + "searched_tables": all_tables, + "message": message, } return result diff --git a/images/tiler-monitor/pipeline-monitor/retry_store.py b/images/tiler-monitor/pipeline-monitor/retry_store.py index 181d1a1c8..2aed4e886 100644 --- a/images/tiler-monitor/pipeline-monitor/retry_store.py +++ b/images/tiler-monitor/pipeline-monitor/retry_store.py @@ -207,8 +207,6 @@ def get_all_details(ohm_base="https://www.openhistoricalmap.org"): entry = dict(r) entry["changeset_url"] = f"{ohm_base}/changeset/{r['changeset_id']}" entry["element_url"] = f"{ohm_base}/{r['element_type']}/{r['osm_id']}" - if r["version"]: - entry["element_url"] += f"/history/{r['version']}" try: first = datetime.fromisoformat(r["first_seen"]) entry["age"] = _human_duration((now - first).total_seconds()) @@ -327,8 +325,6 @@ def get_changeset_elements(history_id: int, for r in rows: entry = dict(r) entry["element_url"] = f"{ohm_base}/{r['element_type']}/{r['osm_id']}" - if r["version"]: - entry["element_url"] += f"/history/{r['version']}" entry["found_in_tables"] = r["found_in_tables"].split(", ") if r["found_in_tables"] else [] entry["found_in_views"] = r["found_in_views"].split(", ") if r["found_in_views"] else [] results.append(entry) diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html b/images/tiler-monitor/pipeline-monitor/static/dashboard.html index 0d4590cd5..a07720985 100644 --- a/images/tiler-monitor/pipeline-monitor/static/dashboard.html +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ 
-449,26 +449,24 @@

OHM Tiler Pipeline Monitor

async function recheckSingle(elementType, osmId, btn) { const resultEl = document.getElementById(`recheck-result-${elementType}-${osmId}`); btn.disabled = true; - resultEl.textContent = 'checking...'; - resultEl.style.color = 'var(--muted)'; + resultEl.innerHTML = 'checking...'; try { const r = await fetch(`${API}/retries/recheck/${elementType}/${osmId}`, { method: 'POST' }); const data = await r.json(); if (data.status === 'resolved') { - resultEl.style.color = 'var(--green)'; - resultEl.textContent = `Found in: ${data.found_in_tables.join(', ')}`; - // Refresh the retries list after a short delay so user can see the result - setTimeout(() => loadRetries(), 2000); - } else if (data.status === 'error') { - resultEl.style.color = 'var(--red)'; - resultEl.textContent = data.message; + let html = `✓ Found in: ${data.found_in_tables.join(', ')}`; + if (data.found_in_views?.length) { + html += `
Views: ${data.found_in_views.join(', ')}`; + } + resultEl.innerHTML = html; + setTimeout(() => loadRetries(), 3000); + } else if (data.status === 'not_found') { + resultEl.innerHTML = `✗ Not found (osm_id=${data.search_id}) in ${data.searched_tables?.length || '?'} tables`; } else { - resultEl.style.color = 'var(--red)'; - resultEl.textContent = data.message; + resultEl.innerHTML = `✗ ${data.message}`; } } catch (e) { - resultEl.style.color = 'var(--red)'; - resultEl.textContent = 'Request failed'; + resultEl.innerHTML = 'Request failed'; } btn.disabled = false; } From 9b7fbc70b00b585428dda9df748f8fc80132bdcb Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 21 Mar 2026 15:08:45 -0500 Subject: [PATCH 10/17] update --- .../tiler-monitor/pipeline-monitor/monitor.py | 13 +++++++-- .../pipeline-monitor/static/dashboard.html | 28 +++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py index d6a87ef34..66d514d52 100644 --- a/images/tiler-monitor/pipeline-monitor/monitor.py +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -200,9 +200,16 @@ def retries_recheck(): @app.post("/retries/recheck/{element_type}/{osm_id}") def retries_recheck_single(element_type: str, osm_id: int): """Manually recheck a single element in the tiler DB.""" - from checks.imposm_import import recheck_single_element - result = recheck_single_element(element_type, osm_id) - return JSONResponse(content=result) + try: + from checks.imposm_import import recheck_single_element + result = recheck_single_element(element_type, osm_id) + return JSONResponse(content=result) + except Exception as e: + logger.exception(f"Recheck failed for {element_type}/{osm_id}") + return JSONResponse( + content={"status": "error", "message": f"Recheck failed: {e}"}, + status_code=500, + ) @app.get("/retries") diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html 
b/images/tiler-monitor/pipeline-monitor/static/dashboard.html index a07720985..7ad561053 100644 --- a/images/tiler-monitor/pipeline-monitor/static/dashboard.html +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ -447,26 +447,36 @@

OHM Tiler Pipeline Monitor

// ---- Recheck single element ---- async function recheckSingle(elementType, osmId, btn) { - const resultEl = document.getElementById(`recheck-result-${elementType}-${osmId}`); + // Update ALL result spans for this element (may appear multiple times with different versions) + const allResults = document.querySelectorAll(`.recheck-result-${elementType}-${osmId}`); btn.disabled = true; - resultEl.innerHTML = 'checking...'; + allResults.forEach(el => el.innerHTML = 'checking...'); try { const r = await fetch(`${API}/retries/recheck/${elementType}/${osmId}`, { method: 'POST' }); + if (!r.ok) { + const errText = await r.text(); + allResults.forEach(el => el.innerHTML = `✗ Server error ${r.status}: ${errText.substring(0, 100)}`); + btn.disabled = false; + return; + } const data = await r.json(); if (data.status === 'resolved') { - let html = `✓ Found in: ${data.found_in_tables.join(', ')}`; + let html = `✓ ${(data.found_in_tables || []).join(', ')}`; if (data.found_in_views?.length) { html += `
Views: ${data.found_in_views.join(', ')}`; } - resultEl.innerHTML = html; + allResults.forEach(el => el.innerHTML = html); setTimeout(() => loadRetries(), 3000); } else if (data.status === 'not_found') { - resultEl.innerHTML = `✗ Not found (osm_id=${data.search_id}) in ${data.searched_tables?.length || '?'} tables`; + const tables = data.searched_tables?.length || '?'; + const sid = data.search_id != null ? data.search_id : osmId; + allResults.forEach(el => el.innerHTML = `✗ Not found (osm_id=${sid}) in ${tables} tables`); } else { - resultEl.innerHTML = `✗ ${data.message}`; + const msg = data.message || data.detail || JSON.stringify(data); + allResults.forEach(el => el.innerHTML = `✗ ${msg}`); } } catch (e) { - resultEl.innerHTML = 'Request failed'; + allResults.forEach(el => el.innerHTML = `Request failed: ${e.message}`); } btn.disabled = false; } @@ -524,7 +534,7 @@

OHM Tiler Pipeline Monitor

} empty.style.display = 'none'; - tbody.innerHTML = all.map(e => ` + tbody.innerHTML = all.map(e => ` ${e.element_type}/${e.osm_id} ${e.version ? `v${e.version}` : ''} ${e.changeset_id} @@ -537,7 +547,7 @@

OHM Tiler Pipeline Monitor

- + `).join(''); } catch (e) { From 79d30c1b97f29ae9d6c6581c9d857d2983c254a8 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 21 Mar 2026 15:24:03 -0500 Subject: [PATCH 11/17] update --- .../pipeline-monitor/checks/imposm_import.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 15ad5627d..58c928d6d 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -460,6 +460,36 @@ def _is_element_deleted(elem): return False +def _get_current_element_tags(element_type, osm_id): + """Fetch the latest version of an element from OHM API and return its tags. + + Returns None if the element is deleted/gone or the request fails. + Returns a dict of tags if the element exists. + """ + url = f"{Config.OHM_API_BASE}/{element_type}/{osm_id}" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + try: + resp = requests.get(url, headers=headers, timeout=15) + if resp.status_code == 410: + return None + if resp.status_code == 200: + root = ET.fromstring(resp.content) + el = root.find(element_type) + if el is not None: + if el.attrib.get("visible") == "false": + return None + tags = {} + for tag in el.findall("tag"): + k = tag.attrib.get("k") + v = tag.attrib.get("v") + if k and v: + tags[k] = v + return tags + return None + except Exception: + return None + + def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None): """Check all elements of a changeset in the tiler DB. 
@@ -786,6 +816,22 @@ def check_pipeline(): retry_num = entry["retry_count"] + 1 prev_status = entry["status"] + # Check if the latest version still has mappable tags + current_tags = _get_current_element_tags(etype, oid) + if current_tags is None: + # Element was deleted — no longer needs to be in DB + print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " + f"-> element deleted in latest version, no longer expected in DB") + retry_store.mark_resolved(cs_id, etype, oid) + continue + current_elem = {"tags": current_tags} + if not _has_mappable_tags(current_elem): + # Latest version has no mappable tags — imposm won't import it + print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " + f"-> latest version has no mappable tags, no longer expected in DB") + retry_store.mark_resolved(cs_id, etype, oid) + continue + # Check if the element is now in the DB check = _check_element_in_tables(conn, {"type": etype, "osm_id": oid, "action": "modify"}) if check["found_in_tables"]: From 74b0dfd0856843eedef1492981cd8cd9c99fc910 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 23 Mar 2026 14:56:13 -0500 Subject: [PATCH 12/17] Reject values to evaluate --- .../pipeline-monitor/checks/imposm_import.py | 49 +++++++++++++++++-- .../pipeline-monitor/tables_config.json | 3 ++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 58c928d6d..64c60cb57 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -231,6 +231,9 @@ def _get_changeset_elements(changeset_id): # Loaded from tables_config.json TAG_TO_CHECK = _tables_config["tag_to_check"] +# Reject rules: tag key -> list of values that imposm rejects (not imported) +_REJECT_VALUES = _tables_config.get("reject_values", {}) + # Split config keys into simple tags ("highway") and key=value tags 
("type=street") _SIMPLE_TAGS = {} _KV_TAGS = {} @@ -242,12 +245,20 @@ def _get_changeset_elements(changeset_id): def _matching_entries(elem): - """Return matching tag_to_check entries for this element's tags.""" + """Return matching tag_to_check entries for this element's tags. + + Skips tags whose value is rejected by imposm (e.g. natural=coastline). + Other mappable tags on the same element are still matched. + """ tags = elem.get("tags", {}) entries = [] - # Simple tags: match if tag key exists (e.g. "highway") + # Simple tags: match if tag key exists (e.g. "highway"), + # but skip if the value is in the reject list for that key for tag_key in tags: if tag_key in _SIMPLE_TAGS: + rejected = _REJECT_VALUES.get(tag_key, []) + if tags[tag_key] in rejected: + continue entries.append(_SIMPLE_TAGS[tag_key]) # Key=value tags: match if tag key AND value match (e.g. "type=street") for kv, entry in _KV_TAGS.items(): @@ -490,11 +501,15 @@ def _get_current_element_tags(element_type, osm_id): return None -def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None): +def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None, already_checked=None): """Check all elements of a changeset in the tiler DB. - ALL elements: verified in osm_* tables (fast, tag-filtered) - SAMPLE elements: full check → tables + views + S3 tile cache + + If *already_checked* is a set of (type, osm_id) tuples, elements that + were already verified in a newer changeset are skipped to avoid + duplicate reporting. 
""" from checks.tile_cache import check_tile_cache_for_element @@ -528,6 +543,25 @@ def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None): "elements": [], } + # Deduplicate: skip elements already checked in a newer changeset + if already_checked is not None: + deduped = [] + for elem in checkable_elements: + key = (elem["type"], elem["osm_id"]) + if key in already_checked: + print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"-> already checked in a newer changeset") + continue + deduped.append(elem) + checkable_elements = deduped + + if not checkable_elements: + return { + "status": "ok", + "message": "All elements already checked in newer changesets", + "elements": [], + } + # Select random sample for full pipeline check (tables + views + S3) # Only sample from create/modify elements import math @@ -744,6 +778,9 @@ def check_pipeline(): # --- Check each changeset through the pipeline --- problems = [] skipped = 0 + # Track already-checked elements to avoid duplicates across changesets. + # Changesets are ordered newest-first, so only the latest version is checked. 
+ checked_elements = set() # (type, osm_id) for cs in changesets: # Skip changesets already checked with status OK @@ -779,10 +816,14 @@ def check_pipeline(): print(f" [replication] UNKNOWN: Replication state unavailable") # Step 2: tiler DB - db_check = _check_elements_in_db(conn, cs["id"], cs["closed_at"]) + db_check = _check_elements_in_db(conn, cs["id"], cs["closed_at"], already_checked=checked_elements) cs_result["tiler_db"] = db_check print(f" [tiler_db] {db_check['status'].upper()}: {db_check['message']}") + # Track checked elements to skip in older changesets + for elem in db_check.get("elements", []): + checked_elements.add((elem["type"], elem["osm_id"])) + if db_check["status"] not in ("ok", "retry_pending"): problems.append(f"Changeset {cs['id']}: {db_check['message']}") diff --git a/images/tiler-monitor/pipeline-monitor/tables_config.json b/images/tiler-monitor/pipeline-monitor/tables_config.json index 1613aca75..cf70e8884 100644 --- a/images/tiler-monitor/pipeline-monitor/tables_config.json +++ b/images/tiler-monitor/pipeline-monitor/tables_config.json @@ -90,5 +90,8 @@ "tables": ["osm_water_lines", "osm_water_areas"], "views": ["mv_water_lines_z8_9", "mv_water_lines_z16_20", "mv_water_areas_z0_2", "mv_water_areas_z16_20", "mv_water_areas_centroids_z8_9", "mv_water_areas_centroids_z16_20"] } + }, + "reject_values": { + "natural": ["coastline"] } } From b614d8f5231ba4ae0ccf474b760d249cb4bf4a32 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 23 Mar 2026 15:15:38 -0500 Subject: [PATCH 13/17] Retry to solve issues? 
--- .../pipeline-monitor/checks/imposm_import.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 64c60cb57..8ab5405a7 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -1154,6 +1154,19 @@ def recheck_retries(): retry_num = entry["retry_count"] + 1 prev_status = entry["status"] + # Check if the latest version still has mappable tags + current_tags = _get_current_element_tags(etype, oid) + if current_tags is None: + retry_store.mark_resolved(cs_id, etype, oid) + resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, + "reason": "element deleted"}) + continue + if not _has_mappable_tags({"tags": current_tags}): + retry_store.mark_resolved(cs_id, etype, oid) + resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, + "reason": "no mappable tags (rejected by imposm)"}) + continue + check = _check_element_in_tables(conn, {"type": etype, "osm_id": oid, "action": "modify"}) if check["found_in_tables"]: retry_store.mark_resolved(cs_id, etype, oid) From b5912567284513a1db7759e27c56e2c23c6437e6 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 23 Mar 2026 16:00:27 -0500 Subject: [PATCH 14/17] Update --- .../tiler-monitor/pipeline-monitor/monitor.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py index 66d514d52..c1ad96c6c 100644 --- a/images/tiler-monitor/pipeline-monitor/monitor.py +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -194,6 +194,22 @@ def evaluate_changeset(changeset_id: int): def retries_recheck(): """Manually trigger a recheck of all pending and failed retries.""" result = recheck_retries() + + # Update cached status if all retries are now resolved + remaining = 
retry_store.summary() + if remaining.get("failed", 0) == 0 and remaining.get("pending", 0) == 0: + with _lock: + prev = _latest_result + if prev and prev.get("status") == "critical": + updated = dict(prev) + updated["status"] = "ok" + updated["message"] = "All retries resolved" + updated["details"] = dict(prev.get("details", {})) + updated["details"]["retries"] = remaining + updated["details"]["total_failed"] = 0 + updated["details"]["newly_failed"] = [] + globals()["_latest_result"] = updated + return JSONResponse(content=result) @@ -203,6 +219,22 @@ def retries_recheck_single(element_type: str, osm_id: int): try: from checks.imposm_import import recheck_single_element result = recheck_single_element(element_type, osm_id) + + # Update cached status if no more failed retries + remaining = retry_store.summary() + if remaining.get("failed", 0) == 0 and remaining.get("pending", 0) == 0: + with _lock: + prev = _latest_result + if prev and prev.get("status") == "critical": + updated = dict(prev) + updated["status"] = "ok" + updated["message"] = "All retries resolved" + updated["details"] = dict(prev.get("details", {})) + updated["details"]["retries"] = remaining + updated["details"]["total_failed"] = 0 + updated["details"]["newly_failed"] = [] + globals()["_latest_result"] = updated + return JSONResponse(content=result) except Exception as e: logger.exception(f"Recheck failed for {element_type}/{osm_id}") From d0822b8f7ba25698c9d190e1b85b7e5d180ba9d5 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 28 Mar 2026 11:48:10 -0500 Subject: [PATCH 15/17] Add feed rss --- .../pipeline-monitor/checks/imposm_import.py | 134 ++++++++++++++++-- .../tiler-monitor/pipeline-monitor/monitor.py | 109 +++++++++++++- .../pipeline-monitor/retry_store.py | 133 +++++++++++++++-- .../pipeline-monitor/static/dashboard.html | 19 ++- .../pipeline-monitor/tables_config.json | 8 +- 5 files changed, 375 insertions(+), 28 deletions(-) diff --git 
a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 8ab5405a7..b384b7abb 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -119,6 +119,7 @@ def _get_changesets_in_window(min_age, max_age, limit=10): else: changesets.append({ "id": cs_id, + "created_at": cs.attrib.get("created_at", ""), "closed_at": closed_at, "closed_dt": closed_dt, "age_minutes": round(age_minutes, 1), @@ -216,6 +217,8 @@ def _get_changeset_elements(changeset_id): if k and v: tags[k] = v timestamp = elem.attrib.get("timestamp", "") + # Count nodes for ways (to detect invalid geometries) + node_count = len(elem.findall("nd")) if elem_type == "way" else 0 elements.append({ "type": elem_type, "osm_id": int(osm_id), @@ -223,6 +226,7 @@ def _get_changeset_elements(changeset_id): "action": action_type, "tags": tags, "timestamp": timestamp, + "node_count": node_count, }) return elements @@ -234,6 +238,10 @@ def _get_changeset_elements(changeset_id): # Reject rules: tag key -> list of values that imposm rejects (not imported) _REJECT_VALUES = _tables_config.get("reject_values", {}) +# Relation types that imposm actually imports (multipolygon, boundary, route, street) +# Relations with other type= values (e.g. site, associatedStreet) are ignored by imposm +_IMPORTABLE_RELATION_TYPES = set(_tables_config.get("importable_relation_types", [])) + # Split config keys into simple tags ("highway") and key=value tags ("type=street") _SIMPLE_TAGS = {} _KV_TAGS = {} @@ -273,6 +281,42 @@ def _has_mappable_tags(elem): return len(_matching_entries(elem)) > 0 +def _has_invalid_geometry(elem): + """Return True if the element has a geometry that imposm will reject. + + Cases detected: + - Ways with area=yes but fewer than 4 nodes (need 3 unique + closing node + to form a valid polygon). + - Ways (non-area) with fewer than 2 nodes. 
+ """ + if elem.get("type") != "way": + return False + node_count = elem.get("node_count", 0) + if node_count == 0: + return False # no node info available, don't skip + tags = elem.get("tags", {}) + if tags.get("area") == "yes": + # A polygon needs at least 4 nodes (3 unique points + closing node) + return node_count < 4 + # A line needs at least 2 nodes + return node_count < 2 + + +def _is_non_importable_relation(elem): + """Return True if the element is a relation with a type that imposm does not import. + + Imposm only processes certain relation types (multipolygon, boundary, route, street). + Relations with other type= values (e.g. site, associatedStreet) are ignored. + """ + if elem.get("type") != "relation": + return False + tags = elem.get("tags", {}) + rel_type = tags.get("type", "") + if not rel_type: + return False # no type tag, don't skip (could be imported by other means) + return rel_type not in _IMPORTABLE_RELATION_TYPES + + def _get_candidate_tables(elem): """Return the specific tables where this element should exist based on its tags.""" tables = set() @@ -501,6 +545,37 @@ def _get_current_element_tags(element_type, osm_id): return None +def _get_current_element_info(element_type, osm_id): + """Fetch the latest version of an element from OHM API. + + Returns None if the element is deleted/gone or the request fails. + Returns a dict with 'tags' and 'node_count' (for ways) if the element exists. 
+ """ + url = f"{Config.OHM_API_BASE}/{element_type}/{osm_id}" + headers = {"User-Agent": "ohm-pipeline-monitor/1.0"} + try: + resp = requests.get(url, headers=headers, timeout=15) + if resp.status_code == 410: + return None + if resp.status_code == 200: + root = ET.fromstring(resp.content) + el = root.find(element_type) + if el is not None: + if el.attrib.get("visible") == "false": + return None + tags = {} + for tag in el.findall("tag"): + k = tag.attrib.get("k") + v = tag.attrib.get("v") + if k and v: + tags[k] = v + node_count = len(el.findall("nd")) if element_type == "way" else 0 + return {"tags": tags, "node_count": node_count} + return None + except Exception: + return None + + def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None, already_checked=None): """Check all elements of a changeset in the tiler DB. @@ -529,11 +604,22 @@ def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None, already_ "elements": [], } - # Filter elements: skip those without mappable tags (silently) + # Filter elements: skip those without mappable tags or with invalid geometry checkable_elements = [] for elem in elements: if not _has_mappable_tags(elem): continue + if _has_invalid_geometry(elem): + tags = elem.get("tags", {}) + print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"-> invalid geometry (area={tags.get('area', 'no')}, " + f"nodes={elem.get('node_count', '?')}), imposm will reject") + continue + if _is_non_importable_relation(elem): + rel_type = elem.get("tags", {}).get("type", "") + print(f" SKIP {elem['type']}/{elem['osm_id']} v{elem['version']} " + f"-> relation type={rel_type} is not imported by imposm") + continue checkable_elements.append(elem) if not checkable_elements: @@ -649,6 +735,7 @@ def _check_elements_in_db(conn, changeset_id, changeset_closed_at=None, already_ retry_store.add_missing( changeset_id, elem["type"], elem["osm_id"], Config.MAX_RETRIES, version=elem.get("version", 0), action=elem.get("action", 
""), + closed_at=changeset_closed_at or "", ) missing.append(f"{elem['type']}/{elem['osm_id']}") @@ -839,6 +926,7 @@ def check_pipeline(): missing_count=missing_count, ok_count=len(elements) - missing_count, message=db_check["message"], + created_at=cs.get("created_at", ""), closed_at=cs.get("closed_at", ""), elements=elements, ) @@ -857,21 +945,36 @@ def check_pipeline(): retry_num = entry["retry_count"] + 1 prev_status = entry["status"] - # Check if the latest version still has mappable tags - current_tags = _get_current_element_tags(etype, oid) - if current_tags is None: + # Check if the latest version still has mappable tags / valid geometry + current_info = _get_current_element_info(etype, oid) + if current_info is None: # Element was deleted — no longer needs to be in DB print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " f"-> element deleted in latest version, no longer expected in DB") retry_store.mark_resolved(cs_id, etype, oid) continue - current_elem = {"tags": current_tags} + current_elem = {"type": etype, "tags": current_info["tags"], + "node_count": current_info["node_count"]} if not _has_mappable_tags(current_elem): # Latest version has no mappable tags — imposm won't import it print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " f"-> latest version has no mappable tags, no longer expected in DB") retry_store.mark_resolved(cs_id, etype, oid) continue + if _has_invalid_geometry(current_elem): + # Invalid geometry — imposm will reject it + tags = current_info["tags"] + print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " + f"-> invalid geometry (area={tags.get('area', 'no')}, " + f"nodes={current_info['node_count']}), imposm rejects this") + retry_store.mark_resolved(cs_id, etype, oid) + continue + if _is_non_importable_relation(current_elem): + rel_type = current_info["tags"].get("type", "") + print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " + f"-> relation type={rel_type} is not imported by imposm") + 
retry_store.mark_resolved(cs_id, etype, oid) + continue # Check if the element is now in the DB check = _check_element_in_tables(conn, {"type": etype, "osm_id": oid, "action": "modify"}) @@ -1154,18 +1257,31 @@ def recheck_retries(): retry_num = entry["retry_count"] + 1 prev_status = entry["status"] - # Check if the latest version still has mappable tags - current_tags = _get_current_element_tags(etype, oid) - if current_tags is None: + # Check if the latest version still has mappable tags / valid geometry + current_info = _get_current_element_info(etype, oid) + if current_info is None: retry_store.mark_resolved(cs_id, etype, oid) resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, "reason": "element deleted"}) continue - if not _has_mappable_tags({"tags": current_tags}): + current_elem = {"type": etype, "tags": current_info["tags"], + "node_count": current_info["node_count"]} + if not _has_mappable_tags(current_elem): retry_store.mark_resolved(cs_id, etype, oid) resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, "reason": "no mappable tags (rejected by imposm)"}) continue + if _has_invalid_geometry(current_elem): + retry_store.mark_resolved(cs_id, etype, oid) + resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, + "reason": f"invalid geometry (area={current_info['tags'].get('area', 'no')}, nodes={current_info['node_count']})"}) + continue + if _is_non_importable_relation(current_elem): + rel_type = current_info["tags"].get("type", "") + retry_store.mark_resolved(cs_id, etype, oid) + resolved.append({"type": etype, "osm_id": oid, "changeset_id": cs_id, + "reason": f"relation type={rel_type} not imported by imposm"}) + continue check = _check_element_in_tables(conn, {"type": etype, "osm_id": oid, "action": "modify"}) if check["found_in_tables"]: diff --git a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py index c1ad96c6c..647909df1 100644 --- 
a/images/tiler-monitor/pipeline-monitor/monitor.py +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -13,7 +13,7 @@ import requests import uvicorn from fastapi import FastAPI -from fastapi.responses import HTMLResponse, JSONResponse +from fastapi.responses import HTMLResponse, JSONResponse, Response from checks.imposm_import import check_pipeline, check_single_changeset, recheck_retries from config import Config @@ -105,12 +105,32 @@ def _run_check(): if newly_failed: # New elements just exhausted retries — always alert _send_slack_alert(result) + # Log each failed element as a feed event + ohm = "https://www.openhistoricalmap.org" + for f in newly_failed: + retry_store.add_feed_event( + event_type="failed", + title=f"FAILED: {f['type']}/{f['osm_id']} not found in tiler DB after all retries", + description=( + f"Element {f['type']}/{f['osm_id']} from changeset {f['changeset_id']} " + f"was not found in the tiler database after all retries." + ), + link=f"{ohm}/{f['type']}/{f['osm_id']}", + element_type=f["type"], + osm_id=f["osm_id"], + changeset_id=f["changeset_id"], + ) elif result["status"] == "warning": if prev is None or prev["status"] == "ok": _send_slack_alert(result) elif result["status"] == "ok" and prev and prev["status"] in ("critical", "warning"): # Recovered — send ok notification _send_slack_alert(result) + retry_store.add_feed_event( + event_type="recovered", + title="RECOVERED: All pipeline elements verified OK", + description=result["message"], + ) except Exception as e: logger.exception(f"Pipeline check raised an exception: {e}") @@ -275,6 +295,93 @@ def history_elements(history_id: int): return JSONResponse(content={"history_id": history_id, "elements": elements}) +# --------------------------------------------------------------------------- +# RSS / Atom feed +# --------------------------------------------------------------------------- + +def _xml_escape(text): + """Escape XML special characters.""" + return (str(text) + .replace("&", 
"&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + .replace("'", "'")) + + +def _to_rfc822(iso_str): + """Convert ISO timestamp to RFC 822 format for RSS.""" + try: + dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00")) + return dt.strftime("%a, %d %b %Y %H:%M:%S +0000") + except Exception: + return datetime.now(timezone.utc).strftime("%a, %d %b %Y %H:%M:%S +0000") + + +def _build_rss_feed(): + """Build an RSS 2.0 feed from persistent feed events. + + Each event is a unique item with a stable guid (based on DB id), + so Slack's /feed command detects new items as they appear. + """ + base_url = Config.MONITOR_BASE_URL or "https://tiler-monitoring.openhistoricalmap.org" + now = datetime.now(timezone.utc) + rfc822_now = now.strftime("%a, %d %b %Y %H:%M:%S +0000") + + events = retry_store.get_feed_events(limit=50) + + items_xml = [] + for ev in events: + title = ev["title"] + link = ev["link"] or base_url + guid = f"ohm-tiler-feed-{ev['id']}" + pub_date = _to_rfc822(ev["created_at"]) + desc = ev["description"] + + # Add element/changeset links in description if available + if ev["osm_id"] and ev["element_type"]: + ohm = "https://www.openhistoricalmap.org" + elem_link = f"{ohm}/{ev['element_type']}/{ev['osm_id']}" + cs_link = f"{ohm}/changeset/{ev['changeset_id']}" if ev["changeset_id"] else "" + desc_parts = [desc] + desc_parts.append(f"Element: {elem_link}") + if cs_link: + desc_parts.append(f"Changeset: {cs_link}") + desc_parts.append(f"Dashboard: {base_url}") + desc = " | ".join(desc_parts) + + items_xml.append(f""" + {_xml_escape(title)} + {_xml_escape(link)} + {_xml_escape(guid)} + {pub_date} + {_xml_escape(desc)} + """) + + items_str = "\n".join(items_xml) if items_xml else "" + + feed = f""" + + + OHM Tiler Pipeline Monitor - Alerts + {_xml_escape(base_url)} + Alerts from the OHM tiler pipeline monitor: failed elements not found in the tiler DB after retries. 
+ en + {rfc822_now} + +{items_str} + +""" + return feed + + +@app.get("/feed.rss") +def rss_feed(): + """RSS 2.0 feed of pipeline alerts for Slack integration.""" + xml = _build_rss_feed() + return Response(content=xml, media_type="application/rss+xml") + + # --------------------------------------------------------------------------- # Entrypoint # --------------------------------------------------------------------------- diff --git a/images/tiler-monitor/pipeline-monitor/retry_store.py b/images/tiler-monitor/pipeline-monitor/retry_store.py index 2aed4e886..9b2a52ec5 100644 --- a/images/tiler-monitor/pipeline-monitor/retry_store.py +++ b/images/tiler-monitor/pipeline-monitor/retry_store.py @@ -47,12 +47,14 @@ def _init_tables(conn): first_seen TEXT NOT NULL, last_checked TEXT NOT NULL, status TEXT NOT NULL DEFAULT 'pending', + closed_at TEXT NOT NULL DEFAULT '', PRIMARY KEY (changeset_id, element_type, osm_id) ) """) # Migrate: add columns if missing (for existing DBs) for col, typedef in [("version", "INTEGER NOT NULL DEFAULT 0"), - ("action", "TEXT NOT NULL DEFAULT ''")]: + ("action", "TEXT NOT NULL DEFAULT ''"), + ("closed_at", "TEXT NOT NULL DEFAULT ''")]: try: conn.execute(f"ALTER TABLE pending_retries ADD COLUMN {col} {typedef}") except sqlite3.OperationalError: @@ -62,6 +64,7 @@ def _init_tables(conn): CREATE TABLE IF NOT EXISTS changeset_history ( id INTEGER PRIMARY KEY AUTOINCREMENT, changeset_id INTEGER NOT NULL, + created_at TEXT NOT NULL DEFAULT '', closed_at TEXT NOT NULL DEFAULT '', checked_at TEXT NOT NULL, status TEXT NOT NULL, @@ -71,10 +74,12 @@ def _init_tables(conn): message TEXT NOT NULL DEFAULT '' ) """) - try: - conn.execute("ALTER TABLE changeset_history ADD COLUMN closed_at TEXT NOT NULL DEFAULT ''") - except sqlite3.OperationalError: - pass + for hist_col, hist_typedef in [("closed_at", "TEXT NOT NULL DEFAULT ''"), + ("created_at", "TEXT NOT NULL DEFAULT ''")]: + try: + conn.execute(f"ALTER TABLE changeset_history ADD COLUMN {hist_col} 
{hist_typedef}") + except sqlite3.OperationalError: + pass conn.execute(""" CREATE INDEX IF NOT EXISTS idx_history_checked_at ON changeset_history(checked_at DESC) @@ -101,6 +106,28 @@ def _init_tables(conn): ON element_history(history_id) """) + # Feed events: persistent log of alerts (failed elements, recoveries) + # for the RSS feed. Items are never deleted, only added. + conn.execute(""" + CREATE TABLE IF NOT EXISTS feed_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_type TEXT NOT NULL, + element_type TEXT NOT NULL DEFAULT '', + osm_id INTEGER NOT NULL DEFAULT 0, + version INTEGER NOT NULL DEFAULT 0, + changeset_id INTEGER NOT NULL DEFAULT 0, + action TEXT NOT NULL DEFAULT '', + title TEXT NOT NULL, + description TEXT NOT NULL DEFAULT '', + link TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL + ) + """) + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_feed_events_created + ON feed_events(created_at DESC) + """) + conn.commit() @@ -109,7 +136,8 @@ def _init_tables(conn): # --------------------------------------------------------------------------- def add_missing(changeset_id: int, element_type: str, osm_id: int, - max_retries: int, version: int = 0, action: str = ""): + max_retries: int, version: int = 0, action: str = "", + closed_at: str = ""): """Register a missing element for future retry. If it already exists, do nothing.""" now = datetime.now(timezone.utc).isoformat() with _lock: @@ -117,9 +145,9 @@ def add_missing(changeset_id: int, element_type: str, osm_id: int, conn.execute(""" INSERT OR IGNORE INTO pending_retries (changeset_id, element_type, osm_id, version, action, - retry_count, max_retries, first_seen, last_checked, status) - VALUES (?, ?, ?, ?, ?, 0, ?, ?, ?, 'pending') - """, (changeset_id, element_type, osm_id, version, action, max_retries, now, now)) + retry_count, max_retries, first_seen, last_checked, status, closed_at) + VALUES (?, ?, ?, ?, ?, 0, ?, ?, ?, 'pending', ?) 
+ """, (changeset_id, element_type, osm_id, version, action, max_retries, now, now, closed_at or "")) conn.commit() @@ -201,6 +229,27 @@ def get_all_details(ohm_base="https://www.openhistoricalmap.org"): "SELECT * FROM pending_retries ORDER BY status, first_seen DESC" ).fetchall() + # Get changeset object counts from history (latest check per changeset) + cs_ids = list(set(r["changeset_id"] for r in rows)) + cs_stats = {} + if cs_ids: + placeholders = ",".join("?" * len(cs_ids)) + stats_rows = conn.execute(f""" + SELECT changeset_id, total_elements, missing_count, ok_count + FROM changeset_history + WHERE id IN ( + SELECT MAX(id) FROM changeset_history + WHERE changeset_id IN ({placeholders}) + GROUP BY changeset_id + ) + """, cs_ids).fetchall() + for sr in stats_rows: + cs_stats[sr["changeset_id"]] = { + "total_elements": sr["total_elements"], + "missing_count": sr["missing_count"], + "ok_count": sr["ok_count"], + } + now = datetime.now(timezone.utc) results = [] for r in rows: @@ -217,6 +266,21 @@ def get_all_details(ohm_base="https://www.openhistoricalmap.org"): entry["last_checked_ago"] = _human_duration((now - last).total_seconds()) except Exception: entry["last_checked_ago"] = "" + # Changeset closed_at (close time as formatted date) + closed_at_val = r["closed_at"] if "closed_at" in r.keys() else "" + if closed_at_val: + try: + closed = datetime.fromisoformat(closed_at_val.replace("Z", "+00:00")) + entry["closed_at_fmt"] = closed.strftime("%Y-%m-%d %H:%M UTC") + except Exception: + entry["closed_at_fmt"] = "" + else: + entry["closed_at_fmt"] = "" + # Changeset object counts + stats = cs_stats.get(r["changeset_id"], {}) + entry["cs_total_elements"] = stats.get("total_elements", 0) + entry["cs_ok_count"] = stats.get("ok_count", 0) + entry["cs_missing_count"] = stats.get("missing_count", 0) entry["retries_remaining"] = max(0, r["max_retries"] - r["retry_count"]) results.append(entry) return results @@ -229,16 +293,17 @@ def 
get_all_details(ohm_base="https://www.openhistoricalmap.org"): def log_changeset_check(changeset_id: int, status: str, total_elements: int, missing_count: int, ok_count: int, message: str, - closed_at: str = "", elements: list = None): + created_at: str = "", closed_at: str = "", + elements: list = None): """Record a changeset check and its elements in the history tables.""" now = datetime.now(timezone.utc).isoformat() with _lock: conn = _get_conn() cur = conn.execute(""" INSERT INTO changeset_history - (changeset_id, closed_at, checked_at, status, total_elements, missing_count, ok_count, message) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) - """, (changeset_id, closed_at or "", now, status, total_elements, missing_count, ok_count, message)) + (changeset_id, created_at, closed_at, checked_at, status, total_elements, missing_count, ok_count, message) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, (changeset_id, created_at or "", closed_at or "", now, status, total_elements, missing_count, ok_count, message)) history_id = cur.lastrowid if elements: @@ -299,6 +364,15 @@ def get_changeset_history(page: int = 1, per_page: int = 20, entry["closed_ago"] = "" else: entry["closed_ago"] = "" + created_at_val = r["created_at"] if "created_at" in r.keys() else "" + if created_at_val: + try: + created = datetime.fromisoformat(created_at_val.replace("Z", "+00:00")) + entry["created_fmt"] = created.strftime("%Y-%m-%d %H:%M UTC") + except Exception: + entry["created_fmt"] = "" + else: + entry["created_fmt"] = "" results.append(entry) return { @@ -353,6 +427,39 @@ def summary(): return {r["status"]: r["cnt"] for r in rows} +# --------------------------------------------------------------------------- +# Feed events +# --------------------------------------------------------------------------- + +def add_feed_event(event_type: str, title: str, description: str = "", + link: str = "", element_type: str = "", osm_id: int = 0, + version: int = 0, changeset_id: int = 0, action: str = ""): + """Add a 
persistent event to the RSS feed.""" + now = datetime.now(timezone.utc).isoformat() + with _lock: + conn = _get_conn() + conn.execute(""" + INSERT INTO feed_events + (event_type, element_type, osm_id, version, changeset_id, + action, title, description, link, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, (event_type, element_type, osm_id, version, changeset_id, + action, title, description, link, now)) + conn.commit() + + +def get_feed_events(limit: int = 50): + """Return the most recent feed events for the RSS feed.""" + with _lock: + conn = _get_conn() + rows = conn.execute(""" + SELECT * FROM feed_events + ORDER BY created_at DESC + LIMIT ? + """, (limit,)).fetchall() + return [dict(r) for r in rows] + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html b/images/tiler-monitor/pipeline-monitor/static/dashboard.html index 7ad561053..10dd71630 100644 --- a/images/tiler-monitor/pipeline-monitor/static/dashboard.html +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ -159,7 +159,7 @@

OHM Tiler Pipeline Monitor

ChangesetStatusTotalOK - MissingClosedCheckedMessage + MissingCreatedClosedCheckedMessage @@ -178,7 +178,7 @@

OHM Tiler Pipeline Monitor

- +
ElementChangesetActionStatusRetriesFirst SeenLast CheckedRetriesObjectsChangeset ClosedFirst SeenLast Checked
@@ -414,6 +414,7 @@

OHM Tiler Pipeline Monitor

${c.total_elements} ${c.ok_count} ${c.missing_count} + ${c.created_fmt || '—'} ${c.closed_ago || '—'} ${c.checked_ago} OHM Tiler Pipeline Monitor } empty.style.display = 'none'; - tbody.innerHTML = all.map(e => ` + tbody.innerHTML = all.map(e => { + const total = e.cs_total_elements || 0; + const missing = e.cs_missing_count || 0; + const ok = e.cs_ok_count || 0; + const objectsHtml = total > 0 + ? `${missing}/${total} missing` + : ''; + return ` ${e.element_type}/${e.osm_id} ${e.version ? `v${e.version}` : ''} ${e.changeset_id} ${e.action || '—'} ${statusBadge(e.status)} ${retryBar(e.retry_count, e.max_retries)} + ${objectsHtml} + ${e.closed_at_fmt || '—'} ${e.age} ${e.last_checked_ago} @@ -549,7 +559,8 @@

OHM Tiler Pipeline Monitor

title="Recheck this element in the tiler DB now">↻ - `).join(''); + `; + }).join(''); } catch (e) { console.error('Failed to load retries:', e); } diff --git a/images/tiler-monitor/pipeline-monitor/tables_config.json b/images/tiler-monitor/pipeline-monitor/tables_config.json index cf70e8884..ef0c407f1 100644 --- a/images/tiler-monitor/pipeline-monitor/tables_config.json +++ b/images/tiler-monitor/pipeline-monitor/tables_config.json @@ -93,5 +93,11 @@ }, "reject_values": { "natural": ["coastline"] - } + }, + "importable_relation_types": [ + "multipolygon", + "boundary", + "route", + "street" + ] } From 63df1944222400a6b17954f9a2afbbbfc73136c1 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 28 Mar 2026 12:52:18 -0500 Subject: [PATCH 16/17] Add MISSING_THRESHOLD_PCT --- hetzner/tiler/tiler.production.yml | 7 ++- .../pipeline-monitor/checks/imposm_import.py | 36 ++++++++++--- .../tiler-monitor/pipeline-monitor/config.py | 6 +++ .../tiler-monitor/pipeline-monitor/monitor.py | 2 + .../pipeline-monitor/retry_store.py | 54 ++++++++++++++++--- .../pipeline-monitor/static/dashboard.html | 5 +- 6 files changed, 89 insertions(+), 21 deletions(-) diff --git a/hetzner/tiler/tiler.production.yml b/hetzner/tiler/tiler.production.yml index f49847f49..239abdf94 100644 --- a/hetzner/tiler/tiler.production.yml +++ b/hetzner/tiler/tiler.production.yml @@ -120,10 +120,10 @@ services: tiler_monitor: container_name: tiler_monitor - image: ghcr.io/openhistoricalmap/tiler-monitor:0.0.1-0.dev.git.3333.hd8192f15 + image: ghcr.io/openhistoricalmap/tiler-monitor:0.0.1-0.dev.git.3352.hf1f72cb1 volumes: - /var/run/docker.sock:/var/run/docker.sock - - ../../images/tiler-monitor:/app + # - ../../images/tiler-monitor:/app - ../../hetzner:/app/hetzner - tiler_monitor_data:/data environment: @@ -134,7 +134,7 @@ services: - .env.tiler networks: - ohm_network - + volumes: tiler_pgdata: driver: local @@ -142,7 +142,6 @@ volumes: tiler_imposm_data: driver: local name: tiler_imposm_17_03 - 
tiler_monitor_data: driver: local name: tiler_monitor_data diff --git a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py index 0fc9603d2..3dae996a8 100644 --- a/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py +++ b/images/tiler-monitor/pipeline-monitor/checks/imposm_import.py @@ -952,12 +952,12 @@ def check_pipeline(): elements=elements, ) - # --- Recheck pending and failed retries --- - retryable = retry_store.get_pending() + retry_store.get_failed() + # --- Recheck pending, failed, and warning retries --- + retryable = retry_store.get_pending() + retry_store.get_failed() + retry_store.get_warnings() newly_failed = [] if retryable: - print(f"\n[pipeline] Rechecking {len(retryable)} retries (pending + failed)...") + print(f"\n[pipeline] Rechecking {len(retryable)} retries (pending + failed + warning)...") for entry in retryable: cs_id = entry["changeset_id"] @@ -1003,18 +1003,34 @@ def check_pipeline(): print(f" [retry] RESOLVED {etype}/{oid} (changeset {cs_id}) " f"-> found in tables after {retry_num} retries") retry_store.mark_resolved(cs_id, etype, oid) - elif prev_status == "failed": - # Already failed, keep checking but don't increment + elif prev_status in ("failed", "warning"): + # Already exhausted retries, keep checking but don't increment print(f" [retry] STILL MISSING {etype}/{oid} (changeset {cs_id}) " - f"-> failed, still monitoring") + f"-> {prev_status}, still monitoring") else: - new_status = retry_store.increment_retry(cs_id, etype, oid) + # Check missing percentage threshold to decide severity + cs_stats = retry_store.get_changeset_stats(cs_id) + if cs_stats and cs_stats["total_elements"] > 0: + missing_pct = (cs_stats["missing_count"] / cs_stats["total_elements"]) * 100 + else: + missing_pct = 100 # no stats available, assume worst case + + above_threshold = missing_pct >= Config.MISSING_THRESHOLD_PCT + final_status = "failed" if above_threshold else 
"warning" + new_status = retry_store.increment_retry(cs_id, etype, oid, final_status=final_status) + if new_status == "failed": print(f" [retry] FAILED {etype}/{oid} (changeset {cs_id}) " - f"-> still missing after {retry_num}/{Config.MAX_RETRIES} retries") + f"-> still missing after {retry_num}/{Config.MAX_RETRIES} retries " + f"({missing_pct:.1f}% missing >= {Config.MISSING_THRESHOLD_PCT}% threshold)") newly_failed.append({ "type": etype, "osm_id": oid, "changeset_id": cs_id, }) + elif new_status == "warning": + print(f" [retry] WARNING {etype}/{oid} (changeset {cs_id}) " + f"-> still missing after {retry_num}/{Config.MAX_RETRIES} retries " + f"({missing_pct:.1f}% missing < {Config.MISSING_THRESHOLD_PCT}% threshold, " + f"not alerting)") else: print(f" [retry] PENDING {etype}/{oid} (changeset {cs_id}) " f"-> retry {retry_num}/{Config.MAX_RETRIES}") @@ -1040,8 +1056,10 @@ def check_pipeline(): # Include failed details for Slack alerting failed_count = retry_summary.get("failed", 0) + warning_count = retry_summary.get("warning", 0) result["details"]["newly_failed"] = newly_failed result["details"]["total_failed"] = failed_count + result["details"]["total_warnings"] = warning_count if newly_failed: result["status"] = "critical" @@ -1065,6 +1083,8 @@ def check_pipeline(): msg += f", {skipped} already passed (skipped)" if pending_count: msg += f", {pending_count} elements pending retry" + if warning_count: + msg += f", {warning_count} elements below threshold (warning)" result["message"] = msg if skipped: diff --git a/images/tiler-monitor/pipeline-monitor/config.py b/images/tiler-monitor/pipeline-monitor/config.py index 41be49de1..5b91f1d18 100644 --- a/images/tiler-monitor/pipeline-monitor/config.py +++ b/images/tiler-monitor/pipeline-monitor/config.py @@ -52,6 +52,12 @@ class Config: # Retry: how many times to recheck a missing element before alerting MAX_RETRIES = int(os.getenv("TILER_MONITORING_MAX_RETRIES", 3)) + # Missing threshold: minimum percentage of missing 
elements in a changeset + # to consider it a real failure. Below this threshold, elements are marked + # as "warning" instead of "failed" and do NOT trigger RSS/Slack alerts. + # e.g. 10 = 10% — if only 1/44 elements is missing (2.3%), it's a warning. + MISSING_THRESHOLD_PCT = int(os.getenv("TILER_MONITORING_MISSING_THRESHOLD_PCT", 10)) + # Verbose logging VERBOSE_LOGGING = os.getenv("VERBOSE_LOGGING", "false").lower() == "true" diff --git a/images/tiler-monitor/pipeline-monitor/monitor.py b/images/tiler-monitor/pipeline-monitor/monitor.py index 647909df1..18acb82b1 100644 --- a/images/tiler-monitor/pipeline-monitor/monitor.py +++ b/images/tiler-monitor/pipeline-monitor/monitor.py @@ -270,11 +270,13 @@ def retries(): all_entries = retry_store.get_all_details() pending = [e for e in all_entries if e["status"] == "pending"] failed = [e for e in all_entries if e["status"] == "failed"] + warnings = [e for e in all_entries if e["status"] == "warning"] return JSONResponse(content={ "summary": retry_store.summary(), "total": len(all_entries), "pending": pending, "failed": failed, + "warnings": warnings, }) diff --git a/images/tiler-monitor/pipeline-monitor/retry_store.py b/images/tiler-monitor/pipeline-monitor/retry_store.py index 9b2a52ec5..ccfccc583 100644 --- a/images/tiler-monitor/pipeline-monitor/retry_store.py +++ b/images/tiler-monitor/pipeline-monitor/retry_store.py @@ -172,10 +172,14 @@ def mark_resolved(changeset_id: int, element_type: str, osm_id: int): conn.commit() -def increment_retry(changeset_id: int, element_type: str, osm_id: int): - """Bump retry_count. If it reaches max_retries, flip status to 'failed'. +def increment_retry(changeset_id: int, element_type: str, osm_id: int, + final_status: str = "failed"): + """Bump retry_count. If it reaches max_retries, flip status to *final_status*. - Returns the new status ('pending' or 'failed'). 
+ *final_status* is normally 'failed', but callers may pass 'warning' when + the missing percentage is below the alerting threshold. + + Returns the new status ('pending', 'warning', or 'failed'). """ now = datetime.now(timezone.utc).isoformat() with _lock: @@ -193,18 +197,18 @@ def increment_retry(changeset_id: int, element_type: str, osm_id: int): if row and row["retry_count"] >= row["max_retries"]: conn.execute(""" - UPDATE pending_retries SET status = 'failed' + UPDATE pending_retries SET status = ? WHERE changeset_id = ? AND element_type = ? AND osm_id = ? - """, (changeset_id, element_type, osm_id)) + """, (final_status, changeset_id, element_type, osm_id)) conn.commit() - return "failed" + return final_status conn.commit() return "pending" def get_failed(): - """Return all elements that exhausted their retries.""" + """Return all elements that exhausted their retries with status='failed'.""" with _lock: conn = _get_conn() rows = conn.execute( @@ -213,6 +217,16 @@ def get_failed(): return [dict(r) for r in rows] +def get_warnings(): + """Return all elements that exhausted retries but are below the missing threshold.""" + with _lock: + conn = _get_conn() + rows = conn.execute( + "SELECT * FROM pending_retries WHERE status = 'warning'" + ).fetchall() + return [dict(r) for r in rows] + + def clear_failed(): """Remove all failed entries (call after alerting).""" with _lock: @@ -286,6 +300,32 @@ def get_all_details(ohm_base="https://www.openhistoricalmap.org"): return results +# --------------------------------------------------------------------------- +# Changeset stats helpers +# --------------------------------------------------------------------------- + +def get_changeset_stats(changeset_id: int): + """Return the latest total_elements, missing_count, ok_count for a changeset. + + Returns a dict with those keys, or None if no history exists. 
+ """ + with _lock: + conn = _get_conn() + row = conn.execute(""" + SELECT total_elements, missing_count, ok_count + FROM changeset_history + WHERE changeset_id = ? + ORDER BY id DESC LIMIT 1 + """, (changeset_id,)).fetchone() + if row: + return { + "total_elements": row["total_elements"], + "missing_count": row["missing_count"], + "ok_count": row["ok_count"], + } + return None + + # --------------------------------------------------------------------------- # Changeset history # --------------------------------------------------------------------------- diff --git a/images/tiler-monitor/pipeline-monitor/static/dashboard.html b/images/tiler-monitor/pipeline-monitor/static/dashboard.html index 10dd71630..b6980c94e 100644 --- a/images/tiler-monitor/pipeline-monitor/static/dashboard.html +++ b/images/tiler-monitor/pipeline-monitor/static/dashboard.html @@ -519,11 +519,12 @@

OHM Tiler Pipeline Monitor

const pending = data.pending || []; const failed = data.failed || []; - const all = [...failed, ...pending]; + const warnings = data.warnings || []; + const all = [...failed, ...warnings, ...pending]; const badge = document.getElementById('retriesBadge'); badge.textContent = data.total || 0; - badge.className = 'badge' + (failed.length ? ' err' : pending.length ? ' warn' : ''); + badge.className = 'badge' + (failed.length ? ' err' : (warnings.length || pending.length) ? ' warn' : ''); const tbody = document.getElementById('retriesBody'); const empty = document.getElementById('retriesEmpty'); From d4ce55f80b1dd7663d7711563f74bdeebf78c821 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Sat, 28 Mar 2026 14:21:20 -0500 Subject: [PATCH 17/17] Update tiler_monitor docker container id --- hetzner/tiler/tiler.production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner/tiler/tiler.production.yml b/hetzner/tiler/tiler.production.yml index 239abdf94..6a7199417 100644 --- a/hetzner/tiler/tiler.production.yml +++ b/hetzner/tiler/tiler.production.yml @@ -120,7 +120,7 @@ services: tiler_monitor: container_name: tiler_monitor - image: ghcr.io/openhistoricalmap/tiler-monitor:0.0.1-0.dev.git.3352.hf1f72cb1 + image: ghcr.io/openhistoricalmap/tiler-monitor:0.0.1-0.dev.git.3353.h63df1944 volumes: - /var/run/docker.sock:/var/run/docker.sock # - ../../images/tiler-monitor:/app