From c0b11245824d56686e4ffc9226b606545ad89389 Mon Sep 17 00:00:00 2001
From: Martin Dell <martin.dell@gmail.com>
Date: Sat, 18 Apr 2026 07:52:28 +0100
Subject: [PATCH] fix: resume processing automatically after container restart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs combined to cause processing to stall permanently after any
container restart:

1. Images left in 'processing' status from an unclean shutdown were
   never recovered, permanently blocking the worker queue.

2. The worker queue is in-memory only — existing 'pending' items in the
   database were never re-enqueued on startup, so processing never
   resumed without manual user intervention.

Fix: add two startup recovery steps in lifespan():
- Reset any stuck 'processing' images back to 'pending' (and clear their
  in_queue flag) immediately after DB init.
- Re-enqueue all 'pending' images immediately after the worker starts.

Each step adds only a single SQL statement to lifespan(), so both are
cheap at startup and are no-ops on a clean first run.
---
 backend/main.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/backend/main.py b/backend/main.py
index 24f1d77..b265969 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -57,6 +57,20 @@ async def lifespan(app: FastAPI):
     db = await init_db(db_path)
     app.state.db = db
 
+    # Startup recovery: reset any images stuck in 'processing' from a previous
+    # unclean shutdown. Without this, a container restart leaves those images
+    # permanently blocked and the worker queue stalls indefinitely.
+    async with db.execute(
+        "UPDATE images SET status = 'pending', in_queue = 0 WHERE status = 'processing'"
+    ) as cursor:
+        recovered = cursor.rowcount
+    if recovered:
+        await db.commit()
+        logger.info(
+            "Startup recovery: reset %d stuck 'processing' image(s) to 'pending'",
+            recovered,
+        )
+
     # Prompt library — ensure defaults exist, load active templates
     await ensure_defaults(db)
     vision_prompt = await get_active_prompt(db, STAGE_VISION)
@@ -79,6 +93,20 @@ async def lifespan(app: FastAPI):
     app.state.worker = worker
     await worker.start()
 
+    # Startup recovery: re-enqueue any images that were pending when the
+    # container last stopped. The worker queue is in-memory only, so pending
+    # items from a previous session are never picked up without this.
+    async with db.execute(
+        "SELECT id FROM images WHERE status = 'pending'"
+    ) as cursor:
+        pending_ids = [row[0] for row in await cursor.fetchall()]
+    if pending_ids:
+        enqueued = await worker.enqueue(pending_ids)
+        logger.info(
+            "Startup recovery: enqueued %d pending image(s) for processing",
+            enqueued,
+        )
+
     # File watcher
     watcher = FileWatcher(db=db, settings=settings, worker=worker)
     app.state.watcher = watcher