From c0b11245824d56686e4ffc9226b606545ad89389 Mon Sep 17 00:00:00 2001 From: Martin Dell Date: Sat, 18 Apr 2026 07:52:28 +0100 Subject: [PATCH] fix: resume processing automatically after container restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs combined to cause processing to stall permanently after any container restart: 1. Images left in 'processing' status from an unclean shutdown were never recovered, permanently blocking the worker queue. 2. The worker queue is in-memory only — existing 'pending' items in the database were never re-enqueued on startup, so processing never resumed without manual user intervention. Fix: add two startup recovery steps in lifespan(): - Reset any stuck 'processing' images back to 'pending' immediately after DB init. - Re-enqueue all 'pending' images immediately after the worker starts. Both are single SQL queries, cheap at startup, and no-ops on a clean first run. --- backend/main.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/backend/main.py b/backend/main.py index 24f1d77..b265969 100644 --- a/backend/main.py +++ b/backend/main.py @@ -57,6 +57,20 @@ async def lifespan(app: FastAPI): db = await init_db(db_path) app.state.db = db + # Startup recovery: reset any images stuck in 'processing' from a previous + # unclean shutdown. Without this, a container restart leaves those images + # permanently blocked and the worker queue stalls indefinitely. + async with db.execute( + "UPDATE images SET status = 'pending', in_queue = 0 WHERE status = 'processing'" + ) as cursor: + recovered = cursor.rowcount + if recovered: + await db.commit() + logger.info( + "Startup recovery: reset %d stuck 'processing' image(s) to 'pending'", + recovered, + ) + # Prompt library — ensure defaults exist, load active templates await ensure_defaults(db) vision_prompt = await get_active_prompt(db, STAGE_VISION) @@ -79,6 +93,20 @@ async def lifespan(app: FastAPI): app.state.worker = worker await worker.start() + # Startup recovery: re-enqueue any images that were pending when the + # container last stopped. The worker queue is in-memory only, so pending + # items from a previous session are never picked up without this. + async with db.execute( + "SELECT id FROM images WHERE status = 'pending'" + ) as cursor: + pending_ids = [row[0] for row in await cursor.fetchall()] + if pending_ids: + enqueued = await worker.enqueue(pending_ids) + logger.info( + "Startup recovery: enqueued %d pending image(s) for processing", + enqueued, + ) + # File watcher watcher = FileWatcher(db=db, settings=settings, worker=worker) app.state.watcher = watcher