diff --git a/backend/app/services/document_metadata_service.py b/backend/app/services/document_metadata_service.py index b334933..3e82659 100644 --- a/backend/app/services/document_metadata_service.py +++ b/backend/app/services/document_metadata_service.py @@ -69,7 +69,9 @@ async def get_document(doc_id: str) -> dict | None: .maybe_single() .execute() ) - return _normalize(result.data) if result.data else None + if result is None or not getattr(result, "data", None): + return None + return _normalize(result.data) async def update_document_stage(doc_id: str, stage: str) -> None: diff --git a/backend/app/services/document_pipeline.py b/backend/app/services/document_pipeline.py index 513c4aa..7a7b3a0 100644 --- a/backend/app/services/document_pipeline.py +++ b/backend/app/services/document_pipeline.py @@ -28,6 +28,12 @@ _VALID_DOC_TYPES = {"RFQ", "PO", "CFG", "Client CSV", "Sales CSV"} _COGNEE_TIMEOUT = int(os.getenv("COGNEE_TIMEOUT_SECONDS", "300")) +# Serialize run_pipeline() across concurrent uploads so we don't burst +# past Gemini's per-minute embedding cap. One doc fully completes (or +# fails) before the next pipeline starts. Upload response still returns +# immediately; docs queue as status="processing". +_PIPELINE_LOCK = asyncio.Lock() + # --------------------------------------------------------------------------- # Helpers @@ -108,7 +114,20 @@ async def run_pipeline( Progress stages written to DB: uploading → ingesting → building_graph → analyzing → extracting_insights → completed (or failed) + + Serialized via `_PIPELINE_LOCK`: if several uploads arrive at once, + each pipeline waits for the prior one to finish. Upload response still + returns immediately — docs queue as status="processing". """ + async with _PIPELINE_LOCK: + await _run_pipeline_locked(file_path, doc_id, original_filename) + + +async def _run_pipeline_locked( + file_path: Path, + doc_id: str, + original_filename: str, +) -> None: sb = await get_async_supabase() async def _update(**fields) -> None: diff --git a/backend/cortex_local.db b/backend/cortex_local.db deleted file mode 100644 index 77287f8..0000000 Binary files a/backend/cortex_local.db and /dev/null differ diff --git a/backend/cortex_local.db-shm b/backend/cortex_local.db-shm deleted file mode 100644 index fe9ac28..0000000 Binary files a/backend/cortex_local.db-shm and /dev/null differ diff --git a/backend/cortex_local.db-wal b/backend/cortex_local.db-wal deleted file mode 100644 index e69de29..0000000