From 2ce8a15a715238259fa2b2c4b4e62c57e0d1bd2b Mon Sep 17 00:00:00 2001 From: Padmajakachare1911 Date: Thu, 4 Jun 2026 16:17:25 +0530 Subject: [PATCH] fix(upload): surface PDF validation errors and align dropzone (Issue #16) Return consistent 400 detail strings for extension, MIME, magic bytes, and size checks. Toast upload and ingest failures in the UI, auto-remove failed cards, and restrict the sidebar dropzone to PDF only with updated helper copy. Co-authored-by: Cursor --- backend/app/routes/documents.py | 46 +++++++++++---- .../tests/test_document_upload_validation.py | 56 ++++++++++++++----- frontend/src/app/dashboard/page.tsx | 29 ++++++++-- .../components/document/DocumentSidebar.tsx | 11 ++-- frontend/src/lib/i18n.ts | 8 +-- 5 files changed, 111 insertions(+), 39 deletions(-) diff --git a/backend/app/routes/documents.py b/backend/app/routes/documents.py index 73ce58ef..12508ff1 100644 --- a/backend/app/routes/documents.py +++ b/backend/app/routes/documents.py @@ -200,30 +200,56 @@ async def upload_document( HTTPException: With status code 500 if: - The server lacks the 'python-magic' dependency. """ - # ── Validate file type ─────────────────────────── if not file.filename: raise HTTPException(status_code=400, detail="No filename provided") - ext = file.filename.rsplit(".", 1)[-1].lower() - if ext not in settings.ALLOWED_EXTENSIONS: + # ── Issue #16: consistent validation errors ────────────────────────── + ALLOWED_CONTENT_TYPES = {"application/pdf"} + ALLOWED_EXTENSIONS = {".pdf"} + MAX_FILE_SIZE_BYTES = settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024 + + ext = os.path.splitext(file.filename or "")[1].lower() + if ext not in ALLOWED_EXTENSIONS: raise HTTPException( status_code=400, - detail=f"File type '.{ext}' not supported. Allowed: {', '.join(settings.ALLOWED_EXTENSIONS)}", + detail=f"Unsupported file type '{ext or 'unknown'}'. Only PDF files are accepted.", ) - # ── Validate and save file to disk ─────────────── - temp_path = await validate_upload(file) + if file.content_type not in ALLOWED_CONTENT_TYPES: + raise HTTPException( + status_code=400, + detail=f"Invalid content type '{file.content_type}'. Only PDF files are accepted.", + ) + + contents = await file.read() + + if not contents.startswith(b"%PDF"): + raise HTTPException( + status_code=400, + detail="The uploaded file does not appear to be a valid PDF.", + ) + + if len(contents) > MAX_FILE_SIZE_BYTES: + mb = len(contents) / (1024 * 1024) + raise HTTPException( + status_code=400, + detail=( + f"File too large ({mb:.1f} MB). " + f"Maximum allowed size is {settings.MAX_UPLOAD_SIZE_MB} MB." + ), + ) + # ── end validation ──────────────────────────────────────────────────── user_dir = os.path.join(settings.UPLOAD_DIR, user.id) os.makedirs(user_dir, exist_ok=True) - stored_filename = f"{uuid.uuid4().hex}.{ext}" + stored_filename = f"{uuid.uuid4().hex}.pdf" filepath = os.path.join(user_dir, stored_filename) - # Move temp file to final destination - shutil.move(temp_path, filepath) + with open(filepath, "wb") as f: + f.write(contents) - file_size = Path(filepath).stat().st_size + file_size = len(contents) # ── Create database record ─────────────────────── document = Document( diff --git a/backend/tests/test_document_upload_validation.py b/backend/tests/test_document_upload_validation.py index 10bc252e..732e5a91 100644 --- a/backend/tests/test_document_upload_validation.py +++ b/backend/tests/test_document_upload_validation.py @@ -24,8 +24,16 @@ def _pdf_bytes() -> bytes: return buffer.getvalue() -def _upload_file(name: str, content: bytes) -> UploadFile: - return UploadFile(filename=name, file=io.BytesIO(content)) +def _upload_file( + name: str, + content: bytes, + content_type: str = "application/pdf", +) -> UploadFile: + return UploadFile( + filename=name, + file=io.BytesIO(content), + headers={"content-type": content_type}, + ) def _run(coro): @@ -123,22 +131,12 @@ def test_upload_document_handles_duplicate_original_names( session.commit() session.refresh(user) - temp_files: list[Path] = [] - - async def fake_validate_upload(_file: UploadFile) -> str: - handle = documents.tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") - with handle: - handle.write(_pdf_bytes()) - temp_files.append(Path(handle.name)) - return handle.name - class FakeUUID: def __init__(self, value: str) -> None: self.hex = value uuid_values = iter([FakeUUID(first_hex), FakeUUID(second_hex)]) - monkeypatch.setattr(documents, "validate_upload", fake_validate_upload) monkeypatch.setattr(documents.settings, "UPLOAD_DIR", str(tmp_path / "uploads")) monkeypatch.setattr(documents.uuid, "uuid4", lambda: next(uuid_values)) monkeypatch.setattr( @@ -147,16 +145,17 @@ def __init__(self, value: str) -> None: lambda **_kwargs: types.SimpleNamespace(id="queued-task"), ) + pdf_bytes = _pdf_bytes() first = _run( documents.upload_document( - file=_upload_file("same-name.pdf", b"first"), + file=_upload_file("same-name.pdf", pdf_bytes), user=user, db=session, ) ) second = _run( documents.upload_document( - file=_upload_file("same-name.pdf", b"second"), + file=_upload_file("same-name.pdf", pdf_bytes), user=user, db=session, ) @@ -170,4 +169,31 @@ def __init__(self, value: str) -> None: assert first.task_id == second.task_id == "queued-task" assert (tmp_path / "uploads" / user.id / f"{first_hex}.pdf").exists() assert (tmp_path / "uploads" / user.id / f"{second_hex}.pdf").exists() - assert all(not path.exists() for path in temp_files) + + +def test_upload_document_rejects_non_pdf_extension() -> None: + with pytest.raises(HTTPException) as exc: + _run( + documents.upload_document( + file=_upload_file("test.docx", b"%PDF-1.4", "application/pdf"), + user=types.SimpleNamespace(id="user-1"), + db=types.SimpleNamespace(), + ) + ) + + assert exc.value.status_code == 400 + assert "Unsupported file type" in exc.value.detail + + +def test_upload_document_rejects_invalid_magic_bytes() -> None: + with pytest.raises(HTTPException) as exc: + _run( + documents.upload_document( + file=_upload_file("fake.pdf", b"not a pdf file"), + user=types.SimpleNamespace(id="user-1"), + db=types.SimpleNamespace(), + ) + ) + + assert exc.value.status_code == 400 + assert "does not appear to be a valid PDF" in exc.value.detail diff --git a/frontend/src/app/dashboard/page.tsx b/frontend/src/app/dashboard/page.tsx index f5a25f0a..79910df3 100644 --- a/frontend/src/app/dashboard/page.tsx +++ b/frontend/src/app/dashboard/page.tsx @@ -61,6 +61,7 @@ export default function DashboardPage() { const [documents, setDocuments] = useState([]); const prevDocsRef = useRef>({}); + const toastedFailures = useRef>(new Set()); const [activeDoc, setActiveDoc] = useState(null); const [pdfPage, setPdfPage] = useState(1); const [pdfHighlightTarget, setPdfHighlightTarget] = useState<{ @@ -141,17 +142,33 @@ export default function DashboardPage() { nextPrevDocs[doc.id] = doc.status; const oldStatus = prev[doc.id]; - if (oldStatus && oldStatus !== doc.status) { - if (doc.status === "ready") { - toast.success(`🎉 Ingestion complete: '${doc.original_name}' is ready!`); - } else if (doc.status === "failed") { - toast.error(`❌ Ingestion failed for '${doc.original_name}': ${doc.error_message || "Unknown error"}`); - } + if (oldStatus && oldStatus !== doc.status && doc.status === "ready") { + toast.success(`🎉 Ingestion complete: '${doc.original_name}' is ready!`); } }); prevDocsRef.current = nextPrevDocs; }, [documents]); + // ── Issue #16: surface async processing failures ────────────────────── + useEffect(() => { + documents.forEach((doc) => { + if (doc.status === "failed" && !toastedFailures.current.has(doc.id)) { + toastedFailures.current.add(doc.id); + + const reason = + doc.error_message ?? + `Processing failed for "${doc.original_name}".`; + + toast.error(reason); + + setTimeout(() => { + setDocuments((prev) => prev.filter((d) => d.id !== doc.id)); + toastedFailures.current.delete(doc.id); + }, 4000); + } + }); + }, [documents]); + // Poll for processing status useEffect(() => { const hasPending = (documents || []).some( diff --git a/frontend/src/components/document/DocumentSidebar.tsx b/frontend/src/components/document/DocumentSidebar.tsx index d2ce3490..3a40cf1a 100644 --- a/frontend/src/components/document/DocumentSidebar.tsx +++ b/frontend/src/components/document/DocumentSidebar.tsx @@ -91,7 +91,8 @@ export default function DocumentSidebar({ } catch (err) { const message = err instanceof Error ? err.message : t("documents.uploadFailed"); setUploadError(message); - toast.error(`❌ Upload failed: ${message}`); + toast.error(message); + return; } finally { setUploading(false); setUploadProgress(0); @@ -105,9 +106,6 @@ export default function DocumentSidebar({ onDrop, accept: { "application/pdf": [".pdf"], - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": [".docx"], - "text/plain": [".txt"], - "text/markdown": [".md"], }, disabled: uploading, }); @@ -343,6 +341,11 @@ export default function DocumentSidebar({ )} + {doc.status === "failed" && doc.error_message && ( +

+ {doc.error_message} +

+ )}
{/* Action buttons (Settings and Delete) are only visible on hover and when the document is ready. The settings button is disabled if the document is not ready, and the delete button shows a loader when the document is being deleted. */} diff --git a/frontend/src/lib/i18n.ts b/frontend/src/lib/i18n.ts index cd9f3b38..c112b6d4 100644 --- a/frontend/src/lib/i18n.ts +++ b/frontend/src/lib/i18n.ts @@ -76,7 +76,7 @@ const resources = { uploading: "Uploading...", dropHere: "Drop files here", dropOrClick: "Drop files or click to upload", - uploadFormats: "PDF, DOCX, TXT, MD (max 50MB)", + uploadFormats: "PDF files only (max 20 MB)", documentsTitle: "Documents ({{count}})", noDocuments: "No documents yet", getStarted: "Upload a file to get started", @@ -158,7 +158,7 @@ const resources = { uploading: "अपलोड हो रहा है...", dropHere: "फ़ाइलें यहाँ छोड़ें", dropOrClick: "फ़ाइलें छोड़ें या अपलोड के लिए क्लिक करें", - uploadFormats: "PDF, DOCX, TXT, MD (अधिकतम 50MB)", + uploadFormats: "PDF files only (max 20 MB)", documentsTitle: "दस्तावेज़ ({{count}})", noDocuments: "अभी तक कोई दस्तावेज़ नहीं", getStarted: "शुरू करने के लिए फ़ाइल अपलोड करें", @@ -240,7 +240,7 @@ const resources = { uploading: "Subiendo...", dropHere: "Suelta archivos aquí", dropOrClick: "Suelta archivos o haz clic para subir", - uploadFormats: "PDF, DOCX, TXT, MD (máx. 50MB)", + uploadFormats: "Solo PDF (máx. 20 MB)", documentsTitle: "Documentos ({{count}})", noDocuments: "Aún no hay documentos", getStarted: "Sube un archivo para comenzar", @@ -322,7 +322,7 @@ const resources = { uploading: "Envoi en cours...", dropHere: "Déposez les fichiers ici", dropOrClick: "Déposez les fichiers ou cliquez pour téléverser", - uploadFormats: "PDF, DOCX, TXT, MD (max 50 Mo)", + uploadFormats: "PDF uniquement (max 20 Mo)", documentsTitle: "Documents ({{count}})", noDocuments: "Aucun document pour le moment", getStarted: "Importez un fichier pour commencer",