Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 36 additions & 10 deletions backend/app/routes/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,30 +200,56 @@ async def upload_document(
HTTPException: With status code 500 if:
- The server lacks the 'python-magic' dependency.
"""
# ── Validate file type ───────────────────────────
if not file.filename:
raise HTTPException(status_code=400, detail="No filename provided")

ext = file.filename.rsplit(".", 1)[-1].lower()
if ext not in settings.ALLOWED_EXTENSIONS:
# ── Issue #16: consistent validation errors ──────────────────────────
ALLOWED_CONTENT_TYPES = {"application/pdf"}
ALLOWED_EXTENSIONS = {".pdf"}
MAX_FILE_SIZE_BYTES = settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024

ext = os.path.splitext(file.filename or "")[1].lower()
if ext not in ALLOWED_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=f"File type '.{ext}' not supported. Allowed: {', '.join(settings.ALLOWED_EXTENSIONS)}",
detail=f"Unsupported file type '{ext or 'unknown'}'. Only PDF files are accepted.",
)

# ── Validate and save file to disk ───────────────
temp_path = await validate_upload(file)
if file.content_type not in ALLOWED_CONTENT_TYPES:
raise HTTPException(
status_code=400,
detail=f"Invalid content type '{file.content_type}'. Only PDF files are accepted.",
)

contents = await file.read()

if not contents.startswith(b"%PDF"):
raise HTTPException(
status_code=400,
detail="The uploaded file does not appear to be a valid PDF.",
)

if len(contents) > MAX_FILE_SIZE_BYTES:
mb = len(contents) / (1024 * 1024)
raise HTTPException(
status_code=400,
detail=(
f"File too large ({mb:.1f} MB). "
f"Maximum allowed size is {settings.MAX_UPLOAD_SIZE_MB} MB."
),
)
# ── end validation ────────────────────────────────────────────────────

user_dir = os.path.join(settings.UPLOAD_DIR, user.id)
os.makedirs(user_dir, exist_ok=True)

stored_filename = f"{uuid.uuid4().hex}.{ext}"
stored_filename = f"{uuid.uuid4().hex}.pdf"
filepath = os.path.join(user_dir, stored_filename)

# Move temp file to final destination
shutil.move(temp_path, filepath)
with open(filepath, "wb") as f:
f.write(contents)

file_size = Path(filepath).stat().st_size
file_size = len(contents)

# ── Create database record ───────────────────────
document = Document(
Expand Down
56 changes: 41 additions & 15 deletions backend/tests/test_document_upload_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,16 @@ def _pdf_bytes() -> bytes:
return buffer.getvalue()


def _upload_file(name: str, content: bytes) -> UploadFile:
return UploadFile(filename=name, file=io.BytesIO(content))
def _upload_file(
name: str,
content: bytes,
content_type: str = "application/pdf",
) -> UploadFile:
return UploadFile(
filename=name,
file=io.BytesIO(content),
headers={"content-type": content_type},
)


def _run(coro):
Expand Down Expand Up @@ -123,22 +131,12 @@ def test_upload_document_handles_duplicate_original_names(
session.commit()
session.refresh(user)

temp_files: list[Path] = []

async def fake_validate_upload(_file: UploadFile) -> str:
handle = documents.tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
with handle:
handle.write(_pdf_bytes())
temp_files.append(Path(handle.name))
return handle.name

class FakeUUID:
def __init__(self, value: str) -> None:
self.hex = value

uuid_values = iter([FakeUUID(first_hex), FakeUUID(second_hex)])

monkeypatch.setattr(documents, "validate_upload", fake_validate_upload)
monkeypatch.setattr(documents.settings, "UPLOAD_DIR", str(tmp_path / "uploads"))
monkeypatch.setattr(documents.uuid, "uuid4", lambda: next(uuid_values))
monkeypatch.setattr(
Expand All @@ -147,16 +145,17 @@ def __init__(self, value: str) -> None:
lambda **_kwargs: types.SimpleNamespace(id="queued-task"),
)

pdf_bytes = _pdf_bytes()
first = _run(
documents.upload_document(
file=_upload_file("same-name.pdf", b"first"),
file=_upload_file("same-name.pdf", pdf_bytes),
user=user,
db=session,
)
)
second = _run(
documents.upload_document(
file=_upload_file("same-name.pdf", b"second"),
file=_upload_file("same-name.pdf", pdf_bytes),
user=user,
db=session,
)
Expand All @@ -170,4 +169,31 @@ def __init__(self, value: str) -> None:
assert first.task_id == second.task_id == "queued-task"
assert (tmp_path / "uploads" / user.id / f"{first_hex}.pdf").exists()
assert (tmp_path / "uploads" / user.id / f"{second_hex}.pdf").exists()
assert all(not path.exists() for path in temp_files)


def test_upload_document_rejects_non_pdf_extension() -> None:
with pytest.raises(HTTPException) as exc:
_run(
documents.upload_document(
file=_upload_file("test.docx", b"%PDF-1.4", "application/pdf"),
user=types.SimpleNamespace(id="user-1"),
db=types.SimpleNamespace(),
)
)

assert exc.value.status_code == 400
assert "Unsupported file type" in exc.value.detail


def test_upload_document_rejects_invalid_magic_bytes() -> None:
with pytest.raises(HTTPException) as exc:
_run(
documents.upload_document(
file=_upload_file("fake.pdf", b"not a pdf file"),
user=types.SimpleNamespace(id="user-1"),
db=types.SimpleNamespace(),
)
)

assert exc.value.status_code == 400
assert "does not appear to be a valid PDF" in exc.value.detail
29 changes: 23 additions & 6 deletions frontend/src/app/dashboard/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,12 @@
}

export default function DashboardPage() {
const { user, loading, initialized } = useAuth();

Check warning on line 59 in frontend/src/app/dashboard/page.tsx

View workflow job for this annotation

GitHub Actions / ⚛️ Frontend — TypeScript & Build

'loading' is assigned a value but never used
const router = useRouter();

const [documents, setDocuments] = useState<DocInfo[]>([]);
const prevDocsRef = useRef<Record<string, string>>({});
const toastedFailures = useRef<Set<string>>(new Set());
const [activeDoc, setActiveDoc] = useState<DocInfo | null>(null);
const [pdfPage, setPdfPage] = useState(1);
const [pdfHighlightTarget, setPdfHighlightTarget] = useState<{
Expand Down Expand Up @@ -141,17 +142,33 @@
nextPrevDocs[doc.id] = doc.status;

const oldStatus = prev[doc.id];
if (oldStatus && oldStatus !== doc.status) {
if (doc.status === "ready") {
toast.success(`🎉 Ingestion complete: '${doc.original_name}' is ready!`);
} else if (doc.status === "failed") {
toast.error(`❌ Ingestion failed for '${doc.original_name}': ${doc.error_message || "Unknown error"}`);
}
if (oldStatus && oldStatus !== doc.status && doc.status === "ready") {
toast.success(`🎉 Ingestion complete: '${doc.original_name}' is ready!`);
}
});
prevDocsRef.current = nextPrevDocs;
}, [documents]);

// ── Issue #16: surface async processing failures ──────────────────────
useEffect(() => {
documents.forEach((doc) => {
if (doc.status === "failed" && !toastedFailures.current.has(doc.id)) {
toastedFailures.current.add(doc.id);

const reason =
doc.error_message ??
`Processing failed for "${doc.original_name}".`;

toast.error(reason);

setTimeout(() => {
setDocuments((prev) => prev.filter((d) => d.id !== doc.id));
toastedFailures.current.delete(doc.id);
}, 4000);
}
});
}, [documents]);

// Poll for processing status
useEffect(() => {
const hasPending = (documents || []).some(
Expand Down
11 changes: 7 additions & 4 deletions frontend/src/components/document/DocumentSidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ export default function DocumentSidebar({
} catch (err) {
const message = err instanceof Error ? err.message : t("documents.uploadFailed");
setUploadError(message);
toast.error(`❌ Upload failed: ${message}`);
toast.error(message);
return;
} finally {
setUploading(false);
setUploadProgress(0);
Expand All @@ -105,9 +106,6 @@ export default function DocumentSidebar({
onDrop,
accept: {
"application/pdf": [".pdf"],
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": [".docx"],
"text/plain": [".txt"],
"text/markdown": [".md"],
},
disabled: uploading,
});
Expand Down Expand Up @@ -343,6 +341,11 @@ export default function DocumentSidebar({
</Badge>
)}
</div>
{doc.status === "failed" && doc.error_message && (
<p className="mt-1.5 text-xs text-red-500 dark:text-red-400 leading-snug">
{doc.error_message}
</p>
)}
</div>
<div className="flex items-center gap-1 shrink-0">
{/* Action buttons (Settings and Delete) are only visible on hover and when the document is ready. The settings button is disabled if the document is not ready, and the delete button shows a loader when the document is being deleted. */}
Expand Down
8 changes: 4 additions & 4 deletions frontend/src/lib/i18n.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ const resources = {
uploading: "Uploading...",
dropHere: "Drop files here",
dropOrClick: "Drop files or click to upload",
uploadFormats: "PDF, DOCX, TXT, MD (max 50MB)",
uploadFormats: "PDF files only (max 20 MB)",
documentsTitle: "Documents ({{count}})",
noDocuments: "No documents yet",
getStarted: "Upload a file to get started",
Expand Down Expand Up @@ -158,7 +158,7 @@ const resources = {
uploading: "अपलोड हो रहा है...",
dropHere: "फ़ाइलें यहाँ छोड़ें",
dropOrClick: "फ़ाइलें छोड़ें या अपलोड के लिए क्लिक करें",
uploadFormats: "PDF, DOCX, TXT, MD (अधिकतम 50MB)",
uploadFormats: "PDF files only (max 20 MB)",
documentsTitle: "दस्तावेज़ ({{count}})",
noDocuments: "अभी तक कोई दस्तावेज़ नहीं",
getStarted: "शुरू करने के लिए फ़ाइल अपलोड करें",
Expand Down Expand Up @@ -240,7 +240,7 @@ const resources = {
uploading: "Subiendo...",
dropHere: "Suelta archivos aquí",
dropOrClick: "Suelta archivos o haz clic para subir",
uploadFormats: "PDF, DOCX, TXT, MD (máx. 50MB)",
uploadFormats: "Solo PDF (máx. 20 MB)",
documentsTitle: "Documentos ({{count}})",
noDocuments: "Aún no hay documentos",
getStarted: "Sube un archivo para comenzar",
Expand Down Expand Up @@ -322,7 +322,7 @@ const resources = {
uploading: "Envoi en cours...",
dropHere: "Déposez les fichiers ici",
dropOrClick: "Déposez les fichiers ou cliquez pour téléverser",
uploadFormats: "PDF, DOCX, TXT, MD (max 50 Mo)",
uploadFormats: "PDF uniquement (max 20 Mo)",
documentsTitle: "Documents ({{count}})",
noDocuments: "Aucun document pour le moment",
getStarted: "Importez un fichier pour commencer",
Expand Down
Loading