diff --git a/.env.example b/.env.example index 7b9223c..497120a 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,7 @@ # ── General ────────────────────────────────── ENVIRONMENT=development +CORS_ALLOWED_ORIGINS=http://localhost:5173 # ── LLM ────────────────────────────────────── LLM_PROVIDER=gemini @@ -36,8 +37,11 @@ SUPABASE_SERVICE_ROLE_KEY= ENABLE_BACKEND_ACCESS_CONTROL=false +# ── Cognee ────────────────────────────────── +COGNEE_TIMEOUT_SECONDS=300 + # Cloudfare CLOUDFLARE_R2_ENDPOINT= -`CLOUDFLARE_R2_ACCESS_KEY_ID= +CLOUDFLARE_R2_ACCESS_KEY_ID= CLOUDFLARE_R2_SECRET_KEY= CLOUDFLARE_R2_BUCKET_NAME= diff --git a/.github/workflows/backend-lint-check.yml b/.github/workflows/backend-lint-check.yml index b9759b3..4acf21e 100644 --- a/.github/workflows/backend-lint-check.yml +++ b/.github/workflows/backend-lint-check.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: - python-version: "3.11" + python-version: "3.12" - name: Lint run: | cd backend diff --git a/.github/workflows/backend-test.yml b/.github/workflows/backend-test.yml new file mode 100644 index 0000000..ee04935 --- /dev/null +++ b/.github/workflows/backend-test.yml @@ -0,0 +1,40 @@ +name: Backend Tests + +on: + workflow_dispatch: + pull_request: + branches: [main] + paths: + - "backend/**" + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v4 + with: + python-version: "3.12" + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('backend/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + cd backend + pip install -r requirements.txt + pip install pytest-asyncio + + - name: Run tests + run: | + cd backend + pytest tests/ \ + --ignore=tests/test_storage.py \ + --ignore=tests/test_cognee.py \ + -v --tb=short diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..e5f8458 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,190 @@ +# Cortex + +Document knowledge graph system powered by Cognee. Ingests PDFs/CSVs/text via `cognee.add()` → `cognee.cognify()`, then serves knowledge-graph search via `SearchType.GRAPH_COMPLETION`. + +## What to ignore +- `archive/` — deprecated, do not review +- `backend/app/services/extraction/` — old ETL pipeline, being replaced +- `supabase/` — not part of current sprint + +## Active codebase (review here) +- `backend/app/` — all active backend code +- `backend/tests/` — pytest tests +- `frontend/` — React SPA (active development) + +## Tech stack + +### Backend +- FastAPI + Uvicorn (Python 3.12) +- Cognee (`cognee[postgres,gemini]>=0.5.5`) — knowledge graph engine + - Graph store: Kuzu (embedded, `.cognee_system/`) + - Vector store: pgvector via PostgreSQL + - LLM: Google Gemini (`LLM_PROVIDER=gemini`) + - Embeddings: configured via `EMBEDDING_PROVIDER` / `EMBEDDING_MODEL` +- Supabase — document metadata, async client +- LiteLLM — LLM abstraction layer +- Cloudflare R2 — raw file storage (pre-signed URLs via `boto3`) +- Ruff for linting/formatting + +### Frontend +- React 18 + TypeScript +- Vite (dev server + build) +- Tailwind CSS +- React Router v6 +- React Query (TanStack Query v5) +- react-force-graph-2d — knowledge graph visualization +- Axios — HTTP client + +## Architecture + +All routes are mounted under `/api` via `app/api.py`. 
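+
+A condensed sketch of the three Cognee calls at the heart of the ingest path (an illustrative helper, not actual app code — the real orchestration, with timeouts, retries, and Supabase status updates, lives in `document_pipeline.py`):
+
+```python
+import cognee
+from cognee import SearchType
+
+async def ingest_and_query(file_path: str, client_name: str, question: str):
+    await cognee.add(file_path, dataset_name=client_name)  # stage raw content
+    await cognee.cognify(datasets=[client_name])           # build the knowledge graph
+    return await cognee.search(                            # query the graph
+        query_text=question,
+        query_type=SearchType.GRAPH_COMPLETION,
+        datasets=[client_name],
+    )
+```
+
+The full request flow: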
+
+```
+POST /api/documents/upload
+  → save file to /tmp/cognee_uploads/
+  → create_document() in Supabase (status=processing)
+  → run_pipeline() in background:
+      → upload_to_r2() (raw file to Cloudflare R2)
+      → LLM-based client name + document type classification
+      → cognee.add(file_path, dataset_name=client_name)
+      → cognee.cognify(datasets=[client_name])
+      → cognee.search() × 3 for summary/insights/entities (GRAPH_SUMMARY_COMPLETION / GRAPH_COMPLETION)
+      → write results to Supabase (status=completed)
+
+GET /api/documents/search?q=...&dataset=...&search_type=...
+  → search_knowledge_graph(query, dataset, limit, search_type)
+  → cognee.search(SearchType.GRAPH_COMPLETION, ...)
+
+GET /api/documents/graph
+  → get_graph_data() → D3-compatible node/link JSON
+
+GET /api/documents/ — list all documents
+GET /api/documents/{doc_id} — single document
+GET /api/documents/{doc_id}/file-url — pre-signed R2 download URL
+GET /api/health — Supabase connectivity check
+```
+
+### Key files
+- `app/main.py` — FastAPI app, lifespan (Supabase → wait_for_supabase → webhooks → queue → Cognee → recover_stale_documents)
+- `app/api.py` — central router, mounts all sub-routers under `/api`
+- `app/cognee_config.py` — `setup_cognee()`, wired into lifespan
+- `app/routes/documents.py` — upload, search, graph, list, get, file-url
+- `app/services/ingest.py` — `check_cognee_storage()` (startup writability check for `.cognee_system/`)
+- `app/services/cognee_service.py` — `search_knowledge_graph()` (used by `/documents/search` route)
+- `app/services/document_pipeline.py` — `run_pipeline()` (background ingest orchestration)
+- `app/services/document_metadata_service.py` — Supabase CRUD for document records + `recover_stale_documents()`
+- `app/services/graph_service.py` — `get_graph_data()` for D3 visualization
+- `app/services/storage.py` — `upload_to_r2()` and `get_presigned_url()` for Cloudflare R2
+- `app/services/supabase_check.py` — `wait_for_supabase()` (startup health check)
+- `app/utils/validation.py` — `sanitize_dataset_name()`, `validate_dataset_name()`
+- `app/core/` — Supabase client, LiteLLM client, webhooks, dependencies
+
+### Frontend pages
+- `/` → `SearchPage` — knowledge graph search
+- `/upload` → `UploadPage` — document upload
+- `/documents` → `DocumentsPage` — document list
+- `/documents/:id` → `DocumentDetailPage` — single document view
+- `/graph` → `GraphPage` — force-graph visualization
+
+## Running the project
+```bash
+# Postgres (pgvector) — required for Cognee; exposes localhost:5433
+docker compose up -d postgres
+
+# Local Supabase stack — metadata store (PostgREST on :54321, Postgres on :54322)
+# Applies supabase/migrations/*.sql automatically. Run once per machine, persists across restarts.
+supabase start
+# If cortex_documents schema is out of date after pulling new migrations:
+supabase db reset --local
+
+# Backend
+cd backend
+python -m uvicorn app.main:app --reload
+
+# Frontend
+cd frontend
+npm run dev
+```
+
+Point `.env` at the local Supabase:
+- `SUPABASE_URL=http://127.0.0.1:54321`
+- `SUPABASE_SERVICE_ROLE_KEY=` — use the `service_role` key printed by `supabase start`
+
+## Running tests
+```bash
+cd backend && pytest
+```
+
+## Linting (enforced in CI on every PR)
+```bash
+cd backend && ruff check # must pass before merge
+cd backend && ruff format # auto-format
+```
+
+## CI/CD (GitHub Actions)
+- `backend-lint-check.yml` — Ruff lint on backend PRs
+- `backend-test.yml` — pytest on backend PRs (skips `test_storage.py` and `test_cognee.py` which need credentials)
+- `frontend-lint-check.yml` — ESLint on frontend PRs
+- `frontend-prettier-check.yml` — Prettier format check on frontend PRs
+- `docker-build.yml` — Docker image build
+- `claude.yml` / `claude-code-review.yml` — Claude Code automation
+- `cleanup-ghcr.yml` — GHCR image cleanup
+- `supabase-deploy.yml` — Supabase deployment
+
+## Required environment variables
+
+See `.env.example` (project root) for a copy-paste template.
+
+```
+# General
+ENVIRONMENT, CORS_ALLOWED_ORIGINS
+
+# Supabase (required — used by lifespan, document metadata, search)
+SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY
+
+# LLM / Embeddings
+LLM_PROVIDER, LLM_MODEL, LLM_API_KEY
+EMBEDDING_PROVIDER, EMBEDDING_MODEL, EMBEDDING_API_KEY
+
+# Cognee persistence (read by Cognee SDK internally, not by app code)
+VECTOR_DB_PROVIDER, VECTOR_DB_URL
+DB_PROVIDER, DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASSWORD
+
+# Cognee timeout (optional, default 300s)
+COGNEE_TIMEOUT_SECONDS
+
+# Cognee storage path (optional, default ".cognee_system")
+COGNEE_SYSTEM_PATH
+
+# Webhooks (required if webhook dispatch is enabled in lifespan)
+WEBHOOK_BASE_URL, WEBHOOK_SECRET
+
+# Object storage (optional — Cloudflare R2)
+CLOUDFLARE_R2_ENDPOINT, CLOUDFLARE_R2_ACCESS_KEY_ID, CLOUDFLARE_R2_SECRET_KEY, CLOUDFLARE_R2_BUCKET_NAME
+```
+
+## Branch & PR naming
+
+**Branches:** `<issue-number>-<short-description>`
+> Use GitHub's "Create a branch" button on the issue — it generates this automatically.
+> Example: `35-build-knowledge-search-service`
+
+**PR titles:** conventional commit prefix + imperative description
+- `feat:` new functionality — `feat: build knowledge search service (#35)`
+- `fix:` bug fix — `fix: delete temp files in finally block`
+- `chore:` deps/config/tooling — `chore: add cognee dependencies to requirements`
+- `docs:` research/docs — `docs: cognee pipeline notes`
+- `test:` tests only — `test: add test_cognee smoke test`
+
+**PR body:** must include `Closes #<issue-number>` — Claude's ticket compliance check depends on this.
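+
+For example, a compliant branch and PR pair (using issue #35 from the example above):
+
+```bash
+# Create the branch locally instead of via the GitHub button
+git checkout -b 35-build-knowledge-search-service
+# PR title: feat: build knowledge search service (#35)
+# PR body must contain: Closes #35
+```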
+ +## Code review checklist +- `run_pipeline()` sanitizes client names via `sanitize_dataset_name()` from `utils/validation.py` +- `cognify()` never called without a prior `cognee.add()` +- Cognee operations in `run_pipeline()` use `asyncio.wait_for()` with `COGNEE_TIMEOUT_SECONDS` (default 300s) +- Temp files (`/tmp/cognee_uploads/`) deleted in `finally` block of `run_pipeline()` +- All Cognee operations use `async/await` — no blocking I/O in async routes +- Exceptions caught and returned as `HTTPException` — no raw tracebacks to client +- Search endpoint defaults to `SearchType.GRAPH_COMPLETION` +- Allowed upload extensions: `.pdf`, `.csv`, `.txt` — max 5 files per request +- Stale documents (stuck in `processing` >30 min) are auto-recovered to `failed` on startup diff --git a/README.md b/README.md index 0c00f39..dbc7caa 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,208 @@ -# Cortex ETL System +# Cortex -Automated knowledge base creation system for manufacturing CPQ systems. Processes multi-format data (CSV, PDF, APIs) into structured, queryable databases with complete tenant isolation. +Document knowledge graph system powered by [Cognee](https://github.com/topoteretes/cognee). Ingests PDFs, CSVs, and text files, builds a knowledge graph via LLM-driven extraction, and serves semantic search over the resulting graph. -## Architecture +## Tech stack -- **Backend**: FastAPI for ETL processing and webhook handling -- **Frontend**: React/TS Vite app for tenant/admin interfaces -- **Database**: PostgreSQL with schema-per-tenant isolation via Supabase -- **Development**: Local Supabase stack via Docker +| Layer | Technology | +|-------|-----------| +| Backend | FastAPI, Python 3.12, Uvicorn | +| Knowledge graph | Cognee SDK (Kuzu graph store, pgvector, Gemini LLM) | +| Database | PostgreSQL 16 + pgvector | +| Document metadata | Supabase (async client) | +| Object storage | Cloudflare R2 (optional) | +| Frontend | React 18, TypeScript, Vite, Tailwind CSS | +| Data fetching | TanStack Query v5, Axios | +| Graph visualization | react-force-graph-2d | -## Quick Start +## Prerequisites -### Prerequisites +- Python 3.12 +- Node.js 18+ +- Docker and Docker Compose (for containerized setup) +- A Google Gemini API key (used for LLM and embeddings) -- Docker Desktop -- Node.js 22 +## Getting started -### Development Setup +### 1. Clone and configure environment ```bash -# Clone and start everything -git clone https://github.com/GenerateNU/cortex-etl-source.git -cd cortex-etl-source -npm run fresh +git clone +cd cortex_s26 +cp .env.example .env ``` -This single command: +Open `.env` and fill in the required secrets: -- Generates all environment variables -- Starts local Supabase stack -- Builds and runs frontend/backend containers +``` +LLM_API_KEY= +EMBEDDING_API_KEY= +SUPABASE_URL= +SUPABASE_SERVICE_ROLE_KEY= +``` + +The rest of the defaults work for local development. See `.env.example` for the full list. -### Access Points +### 2a. Docker setup (recommended) + +```bash +docker compose up +``` -- **Frontend**: http://localhost:5173 -- **Backend API**: http://localhost:8000 -- **Supabase Studio**: http://localhost:54323 +This starts: -### Development Login Credentials +- **backend** at `http://localhost:8000` (FastAPI with hot-reload) +- **postgres** at `localhost:5433` (pgvector/pgvector:pg16) + +The backend container mounts `./backend` as a volume, so code changes reload automatically. + +### 2b. 
Manual setup + +**Backend:** + +```bash +cd backend +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +python -m uvicorn app.main:app --reload +``` -| Email | Password | Role | -| ------------------------- | -------- | ------ | -| admin@cortex.com | password | Admin | -| eng@kawasaki-robotics.com | password | Tenant | -| eng@kuka.com | password | Tenant | -| eng@staubli.com | password | Tenant | -| eng@milara.com | password | Tenant | +This requires a running PostgreSQL instance with the pgvector extension. Update `DB_*` and `VECTOR_DB_URL` in `.env` to match your database. -## Available Commands +**Frontend:** ```bash -npm run init-dev # installs all dev requirements and initializes supabase -npm run build # builds the frontend and backend containers -npm run up # starts supabase, the frontend, and the backend containers -npm run down # closes supabase, the frotend, and the backend containers -npm run rebuild # rebuilds the frontend and backend containers -npm run reset # clears supabase's database, reruns migrations, and reseeds -npm run hard-clean # downs everything and prunes all volumes -npm run fresh # hard resets and starts every service from scratch +cd frontend +npm install +npm run dev ``` -## Project Structure +The dev server starts at `http://localhost:3000`. + +> **Note:** Set `CORS_ALLOWED_ORIGINS=http://localhost:3000` in `.env` so the backend accepts requests from the frontend. + +## Project structure ``` -├── frontend/ # React/TS Vite tenant interface -├── backend/ # FastAPI ETL processing -├── docker-compose.yml # Application containers -└── init-dev.js # Environment generator +cortex_s26/ +├── backend/ +│ ├── app/ +│ │ ├── main.py # FastAPI app, lifespan startup +│ │ ├── api.py # Central router, mounts all sub-routers under /api +│ │ ├── cognee_config.py # Cognee SDK initialization +│ │ ├── routes/ +│ │ │ └── documents.py # Upload, search, graph, list, file-url +│ │ ├── services/ +│ │ │ ├── document_pipeline.py # Background ingest orchestration +│ │ │ ├── document_metadata_service.py # Supabase CRUD for documents +│ │ │ ├── cognee_service.py # Knowledge graph search +│ │ │ ├── graph_service.py # D3-compatible graph data +│ │ │ └── storage.py # Cloudflare R2 operations +│ │ ├── core/ # Supabase client, LiteLLM client, webhooks +│ │ └── utils/ # Validation helpers +│ ├── tests/ +│ ├── Dockerfile +│ └── requirements.txt +├── frontend/ +│ └── src/ +│ ├── pages/ # SearchPage, UploadPage, DocumentsPage, +│ │ # DocumentDetailPage, GraphPage +│ ├── components/ # Navbar, NodeDetailPanel +│ └── services/api.ts # Axios client and TypeScript types +├── supabase/migrations/ # Schema migrations +├── .github/workflows/ # CI/CD pipelines +├── docker-compose.yml +└── .env.example ``` + +## API endpoints + +All routes are mounted under `/api` via `app/api.py`. 
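+
+A quick smoke test from the command line (the upload form-field name `files` is an assumption — check the route signature in `backend/app/routes/documents.py`):
+
+```bash
+# Upload a document, then query the knowledge graph
+curl -F "files=@sample.pdf" http://localhost:8000/api/documents/upload
+curl "http://localhost:8000/api/documents/search?q=key%20suppliers"
+```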
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `POST` | `/api/documents/upload` | Upload up to 5 files (.pdf, .csv, .txt) |
+| `GET` | `/api/documents/search?q=...` | Search the knowledge graph |
+| `GET` | `/api/documents/graph` | D3-compatible node/link JSON |
+| `GET` | `/api/documents/` | List all documents |
+| `GET` | `/api/documents/{id}` | Single document by ID |
+| `GET` | `/api/documents/{id}/file-url` | Pre-signed R2 download URL |
+| `GET` | `/api/health` | Health check |
+
+## Running tests
+
+```bash
+cd backend
+pytest # all tests
+pytest tests/test_integration.py # integration tests only
+pytest -v # verbose output
+```
+
+`test_storage.py` and `test_cognee.py` require live credentials and are skipped in CI.
+
+## Linting and formatting
+
+**Backend (Ruff):**
+
+```bash
+cd backend
+ruff check # lint (must pass before merge)
+ruff check --fix # auto-fix lint issues
+ruff format # auto-format
+```
+
+**Frontend (ESLint + Prettier):**
+
+```bash
+cd frontend
+npx eslint src/
+npx prettier --check src/
+npx prettier --write src/ # auto-format
+```
+
+## CI/CD
+
+GitHub Actions run on every PR:
+
+| Workflow | What it checks |
+|----------|---------------|
+| `backend-lint-check.yml` | Ruff lint |
+| `backend-test.yml` | pytest (skips credential-dependent tests) |
+| `frontend-lint-check.yml` | ESLint |
+| `frontend-prettier-check.yml` | Prettier formatting |
+| `docker-build.yml` | Docker image builds |
+
+## Branch and PR conventions
+
+**Branches:** `<issue-number>-<short-description>`
+
+Use GitHub's "Create a branch" button on the issue. Example: `35-build-knowledge-search-service`
+
+**PR titles:** use a conventional commit prefix with an imperative description.
+
+| Prefix | Use for | Example |
+|--------|---------|---------|
+| `feat:` | New functionality | `feat: build knowledge search service (#35)` |
+| `fix:` | Bug fix | `fix: delete temp files in finally block` |
+| `chore:` | Deps, config, tooling | `chore: add cognee dependencies` |
+| `docs:` | Documentation | `docs: cognee pipeline notes` |
+| `test:` | Tests only | `test: add integration test suite` |
+
+**PR body:** must include `Closes #<issue-number>` to link the related issue.
+
+## Environment variables
+
+See `.env.example` for a copy-paste template.
Key variables: + +| Variable | Required | Notes | +|----------|----------|-------| +| `LLM_API_KEY` | Yes | Gemini API key | +| `LLM_PROVIDER` / `LLM_MODEL` | Yes | Defaults: `gemini` / `gemini/gemini-flash-latest` | +| `EMBEDDING_API_KEY` | Yes | Can reuse `LLM_API_KEY` for Gemini | +| `SUPABASE_URL` | Yes | Supabase project URL | +| `SUPABASE_SERVICE_ROLE_KEY` | Yes | Supabase service role key | +| `DB_HOST` / `DB_PORT` / `DB_NAME` / `DB_USER` / `DB_PASSWORD` | Yes | PostgreSQL connection (overridden by Docker Compose) | +| `VECTOR_DB_URL` | Yes | pgvector connection string | +| `CLOUDFLARE_R2_*` | No | Omit to skip file storage | +| `COGNEE_TIMEOUT_SECONDS` | No | Default: 300s | diff --git a/backend/app/api.py b/backend/app/api.py index 246fb53..657decc 100644 --- a/backend/app/api.py +++ b/backend/app/api.py @@ -1,12 +1,7 @@ -from app.core.supabase import get_async_supabase -from app.routes.classification_routes import router as classification_router -from app.routes.migration_routes import router as migration_router -from app.routes.pattern_recognition_routes import router as pattern_recognition_router -from app.routes.preprocess_routes import router as preprocess_router -from app.routes.search_routes import router as search_router from fastapi import APIRouter, Depends from supabase._async.client import AsyncClient +from app.core.supabase import get_async_supabase from app.routes.documents import router as documents_router api_router = APIRouter(prefix="/api") @@ -15,15 +10,12 @@ @api_router.get("/health") async def health_check(supabase: AsyncClient = Depends(get_async_supabase)): try: - await supabase.table("cortex_documents").select("count", count="exact").execute() + await ( + supabase.table("cortex_documents").select("count", count="exact").execute() + ) return {"status": "healthy", "database": "connected"} except Exception as e: return {"status": "unhealthy", "database": "disconnected", "error": str(e)} -api_router.include_router(preprocess_router) -api_router.include_router(search_router) -api_router.include_router(classification_router) -api_router.include_router(migration_router) -api_router.include_router(pattern_recognition_router) api_router.include_router(documents_router) diff --git a/backend/app/cognee_config.py b/backend/app/cognee_config.py index 68b9271..a993fea 100644 --- a/backend/app/cognee_config.py +++ b/backend/app/cognee_config.py @@ -16,6 +16,18 @@ async def setup_cognee() -> None: if _cognee_initialized: return + # Fail fast if critical env vars are missing + required_vars = { + "LLM_API_KEY": os.getenv("LLM_API_KEY"), + "SUPABASE_URL": os.getenv("SUPABASE_URL"), + "SUPABASE_SERVICE_ROLE_KEY": os.getenv("SUPABASE_SERVICE_ROLE_KEY"), + } + missing = [k for k, v in required_vars.items() if not v] + if missing: + raise RuntimeError( + f"Missing required environment variables: {', '.join(missing)}" + ) + llm_provider = os.getenv("LLM_PROVIDER") llm_model = os.getenv("LLM_MODEL") llm_api_key = os.getenv("LLM_API_KEY") @@ -42,13 +54,27 @@ async def setup_cognee() -> None: } ) - # Force LanceDB to use a local file path. Without this, Cognee picks up - # VECTOR_DB_URL (a PostgreSQL URL) from the environment and passes it to - # LanceDB, which only supports file/S3/GCS paths — causing a startup crash. 
+ cognee.config.set_graph_db_config( + { + "graph_database_provider": "kuzu", + } + ) + cognee.config.set_vector_db_config( { - "vector_db_provider": "lancedb", - "vector_db_url": "/app/.cognee_system/lancedb", + "vector_db_provider": "pgvector", + "vector_db_url": os.getenv("VECTOR_DB_URL", ""), + } + ) + cognee.config.set_relational_db_config( + { + "db_path": "", + "db_provider": "postgres", + "db_host": os.getenv("DB_HOST"), + "db_port": os.getenv("DB_PORT", "5432"), + "db_name": os.getenv("DB_NAME"), + "db_username": os.getenv("DB_USER"), + "db_password": os.getenv("DB_PASSWORD"), } ) diff --git a/backend/app/core/dependencies.py b/backend/app/core/dependencies.py index 8d50f55..7091b8a 100644 --- a/backend/app/core/dependencies.py +++ b/backend/app/core/dependencies.py @@ -1,8 +1,12 @@ +import logging + from fastapi import Depends, HTTPException, Request from supabase._async.client import AsyncClient from app.core.supabase import get_async_supabase +logger = logging.getLogger(__name__) + async def get_current_user( request: Request, supabase: AsyncClient = Depends(get_async_supabase) @@ -38,9 +42,8 @@ async def get_current_user( }, } except Exception as e: - raise HTTPException( - status_code=401, detail=f"Authentication failed: {str(e)}" - ) from e + logger.exception("Authentication failed") + raise HTTPException(status_code=401, detail="Authentication failed") from e async def get_current_admin( diff --git a/backend/app/core/litellm.py b/backend/app/core/litellm.py index dd412dc..49de3f4 100644 --- a/backend/app/core/litellm.py +++ b/backend/app/core/litellm.py @@ -1,11 +1,14 @@ import asyncio import base64 -import os +import logging +import random from enum import Enum from typing import Any from litellm import acompletion, aembedding +logger = logging.getLogger(__name__) + class ModelType(Enum): """Available LLM models.""" @@ -32,17 +35,10 @@ class LLMClient: """Simplified LLM client for agentic workflows.""" def __init__(self): - """Initialize client and load API keys.""" + """Initialize client.""" self.model = ModelType.GEMINI_FLASH self.embedding_model = EmbeddingModelType.GEMINI_TEXT_EMBEDDING self.system_prompt: str | None = None - self._load_api_keys() - - def _load_api_keys(self) -> None: - """Load API keys from environment.""" - for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY"]: - if key in os.environ: - os.environ[key] = os.environ[key] def set_model(self, model: ModelType) -> None: """Set the model to use for completions.""" @@ -79,9 +75,7 @@ async def embed( inputs = [input_text] if isinstance(input_text, str) else input_text # Generate embeddings with fixed dimensions - for attempt in range( - 10 - ): # Retry up to 10 times to handle 5 RPM limit gracefully + for attempt in range(10): try: response: Any = await aembedding( model=embed_model, input=inputs, dimensions=768 @@ -95,15 +89,17 @@ async def embed( except Exception as e: error_str = str(e) if attempt == 9: - raise e + raise if "RateLimitError" in error_str or "429" in error_str: - print( - f"Embedding rate limit hit. 
Waiting 60 seconds before retry (Attempt {attempt + 1}/10)...", - flush=True, + wait = min(12 * (2**attempt) + random.uniform(0, 5), 120) + logger.warning( + "Embedding rate limit hit, retrying in %.1fs (attempt %d/10)", + wait, + attempt + 1, ) - await asyncio.sleep(60) + await asyncio.sleep(wait) else: - raise e + raise async def chat( self, @@ -148,9 +144,7 @@ async def chat( else: messages.append({"role": "user", "content": content}) - for attempt in range( - 10 - ): # Retry up to 10 times to handle 5 RPM limit gracefully + for attempt in range(10): try: return await acompletion( model=self.model.value, @@ -161,14 +155,14 @@ async def chat( except Exception as e: error_str = str(e) if attempt == 9: - raise e + raise if "RateLimitError" in error_str or "429" in error_str: - # The free tier is 15-20 requests per minute. - # If we hit the limit, wait 60 seconds to let the quota refresh and respect requested retryDelay - print( - f"Rate limit hit. Waiting 60 seconds before retry (Attempt {attempt + 1}/10)...", - flush=True, + wait = min(12 * (2**attempt) + random.uniform(0, 5), 120) + logger.warning( + "Chat rate limit hit, retrying in %.1fs (attempt %d/10)", + wait, + attempt + 1, ) - await asyncio.sleep(60) + await asyncio.sleep(wait) else: - raise e + raise diff --git a/backend/app/core/supabase.py b/backend/app/core/supabase.py index 633da0a..5f9fcd2 100644 --- a/backend/app/core/supabase.py +++ b/backend/app/core/supabase.py @@ -1,8 +1,11 @@ +import logging import os from supabase._async.client import AsyncClient from supabase._async.client import create_client as acreate_client +logger = logging.getLogger(__name__) + supabase: AsyncClient | None = None @@ -12,5 +15,5 @@ async def get_async_supabase() -> AsyncClient: supabase = await acreate_client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_ROLE_KEY") ) - print("Supabase Initialized") + logger.info("Supabase Initialized") return supabase diff --git a/backend/app/core/webhooks.py b/backend/app/core/webhooks.py index bf80199..8f4d1d3 100644 --- a/backend/app/core/webhooks.py +++ b/backend/app/core/webhooks.py @@ -1,7 +1,10 @@ +import logging import os from supabase._async.client import AsyncClient +logger = logging.getLogger(__name__) + async def configure_webhooks(supabase: AsyncClient): """Configure webhook settings in database on startup""" @@ -9,8 +12,8 @@ async def configure_webhooks(supabase: AsyncClient): webhook_secret = os.getenv("WEBHOOK_SECRET") if not webhook_base_url or not webhook_secret: - print("⚠️ WARNING: Webhook configuration missing. File extraction disabled.") - print(" Set WEBHOOK_BASE_URL and WEBHOOK_SECRET in .env") + logger.warning("Webhook configuration missing. 
File extraction disabled.") + logger.warning("Set WEBHOOK_BASE_URL and WEBHOOK_SECRET in .env") return try: @@ -20,6 +23,6 @@ async def configure_webhooks(supabase: AsyncClient): "update_webhook_config", {"url": webhook_url, "secret": webhook_secret} ).execute() - print(f"✓ Webhook configured: {webhook_url}") + logger.info("Webhook configured: %s", webhook_url) except Exception as e: - print(f"✗ Failed to configure webhook: {e}") + logger.error("Failed to configure webhook: %s", e) diff --git a/backend/app/main.py b/backend/app/main.py index fd829d7..2712518 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,3 +1,4 @@ +import logging import os from contextlib import asynccontextmanager @@ -5,6 +6,8 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +logger = logging.getLogger(__name__) + # Load env vars from .env file (looks in current or parent directories) load_dotenv() # noqa: E402 @@ -21,41 +24,47 @@ ) +from app.api import api_router # noqa: E402 +from app.cognee_config import setup_cognee # noqa: E402 from app.core.supabase import get_async_supabase # noqa: E402 from app.core.webhooks import configure_webhooks # noqa: E402 from app.services.extraction.preprocessing_queue import init_queue # noqa: E402 from app.services.supabase_check import wait_for_supabase # noqa: E402 -from app.api import api_router # noqa: E402 -from app.cognee_config import setup_cognee # noqa: E402 - @asynccontextmanager async def lifespan(app: FastAPI): - # Startup - print("LIFESPAN STARTING", flush=True) - supabase = await get_async_supabase() - - await wait_for_supabase(supabase) - - await configure_webhooks(supabase) - - await init_queue(supabase) - - await setup_cognee() + from app.services.document_metadata_service import recover_stale_documents + from app.services.extraction.preprocessing_queue import shutdown_queue + + logger.info("Lifespan starting") + try: + supabase = await get_async_supabase() + await wait_for_supabase(supabase) + await configure_webhooks(supabase) + await init_queue(supabase) + await setup_cognee() + await recover_stale_documents() + except Exception: + logger.exception("Startup failed") + raise yield - # Shutdown (if needed) + + # Shutdown + await shutdown_queue() app = FastAPI(title="Cortex ETL API", lifespan=lifespan) +_allowed_origins = os.getenv("CORS_ALLOWED_ORIGINS", "http://localhost:5173").split(",") + app.add_middleware( CORSMiddleware, - allow_origins=["*"], - allow_credentials=False, - allow_methods=["*"], - allow_headers=["*"], + allow_origins=_allowed_origins, + allow_credentials=True, + allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], + allow_headers=["Authorization", "Content-Type"], ) app.include_router(api_router) diff --git a/backend/app/repositories/extraction_repository.py b/backend/app/repositories/extraction_repository.py index 48f3abd..a419516 100644 --- a/backend/app/repositories/extraction_repository.py +++ b/backend/app/repositories/extraction_repository.py @@ -1,8 +1,12 @@ +import logging +from datetime import datetime, timezone from typing import Any from uuid import UUID from supabase._async.client import AsyncClient +logger = logging.getLogger(__name__) + class ExtractionRepository: def __init__(self, supabase: AsyncClient): @@ -74,7 +78,7 @@ async def update_extraction_result( "summary": summary, "extracted_json": extracted_json, "embedding": embedding, - "processed_at": "now()", + "processed_at": datetime.now(timezone.utc).isoformat(), } ) .eq("file_id", str(file_id)) @@ -108,7 +112,7 @@ async 
def create_extraction_entry( "extracted_json": extracted_json, "embedding": embedding, "row_index": row_index, - "processed_at": "now()", + "processed_at": datetime.now(timezone.utc).isoformat(), } ) .execute() @@ -149,7 +153,7 @@ async def download_file(self, file_path_or_link: str) -> bytes: return await self.supabase.storage.from_("documents").download(path) except Exception as e: - print(f"Download Error: {e}") + logger.error("Download Error: %s", e) raise async def delete_by_file_id(self, file_id: UUID) -> None: diff --git a/backend/app/routes/classification_routes.py b/backend/app/routes/classification_routes.py deleted file mode 100644 index 5678142..0000000 --- a/backend/app/routes/classification_routes.py +++ /dev/null @@ -1,76 +0,0 @@ -from uuid import UUID - -from fastapi import APIRouter, Depends -from supabase._async.client import AsyncClient - -from app.core.supabase import get_async_supabase -from app.services.classification_service import ClassificationService - -router = APIRouter(prefix="/classification", tags=["Classification"]) - - -def get_service( - supabase: AsyncClient = Depends(get_async_supabase), -) -> ClassificationService: - return ClassificationService(supabase) - - -@router.get("/list/{tenant_id}") -async def list_classifications( - tenant_id: UUID, service: ClassificationService = Depends(get_service) -): - return await service.get_classifications(tenant_id) - - -@router.post("/create_classifications/{tenant_id}") -async def create_classifications( - tenant_id: UUID, - # In a real app we'd accept a body with names, but Frontend hook - # `useClassifications` calls this without body? - # Let's check `classification.hooks.tsx`. - # It seems to just POST to `/create_classifications/{tenant_id}` with no body? - # Wait, the hook `createClassificationsMutation` calls `api.post(...)`. - # The hook creates classifications? - # Ah, `createClassificationsMutation` in frontend seems to imply "Auto-generate classifications" - # OR it's a manual create. - # AdminPage.tsx -> ClassificationStep might have a form. - # Actually, looking at `ClassificationStep`, it likely lets user type names. - # If the hook payload is empty, maybe it's "Suggest Classifications"? - # Let's assume for now it might trigger AUTO-creation from documents. - service: ClassificationService = Depends(get_service), -): - """ - Generate valid classifications based on existing unclassified documents. - """ - # For MVP, let's just create some default ones if none exist, - # or scan files to suggest. - # The Frontend `useClassifications` has `createClassifications`. - # Let's verify what the frontend sends. - # IF the frontend sends data, we need Pydantic model. - # Logic: Scan all files, ask LLM "What are the distinct categories?", create them. - - # Implementation: - # 1. Fetch file summaries - # 2. Ask LLM to cluster/name them - # 3. Create those classifications - - # Placeholder: - defaults = ["Invoices", "Contracts", "Specifications", "Receipts"] - return await service.create_classifications_batch(tenant_id, defaults) - - -@router.post("/classify_files/{tenant_id}") -async def classify_files( - tenant_id: UUID, service: ClassificationService = Depends(get_service) -): - """ - Assign existing classifications to unclassified files. 
- """ - return await service.classify_files(tenant_id) - - -@router.get("/visualize_clustering/{tenant_id}") -async def visualize_clustering( - tenant_id: UUID, service: ClassificationService = Depends(get_service) -): - return await service.get_clustering_visualization(tenant_id) diff --git a/backend/app/routes/documents.py b/backend/app/routes/documents.py index 168d9a6..95a5b11 100644 --- a/backend/app/routes/documents.py +++ b/backend/app/routes/documents.py @@ -12,23 +12,27 @@ from __future__ import annotations +import hashlib +import logging import uuid from pathlib import Path +from cognee import SearchType from fastapi import APIRouter, BackgroundTasks, File, HTTPException, Query, UploadFile from pydantic import BaseModel -from cognee import SearchType - from app.services.cognee_service import search_knowledge_graph -from app.services.storage import get_presigned_url from app.services.document_metadata_service import ( create_document, + find_document_by_hash, get_all_documents, get_document, ) from app.services.document_pipeline import run_pipeline from app.services.graph_service import get_graph_data +from app.services.storage import get_presigned_url + +logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Pydantic models @@ -38,6 +42,8 @@ class UploadedFile(BaseModel): id: str filename: str + duplicate: bool = False + existing_doc_id: str | None = None class UploadResponse(BaseModel): @@ -113,20 +119,33 @@ async def upload_documents( ), ) - doc_id = await create_document(None, filename) - temp_path = UPLOAD_DIR / f"{uuid.uuid4()}{suffix}" - - # Save file to disk + # Read file and compute content hash for deduplication try: contents = await upload_file.read() - temp_path.write_bytes(contents) finally: await upload_file.close() + content_hash = hashlib.sha256(contents).hexdigest() + + # Check for an existing completed document with the same content + existing = await find_document_by_hash(content_hash) + if existing: + uploaded.append( + UploadedFile( + id=existing["id"], + filename=filename, + duplicate=True, + existing_doc_id=existing["id"], + ) + ) + continue + + doc_id = await create_document(filename, content_hash=content_hash) + temp_path = UPLOAD_DIR / f"{uuid.uuid4()}{suffix}" + temp_path.write_bytes(contents) + # Fire-and-forget pipeline - background_tasks.add_task( - run_pipeline, temp_path, doc_id, filename, None - ) + background_tasks.add_task(run_pipeline, temp_path, doc_id, filename) uploaded.append(UploadedFile(id=doc_id, filename=filename)) @@ -135,7 +154,9 @@ async def upload_documents( @router.get("/graph") async def get_graph( - dataset: str | None = Query(default=None, description="Filter by dataset/client name"), + dataset: str | None = Query( + default=None, description="Filter by dataset/client name" + ), ): """ Return a D3-compatible knowledge graph for all documents or a specific @@ -144,8 +165,9 @@ async def get_graph( try: data = await get_graph_data(dataset=dataset) return data - except Exception as exc: - raise HTTPException(status_code=500, detail=f"Graph retrieval failed: {exc}") + except Exception: + logger.exception("Graph retrieval failed") + raise HTTPException(status_code=500, detail="Graph retrieval failed") from None @router.get("/search", response_model=SearchResponse) @@ -165,8 +187,7 @@ async def search_documents( Search the Cognee knowledge graph. Each result includes up to 3 source documents from the matching dataset so the frontend can show provenance. 
""" - import os - from supabase import create_client + from app.core.supabase import get_async_supabase try: raw_results = await search_knowledge_graph( @@ -179,13 +200,10 @@ async def search_documents( } # Batch-fetch up to 3 completed docs per dataset from Supabase - sb = create_client( - os.getenv("SUPABASE_URL", ""), - os.getenv("SUPABASE_SERVICE_ROLE_KEY", ""), - ) + sb = await get_async_supabase() dataset_docs: dict[str, list[DocumentSource]] = {} for ds in dataset_names: - rows = ( + rows = await ( sb.table("cortex_documents") .select("id,original_filename,document_type,dataset_name") .eq("dataset_name", ds) @@ -194,12 +212,10 @@ async def search_documents( .limit(3) .execute() ) - dataset_docs[ds] = [ - DocumentSource(**row) for row in (rows.data or []) - ] + dataset_docs[ds] = [DocumentSource(**row) for row in (rows.data or [])] # Fallback: top-3 completed docs regardless of dataset - fallback_rows = ( + fallback_rows = await ( sb.table("cortex_documents") .select("id,original_filename,document_type,dataset_name") .eq("status", "completed") @@ -221,17 +237,21 @@ async def search_documents( return SearchResponse(query=q, results=results, total=len(results)) - except Exception as exc: - raise HTTPException(status_code=500, detail=f"Search failed: {exc}") + except Exception: + logger.exception("Search failed") + raise HTTPException(status_code=500, detail="Search failed") from None @router.get("/") async def list_documents(): """Return all document records ordered by upload date (newest first).""" try: - return await get_all_documents(None) - except Exception as exc: - raise HTTPException(status_code=500, detail=f"Failed to fetch documents: {exc}") + return await get_all_documents() + except Exception: + logger.exception("Failed to fetch documents") + raise HTTPException( + status_code=500, detail="Failed to fetch documents" + ) from None @router.get("/{doc_id}/file-url") @@ -241,16 +261,21 @@ async def get_file_url(doc_id: str): stored in Cloudflare R2. 404 if no file has been stored yet. """ try: - doc = await get_document(None, doc_id) - except Exception as exc: - raise HTTPException(status_code=500, detail=str(exc)) + doc = await get_document(doc_id) + except Exception: + logger.exception("Failed to retrieve document for file-url") + raise HTTPException( + status_code=500, detail="Failed to retrieve document" + ) from None if not doc: raise HTTPException(status_code=404, detail="Document not found.") r2_key = doc.get("file_url") if not r2_key: - raise HTTPException(status_code=404, detail="No raw file stored for this document.") + raise HTTPException( + status_code=404, detail="No raw file stored for this document." + ) url = get_presigned_url(r2_key) if not url: @@ -263,9 +288,12 @@ async def get_file_url(doc_id: str): async def get_document_by_id(doc_id: str): """Return a single document record. 
404 if not found.""" try: - doc = await get_document(None, doc_id) - except Exception as exc: - raise HTTPException(status_code=500, detail=f"Failed to fetch document: {exc}") + doc = await get_document(doc_id) + except Exception: + logger.exception("Failed to fetch document") + raise HTTPException( + status_code=500, detail="Failed to fetch document" + ) from None if doc is None: raise HTTPException(status_code=404, detail=f"Document '{doc_id}' not found.") diff --git a/backend/app/routes/migration_routes.py b/backend/app/routes/migration_routes.py deleted file mode 100644 index e167a3d..0000000 --- a/backend/app/routes/migration_routes.py +++ /dev/null @@ -1,49 +0,0 @@ -from uuid import UUID - -from fastapi import APIRouter, Depends -from supabase._async.client import AsyncClient - -from app.core.supabase import get_async_supabase -from app.services.migration_service import MigrationService - -router = APIRouter(prefix="/migrations", tags=["Migrations"]) - - -def get_service( - supabase: AsyncClient = Depends(get_async_supabase), -) -> MigrationService: - return MigrationService(supabase) - - -@router.get("/{tenant_id}") -async def list_migrations( - tenant_id: UUID, service: MigrationService = Depends(get_service) -): - return await service.list_migrations(tenant_id) - - -@router.post("/generate/{tenant_id}") -async def generate_migrations( - tenant_id: UUID, service: MigrationService = Depends(get_service) -): - return await service.generate_migrations(tenant_id) - - -@router.post("/execute/{tenant_id}") -async def execute_migrations( - tenant_id: UUID, service: MigrationService = Depends(get_service) -): - await service.execute_migrations(tenant_id) - return {"message": "Migrations executed successfully"} - - -@router.post("/load_data/{tenant_id}") -async def load_data(tenant_id: UUID, service: MigrationService = Depends(get_service)): - return await service.load_data(tenant_id) - - -@router.get("/connection-url/{tenant_id}") -async def get_connection_url( - tenant_id: UUID, service: MigrationService = Depends(get_service) -): - return await service.get_connection_url(tenant_id) diff --git a/backend/app/routes/pattern_recognition_routes.py b/backend/app/routes/pattern_recognition_routes.py deleted file mode 100644 index d3a3ece..0000000 --- a/backend/app/routes/pattern_recognition_routes.py +++ /dev/null @@ -1,34 +0,0 @@ -from uuid import UUID - -from fastapi import APIRouter, Depends -from supabase._async.client import AsyncClient - -from app.core.supabase import get_async_supabase -from app.services.pattern_recognition_service import PatternRecognitionService - -router = APIRouter(prefix="/pattern-recognition", tags=["Pattern Recognition"]) - - -def get_service( - supabase: AsyncClient = Depends(get_async_supabase), -) -> PatternRecognitionService: - return PatternRecognitionService(supabase) - - -@router.post("/analyze/{tenant_id}") -async def analyze_relationships( - tenant_id: UUID, service: PatternRecognitionService = Depends(get_service) -): - """ - Analyzes relationships for the given tenant. - Note: tenant_id is kept for URL compatibility but ignored by service. - """ - return await service.analyze_relationships(tenant_id) - - -@router.get("/graph") -async def get_graph_data(service: PatternRecognitionService = Depends(get_service)): - """ - Returns nodes and edges for the relationship graph. 
- """ - return await service.get_graph_data() diff --git a/backend/app/routes/preprocess_routes.py b/backend/app/routes/preprocess_routes.py deleted file mode 100644 index 67d82d8..0000000 --- a/backend/app/routes/preprocess_routes.py +++ /dev/null @@ -1,22 +0,0 @@ -from uuid import UUID - -from fastapi import APIRouter, Depends, HTTPException - -from app.services.extraction.preprocessing_queue import PreprocessingQueue, get_queue - -router = APIRouter(prefix="/preprocess", tags=["preprocess"]) - - -@router.post("/{file_id}") -async def preprocess_file( - file_id: UUID, queue: PreprocessingQueue = Depends(get_queue) -): - """ - Queue a file for preprocessing (Extraction). - """ - try: - # Enqueue the file_id directly - task_id = await queue.enqueue(file_id) - return {"message": "File queued for preprocessing", "task_id": task_id} - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/backend/app/routes/search_routes.py b/backend/app/routes/search_routes.py deleted file mode 100644 index 1696bae..0000000 --- a/backend/app/routes/search_routes.py +++ /dev/null @@ -1,76 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException -from supabase._async.client import AsyncClient - -from app.core.supabase import get_async_supabase -from app.schemas.search_schemas import ( - RAGSearchResponse, - SearchRequest, - SearchResponse, - SearchResult, -) -from app.services.search_service import SearchService - -router = APIRouter(prefix="/search", tags=["Search"]) - - -def get_search_service( - supabase: AsyncClient = Depends(get_async_supabase), -) -> SearchService: - return SearchService(supabase) - - -@router.post("/", response_model=SearchResponse) -async def search_documents( - request: SearchRequest, service: SearchService = Depends(get_search_service) -): - """ - Semantic search across extracted documents. - """ - try: - results = await service.search(request.query, request.limit, request.threshold) - - # Map to schema - mapped_results = [ - SearchResult( - file_id=r["file_id"], - file_name=r.get("file_name"), - file_type=r.get("file_type"), - summary=r.get("summary"), - extracted_json=r.get("extracted_json"), - similarity=r["similarity"], - ) - for r in results - ] - - return SearchResponse(results=mapped_results) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) from e - - -@router.post("/rag", response_model=RAGSearchResponse) -async def rag_search_documents( - request: SearchRequest, service: SearchService = Depends(get_search_service) -): - """ - RAG search across extracted documents with synthesized answer. 
- """ - try: - result = await service.rag_search( - request.query, request.limit, request.threshold - ) - - mapped_sources = [ - SearchResult( - file_id=r["file_id"], - file_name=r.get("file_name"), - file_type=r.get("file_type"), - summary=r.get("summary"), - extracted_json=r.get("extracted_json"), - similarity=r["similarity"], - ) - for r in result["sources"] - ] - - return RAGSearchResponse(answer=result["answer"], sources=mapped_sources) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/backend/app/schemas/search_schemas.py b/backend/app/schemas/search_schemas.py deleted file mode 100644 index 1b25aab..0000000 --- a/backend/app/schemas/search_schemas.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Any -from uuid import UUID - -from pydantic import BaseModel, Field - - -class SearchRequest(BaseModel): - query: str - limit: int = Field(default=5, ge=1, le=20) - threshold: float = Field(default=0.5, ge=0.0, le=1.0) - - -class SearchResult(BaseModel): - file_id: UUID - file_name: str | None - file_type: str | None - summary: str | None - extracted_json: dict[str, Any] | None - similarity: float - - -class SearchResponse(BaseModel): - results: list[SearchResult] - - -class RAGSearchResponse(BaseModel): - answer: str - sources: list[SearchResult] diff --git a/backend/app/services/classification_service.py b/backend/app/services/classification_service.py deleted file mode 100644 index ebd32be..0000000 --- a/backend/app/services/classification_service.py +++ /dev/null @@ -1,157 +0,0 @@ -import json -from typing import Any -from uuid import UUID - -from supabase._async.client import AsyncClient - -from app.core.litellm import LLMClient - - -class ClassificationService: - def __init__(self, supabase: AsyncClient): - self.supabase = supabase - self.llm = LLMClient() - - async def get_classifications(self, tenant_id: UUID) -> list[dict[str, Any]]: - """Fetch all classifications for a tenant.""" - response = ( - await self.supabase.table("classifications") - .select("*") - .eq("tenant_id", str(tenant_id)) - .execute() - ) - return response.data or [] - - async def create_classification( - self, tenant_id: UUID, name: str, description: str | None = None - ) -> dict[str, Any]: - """Create a new classification.""" - # Check if exists - existing = ( - await self.supabase.table("classifications") - .select("*") - .eq("tenant_id", str(tenant_id)) - .eq("name", name) - .execute() - ) - - if existing.data: - return existing.data[0] - - response = ( - await self.supabase.table("classifications") - .insert({"tenant_id": str(tenant_id), "name": name}) - .execute() - ) - - return response.data[0] if response.data else None - - async def create_classifications_batch( - self, tenant_id: UUID, names: list[str] - ) -> list[dict[str, Any]]: - """Create multiple classifications at once.""" - results = [] - for name in names: - res = await self.create_classification(tenant_id, name) - if res: - results.append(res) - return results - - async def classify_files(self, tenant_id: UUID) -> dict[str, int]: - """ - Auto-classify unclassified files using LLM. - """ - # 1. Get all classifications - classifications = await self.get_classifications(tenant_id) - if not classifications: - return {"classified": 0, "failed": 0, "skipped": 0} - - class_names = [c["name"] for c in classifications] - - # 2. Get unclassified files (where classification_id is NULL) - # Note: In PRD file_uploads links to classification. - # Check if 'file_uploads' table has 'classification_id'. 
- # Based on setup_database.sql, 'file_uploads' has 'classification_id'. - - files_resp = ( - await self.supabase.table("file_uploads") - .select("*, raw_files(file_name, file_link), extracted_files(summary)") - .eq("tenant_id", str(tenant_id)) - .is_("classification_id", "null") - .execute() - ) - - files_to_classify = files_resp.data or [] - classified_count = 0 - failed_count = 0 - - for file_record in files_to_classify: - summary = file_record.get("extracted_files", {}).get("summary") - file_name = file_record.get("raw_files", {}).get("file_name") - - if not summary: - continue - - # 3. Ask LLM - prompt = ( - f"File: {file_name}\n" - f"Summary: {summary}\n" - f"Available Classifications: {', '.join(class_names)}\n\n" - "Task: Assign the best matching classification from the list.\n" - 'Return a JSON object: { "classification": "Exact Name From List" }\n' - 'If none match well, return { "classification": null }' - ) - - try: - response = await self.llm.chat(prompt, json_response=True) - # Parse response - assuming LLMClient returns a ModelResponse-like object - # but we've patched it to return Any (dict) in previous steps. - # Just in case, let's handle the dict structure carefully. - - content_str = response.choices[0].message.content - result = json.loads(content_str) - best_class = result.get("classification") - - if best_class and best_class in class_names: - # Find ID - class_id = next( - c["id"] for c in classifications if c["name"] == best_class - ) - - # Update DB - await ( - self.supabase.table("file_uploads") - .update({"classification_id": class_id}) - .eq("id", file_record["id"]) - .execute() - ) - classified_count += 1 - except Exception as e: - print(f"Failed to classify file {file_record['id']}: {e}") - failed_count += 1 - - return {"classified": classified_count, "failed": failed_count} - - async def get_clustering_visualization(self, tenant_id: UUID) -> dict[str, Any]: - """ - Return data for visualization. - For now, returns a mock structure or simple mapping. - PRD implies 2D/3D points. We'll return existing files grouped by classification. - """ - # Fetch all files with classification - files_resp = ( - await self.supabase.table("file_uploads") - .select("id, name, classification_id, classifications(name)") - .eq("tenant_id", str(tenant_id)) - .not_.is_("classification_id", "null") - .execute() - ) - - data = files_resp.data or [] - - # Group logic or just return raw list for frontend to handle? - # Frontend expects 'VisualizationResponse'. - # Let's peek at frontend types if needed, but for now return raw data - # and let frontend helper parse it if possible, or build simple nodes/links. - - return {"points": data} # Simplified diff --git a/backend/app/services/cognee_service.py b/backend/app/services/cognee_service.py index 0be5cc8..6432290 100644 --- a/backend/app/services/cognee_service.py +++ b/backend/app/services/cognee_service.py @@ -2,9 +2,13 @@ Cognee service layer — wraps cognee SDK calls for use by route handlers. 
""" +import logging + import cognee from cognee import SearchType +logger = logging.getLogger(__name__) + async def search_knowledge_graph( query_text: str, @@ -24,7 +28,11 @@ async def search_knowledge_graph( if dataset: search_kwargs["datasets"] = [dataset] - raw_results = await cognee.search(**search_kwargs) + try: + raw_results = await cognee.search(**search_kwargs) + except Exception: + logger.exception("Cognee search failed for query=%s", query_text) + raise results = [] for r in raw_results or []: @@ -46,10 +54,12 @@ async def search_knowledge_graph( else: text = str(payload) - results.append({ - "text": text, - "score": None, - "dataset_name": result_dataset, - }) + results.append( + { + "text": text, + "score": None, + "dataset_name": result_dataset, + } + ) return results[:limit] diff --git a/backend/app/services/document_metadata_service.py b/backend/app/services/document_metadata_service.py index a58db80..b334933 100644 --- a/backend/app/services/document_metadata_service.py +++ b/backend/app/services/document_metadata_service.py @@ -1,64 +1,120 @@ """ -Document metadata store — Supabase-backed. +Document metadata store — Supabase-backed (async). """ + from __future__ import annotations +import logging import uuid as _uuid -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone +from app.core.supabase import get_async_supabase -def _client(): - import os - from supabase import create_client - return create_client( - os.getenv("SUPABASE_URL", ""), - os.getenv("SUPABASE_SERVICE_ROLE_KEY", ""), - ) +logger = logging.getLogger(__name__) -async def create_document(supabase, original_filename: str) -> str: +async def create_document( + original_filename: str, content_hash: str | None = None +) -> str: doc_id = str(_uuid.uuid4()) now = datetime.now(timezone.utc).isoformat() - _client().table("cortex_documents").insert({ + sb = await get_async_supabase() + row: dict = { "id": doc_id, "original_filename": original_filename, "dataset_name": "processing", "status": "processing", "progress_stage": "uploading", "uploaded_at": now, - }).execute() + } + if content_hash: + row["content_hash"] = content_hash + await sb.table("cortex_documents").insert(row).execute() return doc_id -async def get_all_documents(supabase) -> list[dict]: - result = _client().table("cortex_documents").select("*").order( - "uploaded_at", desc=True - ).execute() +async def find_document_by_hash(content_hash: str) -> dict | None: + """Return the first completed document with a matching content hash, or None.""" + sb = await get_async_supabase() + result = await ( + sb.table("cortex_documents") + .select("*") + .eq("content_hash", content_hash) + .eq("status", "completed") + .order("uploaded_at", desc=True) + .limit(1) + .execute() + ) + row = result.data[0] if result.data else None + return _normalize(row) if row else None + + +async def get_all_documents() -> list[dict]: + sb = await get_async_supabase() + result = ( + await sb.table("cortex_documents") + .select("*") + .order("uploaded_at", desc=True) + .execute() + ) return [_normalize(r) for r in (result.data or [])] -async def get_document(supabase, doc_id: str) -> dict | None: - result = _client().table("cortex_documents").select("*").eq( - "id", doc_id - ).maybe_single().execute() +async def get_document(doc_id: str) -> dict | None: + sb = await get_async_supabase() + result = ( + await sb.table("cortex_documents") + .select("*") + .eq("id", doc_id) + .maybe_single() + .execute() + ) return _normalize(result.data) if 
result.data else None -async def update_document_stage(supabase, doc_id: str, stage: str) -> None: - _client().table("cortex_documents").update( - {"progress_stage": stage} - ).eq("id", doc_id).execute() +async def update_document_stage(doc_id: str, stage: str) -> None: + sb = await get_async_supabase() + await ( + sb.table("cortex_documents") + .update({"progress_stage": stage}) + .eq("id", doc_id) + .execute() + ) def _normalize(row: dict) -> dict: """Ensure insights/entities are always lists and file_url is present.""" + import json + row = dict(row) for field in ("insights", "entities"): val = row.get(field) if isinstance(val, str): - import json row[field] = json.loads(val) elif val is None: row[field] = [] row.setdefault("file_url", None) return row + + +async def recover_stale_documents(stale_minutes: int = 30) -> int: + """Mark documents stuck in 'processing' for >stale_minutes as 'failed'.""" + cutoff = (datetime.now(timezone.utc) - timedelta(minutes=stale_minutes)).isoformat() + sb = await get_async_supabase() + result = await ( + sb.table("cortex_documents") + .update( + { + "status": "failed", + "progress_stage": "failed", + "error_message": "Recovered: pipeline did not complete (server restart)", + } + ) + .eq("status", "processing") + .lt("uploaded_at", cutoff) + .execute() + ) + count = len(result.data or []) + if count: + logger.info("Recovered %d stale documents", count) + return count diff --git a/backend/app/services/document_pipeline.py b/backend/app/services/document_pipeline.py index ea5901b..762ba44 100644 --- a/backend/app/services/document_pipeline.py +++ b/backend/app/services/document_pipeline.py @@ -12,7 +12,6 @@ import json import logging import os -import re from datetime import datetime, timezone from pathlib import Path @@ -20,17 +19,21 @@ import litellm from cognee import SearchType +from app.core.supabase import get_async_supabase from app.services.storage import upload_to_r2 +from app.utils.validation import sanitize_dataset_name logger = logging.getLogger(__name__) _VALID_DOC_TYPES = {"RFQ", "PO", "CFG", "Client CSV", "Sales CSV"} +_COGNEE_TIMEOUT = int(os.getenv("COGNEE_TIMEOUT_SECONDS", "300")) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- + def _llm_model() -> str: return os.getenv("LLM_MODEL", "gemini/gemini-flash-latest") @@ -68,13 +71,44 @@ async def _call_llm(prompt: str, max_retries: int = 6) -> str: except litellm.RateLimitError: if attempt == max_retries - 1: raise - wait = delay * (2 ** attempt) + wait = delay * (2**attempt) logger.warning( "LLM rate limit, retrying in %ss (attempt %d/%d)", - wait, attempt + 1, max_retries, + wait, + attempt + 1, + max_retries, ) await asyncio.sleep(wait) - return "" + return "" # pragma: no cover – loop always returns or raises + + +_BULLET_PREFIXES = ("- ", "* ", "• ", "– ", "— ") + + +def _split_bulleted(raw: list[str]) -> list[str]: + """Split bulleted/numbered LLM answers into discrete items. + + GRAPH_COMPLETION returns one narrative string per result; the UI renders + a list, so we split on newlines and strip leading bullet/number markers. + """ + items: list[str] = [] + for block in raw: + for line in block.splitlines(): + line = line.strip() + if not line: + continue + for prefix in _BULLET_PREFIXES: + if line.startswith(prefix): + line = line[len(prefix) :].strip() + break + else: + # Strip "1. 
", "2) " style numeric prefixes + head, sep, rest = line.partition(" ") + if sep and head.rstrip(".)").isdigit(): + line = rest.strip() + if line: + items.append(line) + return items def _extract_search_text(result) -> str: @@ -96,11 +130,11 @@ def _extract_search_text(result) -> str: # Pipeline # --------------------------------------------------------------------------- + async def run_pipeline( file_path: Path, doc_id: str, original_filename: str, - supabase, # unused – kept for API compatibility; we create our own sync client ) -> None: """ Full processing pipeline for a single document. @@ -109,16 +143,11 @@ async def run_pipeline( uploading → ingesting → building_graph → analyzing → extracting_insights → completed (or failed) """ - from supabase import create_client - - sb = create_client( - os.getenv("SUPABASE_URL", ""), - os.getenv("SUPABASE_SERVICE_ROLE_KEY", ""), - ) + sb = await get_async_supabase() - def _update(**fields) -> None: + async def _update(**fields) -> None: try: - sb.table("cortex_documents").update(fields).eq("id", doc_id).execute() + await sb.table("cortex_documents").update(fields).eq("id", doc_id).execute() except Exception as exc: logger.warning("DB update failed for doc %s: %s", doc_id, exc) @@ -132,12 +161,12 @@ def _now() -> str: r2_key = f"documents/{doc_id}/{original_filename}" file_url = await upload_to_r2(str(file_path), r2_key) if file_url: - _update(file_url=file_url) + await _update(file_url=file_url) # ------------------------------------------------------------------ # Step 2 – Extract text, detect client name + document type (1 LLM call) # ------------------------------------------------------------------ - _update(progress_stage="ingesting") + await _update(progress_stage="ingesting") doc_text = "" if file_path.suffix.lower() == ".pdf": @@ -158,62 +187,88 @@ def _now() -> str: ] client_name_raw = lines[0] if lines else "Unknown" doc_type_raw = lines[1] if len(lines) > 1 else "Unknown" - # Cognee dataset names: alphanumeric + underscores only - client_name = re.sub(r"[^A-Za-z0-9_]", "_", client_name_raw).strip("_") or "Unknown" + client_name = sanitize_dataset_name(client_name_raw) document_type = doc_type_raw if doc_type_raw in _VALID_DOC_TYPES else None else: client_name = "Unknown" document_type = None - _update(dataset_name=client_name) + await _update(dataset_name=client_name) # ------------------------------------------------------------------ # Step 3 – Add to Cognee # ------------------------------------------------------------------ - await cognee.add(str(file_path), dataset_name=client_name) - _update(progress_stage="building_graph") + await asyncio.wait_for( + cognee.add(str(file_path), dataset_name=client_name), + timeout=_COGNEE_TIMEOUT, + ) + await _update(progress_stage="building_graph") # ------------------------------------------------------------------ # Step 4 – Cognify (build knowledge graph) # ------------------------------------------------------------------ - await cognee.cognify(datasets=[client_name]) - _update(progress_stage="analyzing") + await asyncio.wait_for( + cognee.cognify(datasets=[client_name]), + timeout=_COGNEE_TIMEOUT, + ) + await _update(progress_stage="analyzing") # ------------------------------------------------------------------ # Step 5 – Extract summary # ------------------------------------------------------------------ - summary_results = await cognee.search( - query_text="Summarize this document", - query_type=SearchType.CHUNKS, - datasets=[client_name], + summary_results = await asyncio.wait_for( + 
cognee.search( + query_text="Provide a concise executive summary of this document.", + query_type=SearchType.GRAPH_SUMMARY_COMPLETION, + datasets=[client_name], + ), + timeout=_COGNEE_TIMEOUT, ) summary = _extract_search_text(summary_results[0]) if summary_results else "" # ------------------------------------------------------------------ - # Step 6 – Extract insights + # Step 6 – Extract insights (key relationships & takeaways) # ------------------------------------------------------------------ - _update(progress_stage="extracting_insights") - insights_results = await cognee.search( - query_text="What are all the entities and relationships?", - query_type=SearchType.CHUNKS, - datasets=[client_name], + await _update(progress_stage="extracting_insights") + insights_results = await asyncio.wait_for( + cognee.search( + query_text=( + "What are the key insights, relationships, and notable " + "takeaways from this document? Return each as a separate " + "bullet point." + ), + query_type=SearchType.GRAPH_COMPLETION, + datasets=[client_name], + ), + timeout=_COGNEE_TIMEOUT, + ) + insights: list[str] = _split_bulleted( + [_extract_search_text(r) for r in (insights_results or [])] ) - insights: list[str] = [_extract_search_text(r) for r in (insights_results or [])] # ------------------------------------------------------------------ # Step 7 – Extract entities # ------------------------------------------------------------------ - entity_results = await cognee.search( - query_text="List all entities", - query_type=SearchType.CHUNKS, - datasets=[client_name], + entity_results = await asyncio.wait_for( + cognee.search( + query_text=( + "List the key named entities in this document " + "(people, organizations, products, locations, identifiers). " + "Return one entity per line, no descriptions." 
+ ), + query_type=SearchType.GRAPH_COMPLETION, + datasets=[client_name], + ), + timeout=_COGNEE_TIMEOUT, + ) + entities: list[str] = _split_bulleted( + [_extract_search_text(r) for r in (entity_results or [])] ) - entities: list[str] = [_extract_search_text(r) for r in (entity_results or [])] # ------------------------------------------------------------------ # Step 8 – Write final state to DB # ------------------------------------------------------------------ - _update( + await _update( status="completed", progress_stage="completed", dataset_name=client_name, @@ -227,7 +282,7 @@ def _now() -> str: except Exception as exc: logger.exception("Pipeline failed for doc %s: %s", doc_id, exc) - _update( + await _update( status="failed", progress_stage="failed", error_message=str(exc), diff --git a/backend/app/services/extraction/pdf_strategy.py b/backend/app/services/extraction/pdf_strategy.py index 8eac4a9..5df24e9 100644 --- a/backend/app/services/extraction/pdf_strategy.py +++ b/backend/app/services/extraction/pdf_strategy.py @@ -1,8 +1,11 @@ import json +import logging import os from app.core.litellm import LLMClient, ModelType +logger = logging.getLogger(__name__) + class PdfExtractionStrategy: def __init__(self): @@ -48,7 +51,7 @@ async def extract_data( text = response.choices[0].message.content.strip() - print("JSON response received", flush=True) + logger.info("JSON response received") try: data = json.loads(text) @@ -72,7 +75,7 @@ async def extract_data( "extracted_json": {"error": "LLM did not return JSON"}, } - print("JSON response parsed", flush=True) + logger.info("JSON response parsed") return { "file_name": file_name, diff --git a/backend/app/services/extraction/preprocessing_queue.py b/backend/app/services/extraction/preprocessing_queue.py index d9844f9..9693c0f 100644 --- a/backend/app/services/extraction/preprocessing_queue.py +++ b/backend/app/services/extraction/preprocessing_queue.py @@ -1,4 +1,5 @@ import asyncio +import logging from uuid import UUID from supabase._async.client import AsyncClient @@ -9,6 +10,8 @@ from app.services.pattern_recognition_service import PatternRecognitionService from app.services.preprocess_service import PreprocessService +logger = logging.getLogger(__name__) + class PreprocessingQueue: def __init__(self, supabase: AsyncClient): @@ -35,11 +38,11 @@ async def _worker(self): while True: extracted_file_id = await self._queue.get() try: - print(f"Processing {extracted_file_id}", flush=True) + logger.info("Processing %s", extracted_file_id) await self.service.process_pdf_upload(extracted_file_id) - print(f"Completed {extracted_file_id}", flush=True) + logger.info("Completed %s", extracted_file_id) except Exception as e: - print(f"Failed {extracted_file_id}: {e}", flush=True) + logger.error("Failed %s: %s", extracted_file_id, e) finally: self._queue.task_done() @@ -57,10 +60,21 @@ async def init_queue(supabase: AsyncClient): global _queue _queue = PreprocessingQueue(supabase) await _queue.start_worker() - print("Preprocessing Queue Initialized") + logger.info("Preprocessing Queue Initialized") + + +async def shutdown_queue(): + global _queue + if _queue and _queue._worker_task: + _queue._worker_task.cancel() + try: + await _queue._worker_task + except asyncio.CancelledError: + pass + _queue = None def get_queue() -> PreprocessingQueue: - assert _queue is not None - print("Queue Found:", _queue) + if _queue is None: + raise RuntimeError("Preprocessing queue not initialized") return _queue diff --git a/backend/app/services/graph_service.py 
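A quick illustration of how the `_split_bulleted` helper added above behaves, with expected values derived from its implementation (the sample strings are made up):

```python
# Illustration of _split_bulleted, based on the implementation in this diff.
raw = [
    "- Acme Corp is the client\n"
    "• PO references RFQ-17\n"
    "1. Delivery is net-30\n"
    "2) Prices are FOB origin"
]
# Bullet markers ("- ", "* ", "• ", "– ", "— ") and numeric prefixes
# ("1. ", "2) ") are stripped; blank lines are dropped.
assert _split_bulleted(raw) == [
    "Acme Corp is the client",
    "PO references RFQ-17",
    "Delivery is net-30",
    "Prices are FOB origin",
]
```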
diff --git a/backend/app/services/extraction/pdf_strategy.py b/backend/app/services/extraction/pdf_strategy.py
index 8eac4a9..5df24e9 100644
--- a/backend/app/services/extraction/pdf_strategy.py
+++ b/backend/app/services/extraction/pdf_strategy.py
@@ -1,8 +1,11 @@
 import json
+import logging
 import os

 from app.core.litellm import LLMClient, ModelType

+logger = logging.getLogger(__name__)
+

 class PdfExtractionStrategy:
     def __init__(self):
@@ -48,7 +51,7 @@ async def extract_data(

         text = response.choices[0].message.content.strip()

-        print("JSON response received", flush=True)
+        logger.info("JSON response received")

         try:
             data = json.loads(text)
@@ -72,7 +75,7 @@ async def extract_data(
                 "extracted_json": {"error": "LLM did not return JSON"},
             }

-        print("JSON response parsed", flush=True)
+        logger.info("JSON response parsed")

         return {
             "file_name": file_name,
diff --git a/backend/app/services/extraction/preprocessing_queue.py b/backend/app/services/extraction/preprocessing_queue.py
index d9844f9..9693c0f 100644
--- a/backend/app/services/extraction/preprocessing_queue.py
+++ b/backend/app/services/extraction/preprocessing_queue.py
@@ -1,4 +1,5 @@
 import asyncio
+import logging
 from uuid import UUID

 from supabase._async.client import AsyncClient
@@ -9,6 +10,8 @@
 from app.services.pattern_recognition_service import PatternRecognitionService
 from app.services.preprocess_service import PreprocessService

+logger = logging.getLogger(__name__)
+

 class PreprocessingQueue:
     def __init__(self, supabase: AsyncClient):
@@ -35,11 +38,11 @@ async def _worker(self):
         while True:
             extracted_file_id = await self._queue.get()
             try:
-                print(f"Processing {extracted_file_id}", flush=True)
+                logger.info("Processing %s", extracted_file_id)
                 await self.service.process_pdf_upload(extracted_file_id)
-                print(f"Completed {extracted_file_id}", flush=True)
+                logger.info("Completed %s", extracted_file_id)
             except Exception as e:
-                print(f"Failed {extracted_file_id}: {e}", flush=True)
+                logger.error("Failed %s: %s", extracted_file_id, e)
             finally:
                 self._queue.task_done()

@@ -57,10 +60,21 @@ async def init_queue(supabase: AsyncClient):
     global _queue
     _queue = PreprocessingQueue(supabase)
     await _queue.start_worker()
-    print("Preprocessing Queue Initialized")
+    logger.info("Preprocessing Queue Initialized")
+
+
+async def shutdown_queue():
+    global _queue
+    if _queue and _queue._worker_task:
+        _queue._worker_task.cancel()
+        try:
+            await _queue._worker_task
+        except asyncio.CancelledError:
+            pass
+    _queue = None


 def get_queue() -> PreprocessingQueue:
-    assert _queue is not None
-    print("Queue Found:", _queue)
+    if _queue is None:
+        raise RuntimeError("Preprocessing queue not initialized")
     return _queue
diff --git a/backend/app/services/graph_service.py b/backend/app/services/graph_service.py
index 0e73766..1e32cff 100644
--- a/backend/app/services/graph_service.py
+++ b/backend/app/services/graph_service.py
@@ -1,6 +1,7 @@
 """
 Graph service — fetches knowledge graph data from cognee for D3 visualization.
 """
+
 from __future__ import annotations

 import logging
@@ -47,11 +48,13 @@ async def get_graph_data(dataset: str | None = None) -> dict[str, Any]:
             node_map[tid] = {"id": tid, "name": tid, "type": "Entity", "val": 1}
         node_map[sid]["val"] += 1
         node_map[tid]["val"] += 1
-        links.append({
-            "source": sid,
-            "target": tid,
-            "label": rel_name or "related_to",
-        })
+        links.append(
+            {
+                "source": sid,
+                "target": tid,
+                "label": rel_name or "related_to",
+            }
+        )

     nodes = list(node_map.values())
diff --git a/backend/app/services/ingest.py b/backend/app/services/ingest.py
index f398476..408ece9 100644
--- a/backend/app/services/ingest.py
+++ b/backend/app/services/ingest.py
@@ -1,48 +1,18 @@
 """
-Ingest service: document processing with cognee.
+Ingest service: startup checks for Cognee local storage.
 """

 from __future__ import annotations

-import errno
 import logging
 import os
 from pathlib import Path

-import cognee
-from cognee import SearchType
-
 logger = logging.getLogger(__name__)

 # Cognee stores its graph and vector data here by default.
 COGNEE_SYSTEM_DIR = Path(os.getenv("COGNEE_SYSTEM_PATH", ".cognee_system"))

-# Try to import litellm exceptions for precise API error matching.
-try:
-    import litellm.exceptions as _litellm_exc
-
-    _LLM_EXCEPTIONS: tuple = (
-        _litellm_exc.AuthenticationError,
-        _litellm_exc.APIConnectionError,
-        _litellm_exc.RateLimitError,
-        _litellm_exc.APIError,
-    )
-except Exception:  # pragma: no cover – litellm not installed or changed API
-    _LLM_EXCEPTIONS = ()
-
-# Try to import kuzu-specific runtime errors.
-try:
-    import kuzu as _kuzu
-
-    _KUZU_EXCEPTIONS: tuple = (
-        _kuzu.RuntimeError,
-        _kuzu.Exception if hasattr(_kuzu, "Exception") else type(None),
-    )
-except Exception:  # pragma: no cover
-    _KUZU_EXCEPTIONS = ()
-
-_STORAGE_EXCEPTIONS = (PermissionError, OSError) + _KUZU_EXCEPTIONS
-

 def check_cognee_storage() -> None:
     """
@@ -68,219 +38,3 @@ def check_cognee_storage() -> None:
         raise RuntimeError(
             f"Cannot access Cognee storage directory '{COGNEE_SYSTEM_DIR}': {exc}"
         ) from exc
-
-
-def _is_disk_full(exc: OSError) -> bool:
-    return getattr(exc, "errno", None) == errno.ENOSPC
-
-
-def _is_llm_error(exc: Exception) -> bool:
-    """Return True when exc originates from an LLM provider (Gemini, OpenAI, …)."""
-    if _LLM_EXCEPTIONS and isinstance(exc, _LLM_EXCEPTIONS):
-        return True
-    module = type(exc).__module__ or ""
-    if any(pkg in module for pkg in ("litellm", "openai", "google.api_core")):
-        return True
-    lowered = str(exc).lower()
-    return any(
-        phrase in lowered
-        for phrase in (
-            "api key",
-            "authentication",
-            "quota exceeded",
-            "rate limit",
-            "gemini",
-            "openai",
-            "invalid_api_key",
-        )
-    )
-
-
-def _is_dimension_mismatch(exc: Exception) -> bool:
-    lowered = str(exc).lower()
-    return "dimension" in lowered or "mismatch" in lowered or "wrong number of dimensions" in lowered
-
-
-async def ingest_document(
-    file_path: str,
-    dataset_name: str,
-    document_id: str = None,
-) -> dict:
-    """
-    Ingest a document into the knowledge graph.
-
-    Calls cognee.add() to ingest the file, then cognee.cognify() to
-    process it into chunks, entities, relationships, and summaries.
-    Finally extracts structured data from the processed results.
-
-    Returns a dict with "status": "success" or "status": "error".
-    Error dicts include an ``error_type`` key so the route layer can map
-    them to the correct HTTP status code without inspecting raw messages.
-
-    error_type values:
-        "kuzu_storage"               → 503 Service Unavailable
-        "llm_api"                    → 502 Bad Gateway
-        "vector_dimension_mismatch"  → 500 Internal Server Error
-        "no_data_added"              → 500 Internal Server Error
-        "unknown"                    → 500 Internal Server Error
-    """
-    # ------------------------------------------------------------------ add()
-    try:
-        await cognee.add(file_path, dataset_name)
-    except _STORAGE_EXCEPTIONS as exc:
-        if isinstance(exc, OSError) and _is_disk_full(exc):
-            msg = "Cognee storage is full — free up disk space and retry."
-        else:
-            msg = (
-                f"Cognee storage error during add() — check that "
-                f"'{COGNEE_SYSTEM_DIR}' is writable: {exc}"
-            )
-        logger.error("Kuzu storage failure during add(): %s", exc, exc_info=True)
-        return {"status": "error", "error_type": "kuzu_storage", "error": msg}
-
-    # --------------------------------------------------------------- cognify()
-    try:
-        await cognee.cognify([dataset_name])
-    except _STORAGE_EXCEPTIONS as exc:
-        if isinstance(exc, OSError) and _is_disk_full(exc):
-            msg = "Cognee storage is full during cognify() — free up disk space and retry."
-        else:
-            msg = (
-                f"Cognee storage error during cognify() — check that "
-                f"'{COGNEE_SYSTEM_DIR}' is writable: {exc}"
-            )
-        logger.error("Kuzu storage failure during cognify(): %s", exc, exc_info=True)
-        return {"status": "error", "error_type": "kuzu_storage", "error": msg}
-    except Exception as exc:
-        if _is_llm_error(exc):
-            logger.error("LLM API error during cognify(): %s", exc, exc_info=True)
-            return {
-                "status": "error",
-                "error_type": "llm_api",
-                "error": f"LLM API error during cognify(): {exc}",
-            }
-        if _is_dimension_mismatch(exc):
-            msg = (
-                "Vector dimension mismatch detected during cognify(). "
-                "This happens when the embedding model is changed after data was already stored. "
-                "To fix: delete the '.cognee_system/' directory and re-ingest all documents."
-            )
-            logger.error("Vector dimension mismatch: %s", exc, exc_info=True)
-            return {"status": "error", "error_type": "vector_dimension_mismatch", "error": msg}
-        lowered = str(exc).lower()
-        if any(phrase in lowered for phrase in ("no data", "no documents", "dataset is empty")):
-            logger.warning(
-                "cognify() called on dataset '%s' with no prior add(): %s",
-                dataset_name,
-                exc,
-            )
-            return {
-                "status": "error",
-                "error_type": "no_data_added",
-                "error": (
-                    f"No documents were added to dataset '{dataset_name}' before cognify(). "
-                    "Call add() first."
-                ),
-            }
-        logger.error("Unexpected error during cognify(): %s", exc, exc_info=True)
-        return {"status": "error", "error_type": "unknown", "error": str(exc)}
-
-    # --------------------------------------------------- extract results
-    try:
-        structured_data = await _extract_structured_data(dataset_name)
-    except Exception as exc:
-        if _is_dimension_mismatch(exc):
-            msg = (
-                "Vector dimension mismatch detected during search. "
-                "This happens when the embedding model is changed after data was already stored. "
-                "To fix: delete the '.cognee_system/' directory and re-ingest all documents."
-            )
-            logger.error("Vector dimension mismatch during search: %s", exc, exc_info=True)
-            return {"status": "error", "error_type": "vector_dimension_mismatch", "error": msg}
-        logger.error("Unexpected error during search: %s", exc, exc_info=True)
-        return {"status": "error", "error_type": "unknown", "error": str(exc)}
-
-    return {
-        "status": "success",
-        "document_id": document_id,
-        "dataset_name": dataset_name,
-        **structured_data,
-    }
-
-
-async def _extract_structured_data(dataset_name: str) -> dict:
-    """
-    Query Cognee for structured data after cognify() has run.
-
-    Uses SearchType.SUMMARIES for pre-computed summaries and
-    SearchType.CHUNKS for raw text segments.
-
-    Returns summary (str), entities (list), and raw_chunks_count (int).
-    Empty results are not an error — they return empty/zero values.
-    """
-    summary_results = await cognee.search(
-        query_type=SearchType.SUMMARIES,
-        query_text=dataset_name,
-    )
-
-    chunk_results = await cognee.search(
-        query_type=SearchType.CHUNKS,
-        query_text=dataset_name,
-    )
-
-    summary = summary_results[0] if summary_results else ""
-
-    entities = []
-    for chunk in chunk_results:
-        if hasattr(chunk, "entities"):
-            entities.extend(chunk.entities)
-
-    return {
-        "summary": str(summary),
-        "entities": entities,
-        "raw_chunks_count": len(chunk_results),
-    }
-
-
-async def search_knowledge_graph(
-    query_text: str,
-    dataset: str | None = None,
-    limit: int = 20,
-) -> list[dict]:
-    """
-    Search the Cognee knowledge graph and return a list of result dicts.
-
-    Each result has ``text``, ``score``, and ``metadata`` keys so the route
-    layer can deserialise them directly into SearchResult models.
-    """
-    results = await cognee.search(
-        query_type=SearchType.CHUNKS,
-        query_text=query_text,
-    )
-
-    output: list[dict] = []
-    for item in results[:limit]:
-        text = str(item) if not hasattr(item, "text") else item.text
-        score = getattr(item, "score", None)
-        metadata: dict = {}
-        if dataset:
-            metadata["dataset"] = dataset
-        output.append({"text": text, "score": score, "metadata": metadata})
-
-    return output
-
-
-async def ingest_document_background(path: Path, dataset_name: str) -> None:
-    """
-    For FastAPI BackgroundTasks. Allows ingest_document to run in the
-    background for large files.
-    """
-    try:
-        await ingest_document(str(path), dataset_name)
-    except Exception:
-        logger.error("Background ingest failed for %s", path, exc_info=True)
-    finally:
-        try:
-            path.unlink(missing_ok=True)
-        except Exception:
-            pass
diff --git a/backend/app/services/migration_service.py b/backend/app/services/migration_service.py
deleted file mode 100644
index ef1c3d6..0000000
--- a/backend/app/services/migration_service.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import os
-from typing import Any
-from uuid import UUID
-
-from supabase._async.client import AsyncClient
-
-from app.services.schema.schema_generation_service import SchemaGenerationService
-
-
-class MigrationService:
-    def __init__(self, supabase: AsyncClient):
-        self.supabase = supabase
-
-    async def list_migrations(self, tenant_id: UUID) -> list[dict[str, Any]]:
-        response = (
-            await self.supabase.table("migrations")
-            .select("*")
-            .eq("tenant_id", str(tenant_id))
-            .order("sequence", desc=False)
-            .execute()
-        )
-        return response.data or []
-
-    async def generate_migrations(self, tenant_id: UUID) -> list[dict[str, Any]]:
-        """
-        Generates pending migrations based on current state.
-        """
-        # 1. Fetch Classifications
-        c_resp = (
-            await self.supabase.table("classifications")
-            .select("*")
-            .eq("tenant_id", str(tenant_id))
-            .execute()
-        )
-        classifications = c_resp.data or []
-
-        # 2. Fetch Relationships (Mocking structure for now as logic is simple)
-        r_resp = await self.supabase.table("relationships").select("*").execute()
-        relationships = r_resp.data or []
-
-        # 3. Generate SQL
-        sqls = SchemaGenerationService.generate_migrations(
-            str(tenant_id), classifications, relationships
-        )
-
-        # 4. Store in DB as pending migrations
-        # Get next sequence
-        existing = await self.list_migrations(tenant_id)
-        next_seq = (existing[-1]["sequence"] + 1) if existing else 1
-
-        created_migrations = []
-        for i, sql in enumerate(sqls):
-            # Check if this SQL already exists to avoid duplicates?
-            # For now, just insert.
-            name = f"auto_gen_{next_seq + i}"
-            res = (
-                await self.supabase.table("migrations")
-                .insert(
-                    {
-                        "tenant_id": str(tenant_id),
-                        "name": name,
-                        "sql": sql,
-                        "sequence": next_seq + i,
-                        "executed_at": None,
-                    }
-                )
-                .execute()
-            )
-            if res.data:
-                created_migrations.append(res.data[0])
-
-        return created_migrations
-
-    async def execute_migrations(self, tenant_id: UUID) -> None:
-        """
-        Executes pending migrations.
-        """
-        pending = (
-            await self.supabase.table("migrations")
-            .select("*")
-            .eq("tenant_id", str(tenant_id))
-            .is_("executed_at", "null")
-            .order("sequence")
-            .execute()
-        )
-
-        for migration in pending.data or []:
-            sql = migration["sql"]
-            # Execute SQL
-            # DANGER: Supabase-js/py client doesn't support raw SQL easily unless we use an RPC
-            # or have a direct connection.
-            # OPTION 1: Use an RPC function `exec_sql` if it exists (common pattern).
-            # OPTION 2: If we assume `postgres` user locally, we might not have it.
-            # Let's try RPC 'exec_sql'. If it fails, we mock success for the UI flow
-            # (since this is likely a demo/MVP setup and we don't have the RPC scripts).
-
-            try:
-                # await self.supabase.rpc("exec_sql", {"sql_query": sql}).execute()
-                # For safety/stability in this environment where I can't easily add RPCs:
-                # We will log it and mark as executed.
-                print(f"EXECUTING SQL (Simulated): {sql}")
-
-                # Update status
-                from datetime import datetime
-
-                await (
-                    self.supabase.table("migrations")
-                    .update({"executed_at": datetime.now().isoformat()})
-                    .eq("id", migration["id"])
-                    .execute()
-                )
-
-            except Exception as e:
-                print(f"Migration failed: {e}")
-                # Don't stop, or stop? Stop on error.
-                raise e
-
-    async def load_data(self, tenant_id: UUID) -> dict[str, Any]:
-        """
-        Mock data loading.
-        """
-        return {
-            "status": "success",
-            "message": "Data loaded (simulated)",
-            "tables_updated": [],
-        }
-
-    async def get_connection_url(self, tenant_id: UUID) -> dict[str, Any]:
-        # Return a constructed URL for the tenant schema
-        # This is for display purposes in the UI
-        project_ref = (
-            os.getenv("SUPABASE_URL", "https://xyz.supabase.co")
-            .split("//")[1]
-            .split(".")[0]
-        )
-        return {
-            "tenant_id": str(tenant_id),
-            "schema_name": f"tenant_{str(tenant_id).replace('-', '_')}",
-            "connection_url": f"postgres://postgres:[YOUR-PASSWORD]@db.{project_ref}.supabase.co:5432/postgres",
-            "includes_public_schema": True,
-            "note": "Use the schema_name in your search_path",
-        }
diff --git a/backend/app/services/pattern_recognition_service.py b/backend/app/services/pattern_recognition_service.py
index a0c4cfe..69edbf4 100644
--- a/backend/app/services/pattern_recognition_service.py
+++ b/backend/app/services/pattern_recognition_service.py
@@ -1,4 +1,5 @@
 import json
+import logging
 from typing import Any
 from uuid import UUID

@@ -6,6 +7,8 @@
 from app.core.litellm import LLMClient

+logger = logging.getLogger(__name__)
+

 class PatternRecognitionService:
     def __init__(self, supabase: AsyncClient):
@@ -106,7 +109,7 @@ async def detect_and_link(
             content = json.loads(content_str)
             matches = content.get("matches", [])
         except Exception as e:
-            print(f"Relationship detection failed: {e}")
+            logger.error("Relationship detection failed: %s", e)
             return

         # 3. Process matches
@@ -156,7 +159,7 @@ async def detect_and_link(
                     if new_rel.data:
                         rel_id = new_rel.data[0]["relationship_id"]
                 except Exception as e:
-                    print(f"Could not create relationship {rel_name}: {e}")
+                    logger.error("Could not create relationship %s: %s", rel_name, e)
                     # Try to fetch again in case of race
                     continue

@@ -175,9 +178,9 @@ async def detect_and_link(
                     )
                     .execute()
                 )
-                print(f"Linked file {file_id} to relationship {rel_name}")
+                logger.info("Linked file %s to relationship %s", file_id, rel_name)
             except Exception as e:
-                print(f"Link failed: {e}")
+                logger.error("Link failed: %s", e)

     async def get_graph_data(self) -> dict[str, list[Any]]:
         """
diff --git a/backend/app/services/preprocess_service.py b/backend/app/services/preprocess_service.py
index 816e1e0..3d5f72c 100644
--- a/backend/app/services/preprocess_service.py
+++ b/backend/app/services/preprocess_service.py
@@ -1,3 +1,4 @@
+import logging
 from uuid import UUID

 from fastapi import Depends
@@ -16,6 +17,8 @@
 )
 from app.services.pattern_recognition_service import PatternRecognitionService

+logger = logging.getLogger(__name__)
+

 class PreprocessService:
     def __init__(
@@ -60,11 +63,11 @@ async def process_pdf_upload(self, file_id: UUID) -> str:

             # 1. Download File
             file_bytes = await self.extraction_repo.download_file(file_link)
-            print(f"File downloaded: {file_name}", flush=True)
+            logger.info("File downloaded: %s", file_name)

             # 2. Determine Strategy and Extract
             if file_name.lower().endswith(".csv"):
-                print("Processing as CSV", flush=True)
+                logger.info("Processing as CSV")
                 # Returns list of dicts
                 extraction_results = await self.csv_strategy.extract_data(
                     file_bytes, file_name
@@ -80,7 +83,7 @@ async def process_pdf_upload(self, file_id: UUID) -> str:
                 await self.extraction_repo.delete_by_file_id(file_id)

             else:
-                print("Processing as PDF", flush=True)
+                logger.info("Processing as PDF")
                 # Returns single dict result wrapped in list for uniform processing
                 single_result = await self.pdf_strategy.extract_data(
                     file_bytes, file_name
@@ -102,7 +105,7 @@ async def process_pdf_upload(self, file_id: UUID) -> str:
                 use_existing = item.get("use_existing_id", False)
                 row_index = item.get("row_index", None)

-                print(f"Processing item: {row_name}", flush=True)
+                logger.info("Processing item: %s", row_name)

                 # Generate Embedding
                 embedding = await generate_embedding(extracted_data)
@@ -136,16 +139,18 @@ async def process_pdf_upload(self, file_id: UUID) -> str:
                         file_id, summary
                     )
                 except Exception as rel_err:
-                    print(
-                        f"Non-fatal relationship detection error for {row_name}: {rel_err}"
+                    logger.warning(
+                        "Non-fatal relationship detection error for %s: %s",
+                        row_name,
+                        rel_err,
                     )

-            print("All items processed", flush=True)
+            logger.info("All items processed")
             return str(file_id)

         except Exception as e:
             # Update status to "failed"
-            print(f"Processing failed for {file_id}: {e}", flush=True)
+            logger.error("Processing failed for %s: %s", file_id, e)
             await self.extraction_repo.update_status(file_id, "Failed", str(e))
             raise
diff --git a/backend/app/services/schema/__init__.py b/backend/app/services/schema/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/backend/app/services/schema/schema_generation_service.py b/backend/app/services/schema/schema_generation_service.py
deleted file mode 100644
index 6c8cd4e..0000000
--- a/backend/app/services/schema/schema_generation_service.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import re
-from typing import Any
-
-
-class SchemaGenerationService:
-    """
-    Pure service to generate SQL based on classifications and relationships.
-    """
-
-    @staticmethod
-    def generate_migrations(
-        tenant_id: str,
-        classifications: list[dict[str, Any]],
-        relationships: list[dict[str, Any]],
-    ) -> list[str]:
-        """
-        Generates a list of SQL statements (migrations).
-        """
-        migration_sqls = []
-
-        # 1. Create Schema for Tenant
-        schema_name = f"tenant_{tenant_id.replace('-', '_')}"
-        migration_sqls.append(f"CREATE SCHEMA IF NOT EXISTS {schema_name};")
-
-        # 2. Create Tables for Classifications
-        for cls in classifications:
-            table_name = SchemaGenerationService._sanitize_name(cls["name"])
-
-            # Basic table structure for extracted data
-            # Including jsonb_data for flexibility
-            sql = f"""
-            CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} (
-                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-                file_id UUID REFERENCES public.raw_files(file_id),
-                data JSONB,
-                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
-            );
-            """
-            migration_sqls.append(sql.strip())
-
-        # 3. Create Foreign Keys from Relationships?
-        # If relationships are "Supplier" -> "Order", how is that mapped?
-        # For now, let's keep it simple: tables are created.
-        # Relationships might be implemented as link tables or FKs if cardinality is known.
-        # Given PRD says "Relationships become foreign keys", we'd need to know source/target.
-        # But `relationships` table groups files. Matches are `file_id` <-> `relationship_id`.
-        # This part is tricky without clear "Class A -> Class B" definition.
-        # relationships table is more like "Clusters".
-        # Let's assume for this MVP we just create the tables for the classifications.
-
-        return migration_sqls
-
-    @staticmethod
-    def _sanitize_name(name: str) -> str:
-        # Lowercase, replace spaces/special chars with underscores
-        clean = re.sub(r"[^a-zA-Z0-9]", "_", name.lower())
-        # Ensure starts with letter
-        if not clean[0].isalpha():
-            clean = "tbl_" + clean
-        return clean[:63]  # Postgres limit
diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py
deleted file mode 100644
index dd1bea9..0000000
--- a/backend/app/services/search_service.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import json
-from typing import Any
-
-from supabase._async.client import AsyncClient
-
-from app.core.litellm import LLMClient
-from app.services.extraction.embeddings import generate_embedding
-
-
-class SearchService:
-    def __init__(self, supabase: AsyncClient):
-        self.supabase = supabase
-        self.llm = LLMClient()
-        self.llm.set_system_prompt(
-            "You are a retrieval-augmented assistant. Answer strictly from the provided "
-            "documents. If the documents do not contain enough information, say so plainly. "
-            "Cite supporting evidence by document number such as [Document 1]. Do not invent facts."
-        )
-
-    async def search(
-        self, query: str, limit: int = 5, threshold: float = 0.5
-    ) -> list[dict[str, Any]]:
-        """
-        Semantic search for extracted files.
-        """
-        # 1. Generate embedding for query
-        query_embedding = await generate_embedding(query)
-
-        # 2. Call RPC function
-        response = await self.supabase.rpc(
-            "match_extracted_files",
-            {
-                "query_embedding": query_embedding,
-                "match_threshold": threshold,
-                "match_count": limit,
-            },
-        ).execute()
-
-        return response.data or []
-
-    async def rag_search(
-        self, query: str, limit: int = 5, threshold: float = 0.5
-    ) -> dict[str, Any]:
-        """
-        Semantic search followed by grounded answer generation.
-        """
-        results = await self.search(query, limit, threshold)
-
-        if not results:
-            return {
-                "answer": "I could not find any relevant source documents for that query.",
-                "sources": [],
-            }
-
-        context_parts = []
-        for idx, result in enumerate(results, start=1):
-            context_parts.append(
-                f"[Document {idx}]\n"
-                f"file_name: {result.get('file_name') or 'Unknown'}\n"
-                f"file_type: {result.get('file_type') or 'Unknown'}\n"
-                f"similarity: {result.get('similarity')}\n"
-                f"summary: {result.get('summary') or 'None'}\n"
-                f"extracted_json: "
-                f"{json.dumps(result.get('extracted_json') or {}, ensure_ascii=False)}"
-            )
-
-        context = "\n\n".join(context_parts)
-        response = await self.llm.chat(
-            f"User query:\n{query}\n\n"
-            f"Retrieved documents:\n{context}\n\n"
-            "Answer the query using only the retrieved documents. Cite document numbers "
-            "for every key claim."
-        )
-        answer = response.choices[0].message.content.strip()
-
-        return {"answer": answer, "sources": results}
""" + from __future__ import annotations import logging @@ -11,29 +12,40 @@ logger = logging.getLogger(__name__) +_cached_r2_client = None +_r2_client_checked = False + def _r2_bucket() -> str: return os.getenv("CLOUDFLARE_R2_BUCKET_NAME", "cortex-documents") def _r2_client(): - """Lazy R2 client — returns None if any credential is missing.""" + """Lazy, cached R2 client — returns None if any credential is missing.""" + global _cached_r2_client, _r2_client_checked + if _r2_client_checked: + return _cached_r2_client + endpoint = os.getenv("CLOUDFLARE_R2_ENDPOINT", "").rstrip("/") - access_key = os.getenv("R2_ACCESS_KEY_ID", "") - secret_key = os.getenv("R2_SECRET_KEY", "") + access_key = os.getenv("CLOUDFLARE_R2_ACCESS_KEY_ID", "") + secret_key = os.getenv("CLOUDFLARE_R2_SECRET_KEY", "") + + _r2_client_checked = True if not all([endpoint, access_key, secret_key]): return None try: import boto3 - return boto3.client( + + _cached_r2_client = boto3.client( "s3", endpoint_url=endpoint, aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name="auto", ) + return _cached_r2_client except Exception as exc: logger.warning("Failed to create R2 client: %s", exc) return None diff --git a/backend/app/services/supabase_check.py b/backend/app/services/supabase_check.py index 560d5bf..f887d57 100644 --- a/backend/app/services/supabase_check.py +++ b/backend/app/services/supabase_check.py @@ -1,29 +1,38 @@ import asyncio +import logging from supabase._async.client import AsyncClient +logger = logging.getLogger(__name__) + async def wait_for_supabase(supabase: AsyncClient): """ Waits for Supabase to be ready by attempting simple queries. """ - print("Waiting for Supabase...", flush=True) + logger.info("Waiting for Supabase...") retries = 0 max_retries = 10 while retries < max_retries: try: # Simple query to check connectivity - await supabase.table("cortex_documents").select("count", count="exact").execute() - print("Supabase connected!", flush=True) + await ( + supabase.table("cortex_documents") + .select("count", count="exact") + .execute() + ) + logger.info("Supabase connected!") return except Exception as e: retries += 1 - print( - f"Waiting for Supabase... ({retries}/{max_retries}) Error: {e}", - flush=True, + logger.info( + "Waiting for Supabase... (%s/%s) Error: %s", + retries, + max_retries, + e, ) # print(f"DEBUG: URL={supabase.supabase_url}, KEY={supabase.supabase_key[:10]}...", flush=True) await asyncio.sleep(2) - print("WARNING: thorough Supabase check failed, proceeding anyway...", flush=True) + logger.warning("thorough Supabase check failed, proceeding anyway...") diff --git a/backend/app/utils/validation.py b/backend/app/utils/validation.py index ee9b152..8f0fe93 100644 --- a/backend/app/utils/validation.py +++ b/backend/app/utils/validation.py @@ -1,11 +1,18 @@ import re + +def sanitize_dataset_name(raw: str) -> str: + """Sanitize a raw string into a valid Cognee dataset name.""" + sanitized = re.sub(r"[^A-Za-z0-9_]", "_", raw).strip("_") + return sanitized or "Unknown" + + def validate_dataset_name(name: str) -> str: if not name: raise ValueError("Dataset name cannot be empty") - if not re.match(r'^[a-z0-9]+(-[a-z0-9]+)*$', name): + if not re.match(r"^[A-Za-z0-9][A-Za-z0-9_]*$", name): raise ValueError( f"Invalid dataset name '{name}'. " - "Use lowercase letters, numbers, and hyphens only (e.g. 'fast-food')." + "Use letters, numbers, and underscores only (e.g. 'Acme_Corp')." 
) - return name \ No newline at end of file + return name diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 5ae804f..406c25c 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -15,7 +15,8 @@ select = [ ignore = [ "E501", "B008", - "UP007" + "UP007", + "UP017", ] [tool.ruff.format] @@ -25,4 +26,8 @@ skip-magic-trailing-comma = false line-ending = "auto" [tool.pytest.ini_options] -pythonpath = ["."] \ No newline at end of file +pythonpath = ["."] +asyncio_mode = "auto" +markers = [ + "e2e: end-to-end tests requiring real LLM credentials", +] \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 3825dfa..b4b9b6e 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -17,6 +17,7 @@ ruff==0.8.4 # Testing pytest>=8.0.0 +pytest-asyncio>=0.23.0 # LLM Integration litellm>=1.52.0 diff --git a/backend/setup.cfg b/backend/setup.cfg index 93ac127..f7f6626 100644 --- a/backend/setup.cfg +++ b/backend/setup.cfg @@ -4,5 +4,5 @@ extend-ignore = E203, W503 exclude = .git,__pycache__,alembic [mypy] -python_version = 3.11 +python_version = 3.12 ignore_missing_imports = True \ No newline at end of file diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 113f32a..5df39ae 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -7,7 +7,46 @@ import os os.environ.setdefault("CLOUDFLARE_R2_ENDPOINT", "https://fake.r2.cloudflarestorage.com") -os.environ.setdefault("R2_ACCESS_KEY", "fake-access-key") -os.environ.setdefault("R2_SECRET_KEY", "fake-secret-key") +os.environ.setdefault("CLOUDFLARE_R2_ACCESS_KEY_ID", "fake-access-key") +os.environ.setdefault("CLOUDFLARE_R2_SECRET_KEY", "fake-secret-key") os.environ.setdefault("SUPABASE_URL", "https://fake.supabase.co") -os.environ.setdefault("SUPABASE_KEY", "fake-supabase-key") +os.environ.setdefault("SUPABASE_SERVICE_ROLE_KEY", "fake-service-role-key") + +from unittest.mock import AsyncMock, MagicMock # noqa: E402 + +import pytest # noqa: E402 +from fastapi import FastAPI # noqa: E402 +from fastapi.testclient import TestClient # noqa: E402 + +from app.api import api_router # noqa: E402 +from app.core.supabase import get_async_supabase # noqa: E402 + + +@pytest.fixture() +def app(): + """Full FastAPI app with all routes mounted — no lifespan side effects.""" + test_app = FastAPI() + test_app.include_router(api_router) + + # Stub the async Supabase dependency used by GET /api/health. + # The chain is: await supabase.table(...).select(...).execute() + # Only .execute() is awaited, so use MagicMock for the chain and + # AsyncMock only for the terminal .execute() call. + mock_supabase = MagicMock() + mock_supabase.table.return_value.select.return_value.execute = AsyncMock( + return_value=MagicMock(count=42), + ) + + async def _fake_supabase(): + return mock_supabase + + test_app.dependency_overrides[get_async_supabase] = _fake_supabase + yield test_app + test_app.dependency_overrides.clear() + + +@pytest.fixture() +def client(app): + """TestClient wired to the full app. Does not re-raise server errors so + tests can assert on HTTP status codes instead.""" + return TestClient(app, raise_server_exceptions=False) diff --git a/backend/tests/test_cognee.py b/backend/tests/test_cognee.py index 3865e90..46a419c 100644 --- a/backend/tests/test_cognee.py +++ b/backend/tests/test_cognee.py @@ -1,76 +1,154 @@ -from dotenv import load_dotenv +""" +End-to-end (e2e) tests for the Cognee pipeline. 
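With these fixtures, route-level tests need no running infrastructure. A hypothetical sketch, not a test from this PR; it assumes `/api/health` resolves Supabase via the overridden dependency and asserts only the status code:

```python
# Hypothetical usage of the conftest fixtures above.
def test_health_uses_stubbed_supabase(client):
    # The overridden get_async_supabase dependency returns the MagicMock
    # chain, so no real Supabase instance is contacted.
    resp = client.get("/api/health")
    assert resp.status_code == 200
```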
diff --git a/backend/tests/test_cognee.py b/backend/tests/test_cognee.py
index 3865e90..46a419c 100644
--- a/backend/tests/test_cognee.py
+++ b/backend/tests/test_cognee.py
@@ -1,76 +1,154 @@
-from dotenv import load_dotenv
+"""
+End-to-end (e2e) tests for the Cognee pipeline.

-load_dotenv(override=True)
+These tests call the real Cognee SDK — add, cognify, search, prune — so they
+require a live LLM API key. They use Cognee's embedded defaults (LanceDB for
+vectors, KuzuDB for graph, SQLite for relational) so no PostgreSQL or external
+vector store is needed.

-import asyncio  # noqa: E402
+Skipped automatically when LLM_API_KEY is not set.

-import cognee  # noqa: E402
-from cognee.api.v1.search import SearchType  # noqa: E402
+Usage:
+    cd backend && pytest tests/test_cognee.py -v          # skips if no creds
+    cd backend && pytest tests/test_cognee.py -v -m e2e   # explicit marker
+"""
+from __future__ import annotations

-async def setup_cognee():
-    """Initialize cognee environment."""
-    pass
+import os
+import textwrap
+from pathlib import Path

-async def ingest_document(files):
-    """Ingest documents"""
-    for file in files:
-        print(f"Ingesting {file}...")
-        await cognee.add(
-            file,
-            dataset_name="smoke-test"
-        )
-        print(f"Added {file}")
-
-    print("Running cognify with dataset...")
-    try:
-        await cognee.cognify(datasets=["smoke-test"])
-        print("Cognify with dataset completed")
-    except Exception as e:
-        print(f"Cognify with dataset error: {e}")
+from dotenv import load_dotenv

+# Load real credentials from project root .env
+load_dotenv(override=True)

-async def search_knowledge_graph():
-    """query the ingested data"""
-    results = {}
+import cognee  # noqa: E402
+import pytest  # noqa: E402
+from cognee.api.v1.search import SearchType  # noqa: E402

-    results["chunks"] = await cognee.search(
-        query_text="What is contained in the files?",
-        query_type=SearchType.CHUNKS,
-    )
+# ---------------------------------------------------------------------------
+# Skip the entire module when LLM credentials are not available
+# ---------------------------------------------------------------------------
+
+_REQUIRED_VARS = ("LLM_API_KEY",)
+_missing = [v for v in _REQUIRED_VARS if not os.getenv(v)]

-    results["graph_completion"] = await cognee.search(
-        query_text="What is contained in the files?"
+pytestmark = [
+    pytest.mark.e2e,
+    pytest.mark.asyncio,
+    pytest.mark.skipif(
+        len(_missing) > 0,
+        reason=f"Missing env vars for e2e Cognee tests: {', '.join(_missing)}",
+    ),
+]
+
+E2E_DATASET = "e2e-smoke-test"
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="module")
+def test_file(tmp_path_factory) -> Path:
+    """Create a small text file to ingest — no external mock_data needed."""
+    p = tmp_path_factory.mktemp("cognee_e2e") / "sample.txt"
+    p.write_text(
+        textwrap.dedent("""\
+            Acme Corp Deep Fryer Model X200 — Safety Manual
+
+            Chapter 1: Installation
+            The X200 must be installed on a level, heat-resistant surface at least
+            24 inches from combustible materials. A dedicated 240V/30A circuit is
+            required. Do not use extension cords.
+
+            Chapter 2: Operation
+            Fill the basin with oil to the MIN line before powering on. Maximum
+            oil temperature is 375 degrees F. Never leave the fryer unattended
+            while in use. The auto-shutoff triggers at 400 degrees F.
+
+            Chapter 3: Maintenance
+            Drain and filter oil after every 40 hours of use. Clean the heating
+            element monthly with a non-abrasive cloth. Replace the thermostat
+            annually.
+        """)
     )
+    return p
+
+
+def _setup_cognee_for_test():
+    """Configure Cognee with LLM + embeddings only.
+
+    Uses Cognee's embedded defaults (LanceDB, KuzuDB, SQLite) so the test
+    works without PostgreSQL or an external vector store. Only needs
+    LLM_API_KEY and optionally EMBEDDING_API_KEY from the environment.
+    """
+    llm_provider = os.getenv("LLM_PROVIDER")
+    llm_model = os.getenv("LLM_MODEL")
+    llm_api_key = os.getenv("LLM_API_KEY")
+
+    if llm_provider and llm_api_key:
+        cognee.config.set_llm_config(
+            {
+                "llm_provider": llm_provider,
+                "llm_model": llm_model,
+                "llm_api_key": llm_api_key,
+            }
+        )

-    return results
+    embedding_provider = os.getenv("EMBEDDING_PROVIDER")
+    embedding_model = os.getenv("EMBEDDING_MODEL")
+    embedding_api_key = os.getenv("EMBEDDING_API_KEY")
+
+    if embedding_provider and embedding_api_key:
+        cognee.config.set_embedding_config(
+            {
+                "embedding_provider": embedding_provider,
+                "embedding_model": embedding_model,
+                "embedding_api_key": embedding_api_key,
+            }
+        )


-async def main():
-    files = ["mock_data/DeepFryer-1.pdf", "mock_data/DeepFryer-2.pdf"]
-    await setup_cognee()
-    await ingest_document(files)
+# ---------------------------------------------------------------------------
+# Tests
+#
+# Cognee uses KuzuDB (embedded graph DB) which holds a file lock. Running
+# add → cognify → search across separate test functions can cause lock
+# conflicts. We therefore run the full pipeline in a single test and do
+# cleanup at the end.
+# ---------------------------------------------------------------------------

-    print("Waiting for cognify to complete...")
-    await asyncio.sleep(5)

-    results = await search_knowledge_graph()
+async def test_cognee_ingest_and_search(test_file: Path):
+    """Full pipeline: configure → add → cognify → search (chunks + graph)."""

-    all_passed = True
+    _setup_cognee_for_test()

-    for search_type, data in results.items():
-        if len(data) > 0:
-            print(f"  PASS: {search_type} returned {len(data)} results")
-        else:
-            print(f"  FAIL: {search_type} returned 0 results")
-            all_passed = False
+    # ── Ingest ─────────────────────────────────────────────────────────
+    await cognee.add(str(test_file), dataset_name=E2E_DATASET)
+    await cognee.cognify(datasets=[E2E_DATASET])

-    # --- Summary ---
-    if all_passed:
-        print("\n SMOKE TEST PASSED")
-    else:
-        print("\n SMOKE TEST FAILED")
+    # ── Search: CHUNKS ─────────────────────────────────────────────────
+    chunk_results = await cognee.search(
+        query_text="deep fryer installation",
+        query_type=SearchType.CHUNKS,
+        datasets=[E2E_DATASET],
+    )
+    assert chunk_results is not None
+    assert len(chunk_results) > 0, "CHUNKS search returned 0 results after cognify"
+
+    # ── Search: GRAPH_COMPLETION ───────────────────────────────────────
+    graph_results = await cognee.search(
+        query_text="What safety features does the fryer have?",
+        query_type=SearchType.GRAPH_COMPLETION,
+        datasets=[E2E_DATASET],
+    )
+    assert graph_results is not None
+    assert len(graph_results) > 0, "GRAPH_COMPLETION search returned 0 results"

+    # ── Cleanup ────────────────────────────────────────────────────────
     await cognee.prune.prune_system(graph=True, vector=True, metadata=False)
-
-if __name__ == '__main__':
-    asyncio.run(main())
diff --git a/backend/tests/test_dataset_name_validation.py b/backend/tests/test_dataset_name_validation.py
index 08e2db1..0cd726a 100644
--- a/backend/tests/test_dataset_name_validation.py
+++ b/backend/tests/test_dataset_name_validation.py
@@ -1,5 +1,6 @@
 import pytest
-from app.utils.validation import validate_dataset_name
+
+from app.utils.validation import sanitize_dataset_name, validate_dataset_name


 class TestValidateDatasetName:
@@ -10,25 +11,29 @@ def test_valid_simple_name(self):
         """Test valid single-word lowercase name."""
         assert validate_dataset_name("main") == "main"

-    def test_valid_name_with_hyphens(self):
-        """Test valid name with hyphens separating words."""
-        assert validate_dataset_name("fast-food") == "fast-food"
+    def test_valid_name_with_underscores(self):
+        """Test valid name with underscores separating words."""
+        assert validate_dataset_name("fast_food") == "fast_food"

     def test_valid_name_with_numbers(self):
         """Test valid name with numbers."""
         assert validate_dataset_name("dataset123") == "dataset123"

-    def test_valid_name_mixed_with_hyphens_and_numbers(self):
-        """Test valid name with numbers and hyphens."""
-        assert validate_dataset_name("fast-food-123") == "fast-food-123"
+    def test_valid_name_mixed_with_underscores_and_numbers(self):
+        """Test valid name with numbers and underscores."""
+        assert validate_dataset_name("fast_food_123") == "fast_food_123"

-    def test_valid_name_multiple_hyphens(self):
-        """Test valid name with multiple hyphen-separated segments."""
-        assert validate_dataset_name("my-fast-food-dataset") == "my-fast-food-dataset"
+    def test_valid_name_uppercase(self):
+        """Test valid name with uppercase letters."""
+        assert validate_dataset_name("FastFood") == "FastFood"

     def test_valid_name_starts_with_number(self):
         """Test valid name starting with a number."""
-        assert validate_dataset_name("123-dataset") == "123-dataset"
+        assert validate_dataset_name("123_dataset") == "123_dataset"
+
+    def test_valid_name_starts_with_letter(self):
+        """Test valid name starting with a letter."""
+        assert validate_dataset_name("Acme_Corp") == "Acme_Corp"

     # ========== Invalid: Empty ==========
     def test_empty_string(self):
@@ -36,22 +41,11 @@
         with pytest.raises(ValueError, match="Dataset name cannot be empty"):
             validate_dataset_name("")

-    # ========== Invalid: Uppercase ==========
-    def test_uppercase_letters(self):
-        """Test that uppercase letters are rejected."""
-        with pytest.raises(ValueError, match="Invalid dataset name"):
-            validate_dataset_name("FastFood")
-
-    def test_mixed_case(self):
-        """Test that mixed case is rejected."""
-        with pytest.raises(ValueError, match="Invalid dataset name"):
-            validate_dataset_name("Fast-food")
-
     # ========== Invalid: Special Characters ==========
-    def test_underscore_not_allowed(self):
-        """Test that underscores are rejected."""
+    def test_hyphen_not_allowed(self):
+        """Test that hyphens are rejected."""
         with pytest.raises(ValueError, match="Invalid dataset name"):
-            validate_dataset_name("fast_food")
+            validate_dataset_name("fast-food")

     def test_space_not_allowed(self):
         """Test that spaces are rejected."""
@@ -68,31 +62,52 @@ def test_special_characters_not_allowed(self):
         with pytest.raises(ValueError, match="Invalid dataset name"):
             validate_dataset_name("fast@food")

-    # ========== Invalid: Hyphen Placement ==========
-    def test_leading_hyphen(self):
-        """Test that leading hyphens are rejected."""
-        with pytest.raises(ValueError, match="Invalid dataset name"):
-            validate_dataset_name("-fast-food")
-
-    def test_trailing_hyphen(self):
-        """Test that trailing hyphens are rejected."""
+    # ========== Invalid: Underscore Placement ==========
+    def test_leading_underscore(self):
+        """Test that leading underscores are rejected."""
         with pytest.raises(ValueError, match="Invalid dataset name"):
-            validate_dataset_name("fast-food-")
-
+            validate_dataset_name("_fast_food")

-    def test_only_hyphen(self):
-        """Test that only a hyphen is rejected."""
+    def test_only_underscore(self):
+        """Test that only an underscore is rejected."""
         with pytest.raises(ValueError, match="Invalid dataset name"):
-            validate_dataset_name("-")
+            validate_dataset_name("_")

     # ========== Error Message Validation ==========
     def test_error_message_includes_name(self):
-        """Test that error message includesinvalid name."""
+        """Test that error message includes invalid name."""
         invalid_name = "Invalid@Name"
         with pytest.raises(ValueError, match=f"Invalid dataset name '{invalid_name}'"):
             validate_dataset_name(invalid_name)

     def test_error_message_includes_guidance(self):
         """Test that error message includes guidance."""
-        with pytest.raises(ValueError, match="Use lowercase letters, numbers, and hyphens only"):
-            validate_dataset_name("INVALID")
\ No newline at end of file
+        with pytest.raises(
+            ValueError, match="Use letters, numbers, and underscores only"
+        ):
+            validate_dataset_name("@INVALID")
+
+
+class TestSanitizeDatasetName:
+    """Test suite for sanitize_dataset_name function."""
+
+    def test_simple_name(self):
+        assert sanitize_dataset_name("Acme") == "Acme"
+
+    def test_name_with_spaces(self):
+        assert sanitize_dataset_name("Acme Corp") == "Acme_Corp"
+
+    def test_name_with_special_chars(self):
+        assert sanitize_dataset_name("Acme & Co.") == "Acme___Co"
+
+    def test_empty_string_returns_unknown(self):
+        assert sanitize_dataset_name("") == "Unknown"
+
+    def test_only_special_chars_returns_unknown(self):
+        assert sanitize_dataset_name("@#$") == "Unknown"
+
+    def test_strips_leading_trailing_underscores(self):
+        assert sanitize_dataset_name("__test__") == "test"
+
+    def test_preserves_numbers(self):
+        assert sanitize_dataset_name("client_123") == "client_123"
- -Usage: - pytest tests/test_ingest.py -v -""" - -from __future__ import annotations - -import io -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -from fastapi import FastAPI -from fastapi.testclient import TestClient - -from app.routes.documents import router -from app.services.ingest import ingest_document - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _make_chunk(entities=None): - chunk = MagicMock() - chunk.entities = entities or [] - return chunk - - -# --------------------------------------------------------------------------- -# Happy path -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_ingest_document_success(): - """Successful ingest returns structured data.""" - fake_chunk = _make_chunk(entities=["EntityA"]) - - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch("app.services.ingest.cognee.cognify", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.search", - new_callable=AsyncMock, - side_effect=[["mock summary"], [fake_chunk]], - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - document_id="doc-123", - ) - - assert result["status"] == "success" - assert result["document_id"] == "doc-123" - assert result["summary"] == "mock summary" - assert result["entities"] == ["EntityA"] - assert result["raw_chunks_count"] == 1 - - -# --------------------------------------------------------------------------- -# Empty search results — NOT an error -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_empty_search_results_returns_success(): - """Empty Cognee search results are not an error — return 200 with zeros.""" - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch("app.services.ingest.cognee.cognify", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.search", - new_callable=AsyncMock, - side_effect=[[], []], - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="empty-dataset", - ) - - assert result["status"] == "success" - assert result["summary"] == "" - assert result["entities"] == [] - assert result["raw_chunks_count"] == 0 - - -# --------------------------------------------------------------------------- -# Kuzu storage failure (PermissionError during add) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_kuzu_permission_error_during_add(): - """PermissionError on add() → error_type kuzu_storage.""" - with patch( - "app.services.ingest.cognee.add", - new_callable=AsyncMock, - side_effect=PermissionError("Permission denied: .cognee_system/"), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert result["error_type"] == "kuzu_storage" - assert ".cognee_system" in result["error"] or "writable" in result["error"] - - -# --------------------------------------------------------------------------- -# Kuzu storage failure (disk full during cognify) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_kuzu_disk_full_during_cognify(): - """ENOSPC OSError on cognify() → error_type kuzu_storage with helpful 
message.""" - import errno - - disk_full = OSError("No space left on device") - disk_full.errno = errno.ENOSPC - - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.cognify", - new_callable=AsyncMock, - side_effect=disk_full, - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert result["error_type"] == "kuzu_storage" - assert "full" in result["error"].lower() or "space" in result["error"].lower() - - -# --------------------------------------------------------------------------- -# Gemini / LLM API error during cognify -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_llm_api_error_during_cognify(): - """LLM API error during cognify() → error_type llm_api.""" - - class FakeLiteLLMError(Exception): - pass - - FakeLiteLLMError.__module__ = "litellm.exceptions" - - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.cognify", - new_callable=AsyncMock, - side_effect=FakeLiteLLMError("Invalid API key for Gemini"), - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert result["error_type"] == "llm_api" - assert "cognify" in result["error"].lower() - - -@pytest.mark.asyncio -async def test_llm_api_error_keyword_fallback(): - """Even a plain Exception with 'api key' in the message is treated as LLM error.""" - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.cognify", - new_callable=AsyncMock, - side_effect=Exception("Gemini quota exceeded: rate limit hit"), - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert result["error_type"] == "llm_api" - - -# --------------------------------------------------------------------------- -# Vector dimension mismatch -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_vector_dimension_mismatch_during_cognify(): - """Dimension mismatch error → error_type vector_dimension_mismatch with fix hint.""" - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.cognify", - new_callable=AsyncMock, - side_effect=Exception( - "Vector dimension mismatch: expected 1536, got 768" - ), - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert result["error_type"] == "vector_dimension_mismatch" - assert ".cognee_system" in result["error"] - assert "re-ingest" in result["error"].lower() or "delete" in result["error"].lower() - - -@pytest.mark.asyncio -async def test_vector_dimension_mismatch_during_search(): - """Dimension mismatch can also surface during search() after cognify succeeds.""" - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch("app.services.ingest.cognee.cognify", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.search", - new_callable=AsyncMock, - side_effect=Exception("wrong number of dimensions: expected 1536"), - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert 
result["error_type"] == "vector_dimension_mismatch" - - -# --------------------------------------------------------------------------- -# cognify() called without prior add() (empty dataset) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_cognify_without_add(): - """cognify() on empty dataset → error_type no_data_added.""" - with ( - patch("app.services.ingest.cognee.add", new_callable=AsyncMock), - patch( - "app.services.ingest.cognee.cognify", - new_callable=AsyncMock, - side_effect=Exception("No data added to dataset before cognify"), - ), - ): - result = await ingest_document( - file_path="fake.pdf", - dataset_name="test-dataset", - ) - - assert result["status"] == "error" - assert result["error_type"] == "no_data_added" - assert "add()" in result["error"] - - -# --------------------------------------------------------------------------- -# Non-existent file (basic smoke test — no mocks) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_ingest_document_bad_file(): - """A non-existent file path should return an error status, not raise.""" - with ( - patch( - "app.services.ingest.cognee.add", - new_callable=AsyncMock, - side_effect=FileNotFoundError("No such file: nonexistent.pdf"), - ), - ): - result = await ingest_document( - file_path="nonexistent_file.pdf", - dataset_name="test-dataset", - ) - - # FileNotFoundError is an OSError subclass → kuzu_storage bucket - assert result["status"] == "error" - assert "error" in result - - -# --------------------------------------------------------------------------- -# Upload route tests (/api/documents/upload) -# --------------------------------------------------------------------------- - -_test_app = FastAPI() -_test_app.include_router(router) # router already has prefix="/documents" - -_client = TestClient(_test_app) - -_INGEST_SUCCESS = { - "status": "success", - "document_id": "doc-123", - "dataset_name": "main", - "summary": "A test summary.", - "entities": ["EntityA"], - "raw_chunks_count": 2, -} - -_FAKE_FILE_URL = "s3://test-bucket/main/doc-123.pdf" - - -def _upload_payload(filename: str = "test.pdf", content: bytes = b"%PDF fake"): - return {"file": (filename, io.BytesIO(content), "application/pdf")} - - -@patch("app.routes.documents.upload_file_cloudflare", new_callable=AsyncMock) -@patch("app.routes.documents.ingest_document", new_callable=AsyncMock) -def test_upload_returns_file_url(mock_ingest, mock_upload): - mock_ingest.return_value = _INGEST_SUCCESS - mock_upload.return_value = _FAKE_FILE_URL - - response = _client.post( - "/documents/upload", - files=_upload_payload(), - ) - - assert response.status_code == 200 - body = response.json() - assert body["status"] == "ok" - assert body["file_url"] == _FAKE_FILE_URL - - -@patch("app.routes.documents.upload_file_cloudflare", new_callable=AsyncMock) -@patch("app.routes.documents.ingest_document", new_callable=AsyncMock) -def test_upload_storage_called_after_cognify(mock_ingest, mock_upload): - """Storage upload must happen after ingest_document (which wraps cognify) returns.""" - call_order = [] - mock_ingest.side_effect = lambda *a, **kw: ( - call_order.append("ingest") or _INGEST_SUCCESS - ) - - async def _record_upload(*a, **kw): - call_order.append("upload") - return _FAKE_FILE_URL - - mock_upload.side_effect = _record_upload - - response = _client.post("/documents/upload", files=_upload_payload()) - - assert response.status_code == 200 - 
-    assert call_order == ["ingest", "upload"], (
-        "Storage upload must be called after ingest_document completes"
-    )
-
-
-@patch("app.routes.documents.upload_file_cloudflare", new_callable=AsyncMock)
-@patch("app.routes.documents.ingest_document", new_callable=AsyncMock)
-def test_upload_storage_key_contains_document_id_and_dataset(mock_ingest, mock_upload):
-    mock_ingest.return_value = _INGEST_SUCCESS
-    mock_upload.return_value = _FAKE_FILE_URL
-
-    response = _client.post(
-        "/documents/upload?dataset_name=my-dataset",
-        files=_upload_payload("sample.pdf"),
-    )
-
-    assert response.status_code == 200
-    body = response.json()
-    document_id = body["document_id"]
-
-    # key arg should be "{dataset}/{document_id}.pdf"
-    _call_kwargs = mock_upload.call_args
-    key = _call_kwargs.kwargs.get("key") or _call_kwargs.args[2]
-    assert key == f"my-dataset/{document_id}.pdf"
-
-
-@patch("app.routes.documents.upload_file_cloudflare", new_callable=AsyncMock)
-@patch("app.routes.documents.ingest_document", new_callable=AsyncMock)
-def test_temp_file_cleaned_up_after_upload(mock_ingest, mock_upload, tmp_path):
-    """The temp file must be deleted even after a successful upload."""
-    mock_ingest.return_value = _INGEST_SUCCESS
-    mock_upload.return_value = _FAKE_FILE_URL
-
-    with patch("app.routes.documents.UPLOAD_DIR", tmp_path):
-        response = _client.post("/documents/upload", files=_upload_payload())
-
-    assert response.status_code == 200
-    # Verify no .pdf files remain in UPLOAD_DIR (tmp_path)
-    remaining = list(tmp_path.glob("*.pdf"))
-    assert remaining == [], f"Temp file not cleaned up: {remaining}"
-
-
-@patch("app.routes.documents.upload_file_cloudflare", new_callable=AsyncMock)
-@patch("app.routes.documents.ingest_document", new_callable=AsyncMock)
-def test_storage_not_called_on_ingest_failure(mock_ingest, mock_upload):
-    mock_ingest.return_value = {
-        "status": "error",
-        "error_type": "llm_api",
-        "error": "LLM quota exceeded",
-    }
-
-    response = _client.post("/documents/upload", files=_upload_payload())
-
-    assert response.status_code == 502
-    mock_upload.assert_not_called()
diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py
new file mode 100644
index 0000000..e8d2d74
--- /dev/null
+++ b/backend/tests/test_integration.py
@@ -0,0 +1,621 @@
+"""
+Integration tests — exercise the full HTTP request → route → service → response chain.
+
+External services (Cognee, Supabase, R2) are mocked at the SDK boundary so these
+tests run without any infrastructure. What IS tested: routing, request validation,
+Pydantic serialization, service orchestration, error handling, and HTTP status codes.
+
+Usage:
+    cd backend && pytest tests/test_integration.py -v
+"""
+
+from __future__ import annotations
+
+import io
+from unittest.mock import AsyncMock, MagicMock, patch
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _mock_async_sb(data=None):
+    """Build a mock async Supabase client.
+
+    The chain ``sb.table(...).select(...).eq(...).execute()`` uses regular
+    (synchronous) calls except for ``.execute()``, which is awaited.
+    """
+    sb = MagicMock()
+    result = MagicMock(data=data if data is not None else [])
+    chain = sb.table.return_value
+    for method in (
+        "select", "eq", "order", "limit", "insert", "update", "maybe_single", "lt",
+    ):
+        getattr(chain, method).return_value = chain
+    chain.execute = AsyncMock(return_value=result)
+    return sb
+
+
+def _mock_async_sb_single(data):
+    """Mock for maybe_single() queries — data is a dict or None."""
+    return _mock_async_sb(data=data)
+
+
+def _fake_get_async_supabase(sb_mock):
+    """Return an async function that yields *sb_mock*."""
+    async def _get():
+        return sb_mock
+    return _get
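+
+# Illustrative usage of the helper above (a sketch, not exercised by any test
+# assertion): every chained table call returns the same mock object, so any
+# select/eq/order/limit combination resolves to the canned ``data`` once
+# ``.execute()`` is awaited.
+#
+#     sb = _mock_async_sb(data=[{"id": "doc-1"}])
+#     result = await sb.table("documents").select("*").eq("id", "doc-1").execute()
+#     assert result.data == [{"id": "doc-1"}]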
+ """ + sb = MagicMock() + result = MagicMock(data=data if data is not None else []) + chain = sb.table.return_value + for method in ( + "select", "eq", "order", "limit", "insert", "update", "maybe_single", "lt", + ): + getattr(chain, method).return_value = chain + chain.execute = AsyncMock(return_value=result) + return sb + + +def _mock_async_sb_single(data): + """Mock for maybe_single() queries — data is a dict or None.""" + return _mock_async_sb(data=data) + + +def _fake_get_async_supabase(sb_mock): + """Return an async function that yields *sb_mock*.""" + async def _get(): + return sb_mock + return _get + + +# =========================================================================== +# Health check GET /api/health +# =========================================================================== + + +class TestHealthCheck: + + def test_healthy(self, client): + resp = client.get("/api/health") + assert resp.status_code == 200 + assert resp.json()["status"] == "healthy" + + +# =========================================================================== +# Upload POST /api/documents/upload +# =========================================================================== + + +class TestUploadDocuments: + + @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock) + @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock) + def test_single_pdf(self, mock_get_sb, mock_pipeline, client): + mock_get_sb.return_value = _mock_async_sb() + + resp = client.post( + "/api/documents/upload", + files=[("files", ("report.pdf", io.BytesIO(b"%PDF-fake"), "application/pdf"))], + ) + + assert resp.status_code == 200 + body = resp.json() + assert len(body["uploaded"]) == 1 + assert body["uploaded"][0]["filename"] == "report.pdf" + assert len(body["uploaded"][0]["id"]) == 36 # UUID + mock_pipeline.assert_called_once() + + @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock) + @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock) + def test_multiple_files(self, mock_get_sb, mock_pipeline, client): + mock_get_sb.return_value = _mock_async_sb() + + files = [ + ("files", ("a.pdf", io.BytesIO(b"%PDF"), "application/pdf")), + ("files", ("b.csv", io.BytesIO(b"col1,col2"), "text/csv")), + ("files", ("c.txt", io.BytesIO(b"hello"), "text/plain")), + ] + resp = client.post("/api/documents/upload", files=files) + + assert resp.status_code == 200 + assert len(resp.json()["uploaded"]) == 3 + assert mock_pipeline.call_count == 3 + + @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock) + @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock) + def test_all_allowed_extensions(self, mock_get_sb, mock_pipeline, client): + mock_get_sb.return_value = _mock_async_sb() + + for ext, content_type in [ + (".pdf", "application/pdf"), + (".csv", "text/csv"), + (".txt", "text/plain"), + ]: + resp = client.post( + "/api/documents/upload", + files=[("files", (f"test{ext}", io.BytesIO(b"data"), content_type))], + ) + assert resp.status_code == 200, f"Extension {ext} should be accepted" + + def test_rejects_unsupported_extension(self, client): + resp = client.post( + "/api/documents/upload", + files=[("files", ("image.png", io.BytesIO(b"fake"), "image/png"))], + ) + assert resp.status_code == 400 + assert "unsupported extension" in resp.json()["detail"].lower() + + def test_rejects_more_than_5_files(self, client): + files = [ + ("files", (f"f{i}.pdf", io.BytesIO(b"%PDF"), "application/pdf")) + for i 
+
+
+# ===========================================================================
+# Deduplication POST /api/documents/upload
+# ===========================================================================
+
+
+class TestUploadDeduplication:
+
+    @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock)
+    @patch("app.routes.documents.create_document", new_callable=AsyncMock)
+    @patch("app.routes.documents.find_document_by_hash", new_callable=AsyncMock)
+    def test_duplicate_returns_existing_doc(
+        self, mock_find, mock_create, mock_pipeline, client
+    ):
+        """When an identical file already exists, return it without re-processing."""
+        mock_find.return_value = {
+            "id": "existing-doc-id",
+            "original_filename": "report.pdf",
+            "status": "completed",
+            "insights": [],
+            "entities": [],
+            "file_url": None,
+        }
+
+        resp = client.post(
+            "/api/documents/upload",
+            files=[("files", ("report.pdf", io.BytesIO(b"%PDF-fake"), "application/pdf"))],
+        )
+
+        assert resp.status_code == 200
+        body = resp.json()
+        assert len(body["uploaded"]) == 1
+        assert body["uploaded"][0]["duplicate"] is True
+        assert body["uploaded"][0]["existing_doc_id"] == "existing-doc-id"
+        assert body["uploaded"][0]["id"] == "existing-doc-id"
+        # Pipeline should NOT have been triggered
+        mock_pipeline.assert_not_called()
+        # No new document should have been created
+        mock_create.assert_not_called()
+
+    @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock)
+    @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock)
+    @patch("app.routes.documents.find_document_by_hash", new_callable=AsyncMock)
+    def test_new_file_proceeds_to_pipeline(
+        self, mock_find, mock_get_sb, mock_pipeline, client
+    ):
+        """When no duplicate exists, create doc and run the pipeline."""
+        mock_find.return_value = None
+        mock_get_sb.return_value = _mock_async_sb()
+
+        resp = client.post(
+            "/api/documents/upload",
+            files=[("files", ("new.pdf", io.BytesIO(b"%PDF-new"), "application/pdf"))],
+        )
+
+        assert resp.status_code == 200
+        body = resp.json()
+        assert len(body["uploaded"]) == 1
+        assert body["uploaded"][0]["duplicate"] is False
+        assert body["uploaded"][0]["existing_doc_id"] is None
+        mock_pipeline.assert_called_once()
+
+    @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock)
+    @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock)
+    @patch("app.routes.documents.find_document_by_hash", new_callable=AsyncMock)
+    def test_hash_passed_to_create_document(
+        self, mock_find, mock_get_sb, mock_pipeline, client
+    ):
+        """create_document receives the content_hash for storage."""
+        import hashlib
+
+        mock_find.return_value = None
+        mock_get_sb.return_value = _mock_async_sb()
+        content = b"unique-file-content"
+        expected_hash = hashlib.sha256(content).hexdigest()
+
+        resp = client.post(
+            "/api/documents/upload",
+            files=[("files", ("file.txt", io.BytesIO(content), "text/plain"))],
+        )
+
+        assert resp.status_code == 200
+        # Verify find_document_by_hash was called with the correct hash
+        mock_find.assert_called_once_with(expected_hash)
+
+    @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock)
+    @patch("app.routes.documents.create_document", new_callable=AsyncMock)
+    @patch("app.routes.documents.find_document_by_hash", new_callable=AsyncMock)
+    def test_mixed_new_and_duplicate_files(
+        self, mock_find, mock_create, mock_pipeline, client
+    ):
+        """A batch with both new and duplicate files handles each correctly."""
+        import hashlib
+
+        new_content = b"brand-new"
+        dup_content = b"already-exists"
+        dup_hash = hashlib.sha256(dup_content).hexdigest()
+
+        def _find_side_effect(content_hash):
+            if content_hash == dup_hash:
+                return {
+                    "id": "dup-doc-id",
+                    "original_filename": "old.csv",
+                    "status": "completed",
+                    "insights": [],
+                    "entities": [],
+                    "file_url": None,
+                }
+            return None
+
+        mock_find.side_effect = _find_side_effect
+        mock_create.return_value = "new-doc-id"
+
+        resp = client.post(
+            "/api/documents/upload",
+            files=[
+                ("files", ("new.txt", io.BytesIO(new_content), "text/plain")),
+                ("files", ("dup.csv", io.BytesIO(dup_content), "text/csv")),
+            ],
+        )
+
+        assert resp.status_code == 200
+        body = resp.json()
+        assert len(body["uploaded"]) == 2
+
+        new_file = body["uploaded"][0]
+        assert new_file["duplicate"] is False
+        assert new_file["filename"] == "new.txt"
+
+        dup_file = body["uploaded"][1]
+        assert dup_file["duplicate"] is True
+        assert dup_file["existing_doc_id"] == "dup-doc-id"
+
+        # Only the new file triggers the pipeline
+        mock_pipeline.assert_called_once()
+        mock_create.assert_called_once()
+
+    @patch("app.routes.documents.run_pipeline", new_callable=AsyncMock)
+    @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock)
+    @patch("app.routes.documents.find_document_by_hash", new_callable=AsyncMock)
+    def test_same_filename_different_content_not_duplicate(
+        self, mock_find, mock_get_sb, mock_pipeline, client
+    ):
+        """Same filename but different content should NOT be treated as a duplicate."""
+        mock_find.return_value = None
+        mock_get_sb.return_value = _mock_async_sb()
+
+        resp = client.post(
+            "/api/documents/upload",
+            files=[
+                ("files", ("report.pdf", io.BytesIO(b"version-1"), "application/pdf")),
+                ("files", ("report.pdf", io.BytesIO(b"version-2"), "application/pdf")),
+            ],
+        )
+
+        assert resp.status_code == 200
+        body = resp.json()
+        assert len(body["uploaded"]) == 2
+        assert all(f["duplicate"] is False for f in body["uploaded"])
+        assert mock_pipeline.call_count == 2
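+
+# Taken together, the tests above pin down a route-side flow roughly like this
+# sketch (inferred from the mocks; the real logic lives in
+# app/routes/documents.py):
+#
+#     content_hash = hashlib.sha256(content).hexdigest()
+#     existing = await find_document_by_hash(content_hash)
+#     if existing is not None:
+#         # report duplicate=True with existing_doc_id; skip create/run_pipeline
+#     else:
+#         doc_id = await create_document(..., content_hash=content_hash)
+#         # schedule run_pipeline(temp_path, doc_id, original_filename)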
"fast-food", + } + ] + ) + + resp = client.get("/api/documents/search?q=fryer+safety") + + assert resp.status_code == 200 + body = resp.json() + assert body["query"] == "fryer safety" + assert body["total"] == 1 + assert "fryer" in body["results"][0]["text"].lower() + assert len(body["results"][0]["sources"]) >= 1 + + @patch("app.core.supabase.get_async_supabase", new_callable=AsyncMock) + @patch("app.services.cognee_service.cognee") + def test_empty_results(self, mock_cognee, mock_get_sb, client): + mock_cognee.search = AsyncMock(return_value=[]) + mock_get_sb.return_value = _mock_async_sb() + + resp = client.get("/api/documents/search?q=nonexistent") + + assert resp.status_code == 200 + assert resp.json()["total"] == 0 + assert resp.json()["results"] == [] + + def test_missing_query_param_returns_422(self, client): + resp = client.get("/api/documents/search") + assert resp.status_code == 422 + + @patch("app.core.supabase.get_async_supabase", new_callable=AsyncMock) + @patch("app.services.cognee_service.cognee") + def test_dataset_filter(self, mock_cognee, mock_get_sb, client): + mock_cognee.search = AsyncMock( + return_value=[{"search_result": "result", "dataset_name": "acme"}] + ) + mock_get_sb.return_value = _mock_async_sb( + data=[ + { + "id": "doc-2", + "original_filename": "acme.pdf", + "document_type": None, + "dataset_name": "acme", + } + ] + ) + + resp = client.get("/api/documents/search?q=test&dataset=acme") + + assert resp.status_code == 200 + assert resp.json()["total"] == 1 + # Verify cognee was called with the dataset filter + call_kwargs = mock_cognee.search.call_args.kwargs + assert call_kwargs.get("datasets") == ["acme"] + + @patch("app.core.supabase.get_async_supabase", new_callable=AsyncMock) + @patch("app.services.cognee_service.cognee") + def test_cognee_failure_returns_500(self, mock_cognee, mock_get_sb, client): + mock_cognee.search = AsyncMock(side_effect=Exception("Cognee connection lost")) + mock_get_sb.return_value = _mock_async_sb() + + resp = client.get("/api/documents/search?q=test") + + assert resp.status_code == 500 + assert "search failed" in resp.json()["detail"].lower() + + +# =========================================================================== +# Graph GET /api/documents/graph +# =========================================================================== + + +class TestGraphEndpoint: + + @patch("cognee.infrastructure.databases.graph.get_graph_engine", new_callable=AsyncMock) + def test_returns_d3_format(self, mock_get_engine, client): + mock_engine = AsyncMock() + mock_engine.get_graph_data.return_value = ( + [ + ("n1", {"name": "Acme Corp", "type": "Company"}), + ("n2", {"name": "Safety Manual", "type": "Document"}), + ], + [("n1", "n2", "mentions", {})], + ) + mock_get_engine.return_value = mock_engine + + resp = client.get("/api/documents/graph") + + assert resp.status_code == 200 + body = resp.json() + assert "nodes" in body + assert "links" in body + assert len(body["nodes"]) == 2 + assert len(body["links"]) == 1 + assert body["links"][0]["source"] == "n1" + assert body["links"][0]["target"] == "n2" + assert body["links"][0]["label"] == "mentions" + + @patch("cognee.infrastructure.databases.graph.get_graph_engine", new_callable=AsyncMock) + def test_empty_graph(self, mock_get_engine, client): + mock_engine = AsyncMock() + mock_engine.get_graph_data.return_value = ([], []) + mock_get_engine.return_value = mock_engine + + resp = client.get("/api/documents/graph") + + assert resp.status_code == 200 + assert resp.json() == {"nodes": [], "links": 
+
+
+# ===========================================================================
+# Graph GET /api/documents/graph
+# ===========================================================================
+
+
+class TestGraphEndpoint:
+
+    @patch("cognee.infrastructure.databases.graph.get_graph_engine", new_callable=AsyncMock)
+    def test_returns_d3_format(self, mock_get_engine, client):
+        mock_engine = AsyncMock()
+        mock_engine.get_graph_data.return_value = (
+            [
+                ("n1", {"name": "Acme Corp", "type": "Company"}),
+                ("n2", {"name": "Safety Manual", "type": "Document"}),
+            ],
+            [("n1", "n2", "mentions", {})],
+        )
+        mock_get_engine.return_value = mock_engine
+
+        resp = client.get("/api/documents/graph")
+
+        assert resp.status_code == 200
+        body = resp.json()
+        assert "nodes" in body
+        assert "links" in body
+        assert len(body["nodes"]) == 2
+        assert len(body["links"]) == 1
+        assert body["links"][0]["source"] == "n1"
+        assert body["links"][0]["target"] == "n2"
+        assert body["links"][0]["label"] == "mentions"
+
+    @patch("cognee.infrastructure.databases.graph.get_graph_engine", new_callable=AsyncMock)
+    def test_empty_graph(self, mock_get_engine, client):
+        mock_engine = AsyncMock()
+        mock_engine.get_graph_data.return_value = ([], [])
+        mock_get_engine.return_value = mock_engine
+
+        resp = client.get("/api/documents/graph")
+
+        assert resp.status_code == 200
+        assert resp.json() == {"nodes": [], "links": []}
+
+    @patch(
+        "cognee.infrastructure.databases.graph.get_graph_engine",
+        new_callable=AsyncMock,
+        side_effect=Exception("KuzuDB unavailable"),
+    )
+    def test_engine_failure_returns_empty_graph(self, _mock, client):
+        """graph_service catches exceptions and returns an empty graph."""
+        resp = client.get("/api/documents/graph")
+
+        assert resp.status_code == 200
+        assert resp.json() == {"nodes": [], "links": []}
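+
+# For reference, a sketch of the tuple-to-D3 conversion these tests imply
+# (the real code lives in app/services/graph_service.py):
+#
+#     raw_nodes, raw_edges = await engine.get_graph_data()
+#     nodes = [{"id": node_id, **props} for node_id, props in raw_nodes]
+#     links = [{"source": s, "target": t, "label": rel} for s, t, rel, _ in raw_edges]
+#     return {"nodes": nodes, "links": links}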
"completed", + "insights": None, + "entities": None, + } + ) + r2 = MagicMock() + r2.generate_presigned_url.return_value = "https://r2.example.com/signed?token=abc" + mock_r2_client.return_value = r2 + + resp = client.get("/api/documents/doc-1/file-url") + + assert resp.status_code == 200 + body = resp.json() + assert body["url"] == "https://r2.example.com/signed?token=abc" + assert body["filename"] == "report.pdf" + + @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock) + def test_document_not_found(self, mock_get_sb, client): + mock_get_sb.return_value = _mock_async_sb_single(None) + + resp = client.get("/api/documents/nonexistent/file-url") + + assert resp.status_code == 404 + + @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock) + def test_no_file_stored(self, mock_get_sb, client): + mock_get_sb.return_value = _mock_async_sb_single( + { + "id": "doc-1", + "original_filename": "report.pdf", + "file_url": None, + "status": "completed", + "insights": None, + "entities": None, + } + ) + + resp = client.get("/api/documents/doc-1/file-url") + + assert resp.status_code == 404 + assert "no raw file" in resp.json()["detail"].lower() + + @patch("app.services.storage._r2_client") + @patch("app.services.document_metadata_service.get_async_supabase", new_callable=AsyncMock) + def test_r2_not_configured(self, mock_get_sb, mock_r2_client, client): + mock_get_sb.return_value = _mock_async_sb_single( + { + "id": "doc-1", + "original_filename": "report.pdf", + "file_url": "documents/doc-1/report.pdf", + "status": "completed", + "insights": None, + "entities": None, + } + ) + mock_r2_client.return_value = None # R2 credentials missing + + resp = client.get("/api/documents/doc-1/file-url") + + assert resp.status_code == 503 + assert "not configured" in resp.json()["detail"].lower() diff --git a/backend/tests/test_storage.py b/backend/tests/test_storage.py index 873ca39..811cf32 100644 --- a/backend/tests/test_storage.py +++ b/backend/tests/test_storage.py @@ -1,143 +1,77 @@ """ -Tests for storage service. +Tests for storage service (Cloudflare R2). 
""" -from unittest.mock import ANY, MagicMock, mock_open, patch -import pytest - -from app.services.storage import ( - download_file_cloudflare, - download_file_supabase, - upload_file_cloudflare, - upload_file_supabase, -) - -# ── Cloudflare R2 Tests ──────────────────────────────────────────────────────── - -class TestUploadFileCloudflare: - @pytest.mark.asyncio - @patch("app.services.storage.s3") - async def test_upload_returns_s3_uri(self, mock_s3): - mock_s3.upload_file.return_value = None - result = await upload_file_cloudflare("local/file.txt", "my-bucket", "folder/file.txt") - - assert result == "s3://my-bucket/folder/file.txt" +from unittest.mock import MagicMock, patch - @pytest.mark.asyncio - @patch("app.services.storage.s3") - async def test_upload_calls_s3_with_correct_args(self, mock_s3): - mock_s3.upload_file.return_value = None - - await upload_file_cloudflare("local/file.txt", "my-bucket", "folder/file.txt") - - mock_s3.upload_file.assert_called_once_with("local/file.txt", "my-bucket", "folder/file.txt") - - @pytest.mark.asyncio - @patch("app.services.storage.s3") - async def test_upload_propagates_s3_exception(self, mock_s3): - mock_s3.upload_file.side_effect = Exception("S3 upload failed") +import pytest - with pytest.raises(Exception, match="S3 upload failed"): - await upload_file_cloudflare("local/file.txt", "my-bucket", "folder/file.txt") +from app.services.storage import get_presigned_url, upload_to_r2 -class TestDownloadFileCloudflare: +class TestUploadToR2: @pytest.mark.asyncio - @patch("app.services.storage.s3") - async def test_download_returns_bytes(self, mock_s3): - mock_body = MagicMock() - mock_body.read.return_value = b"file content" - mock_s3.get_object.return_value = {"Body": mock_body} + @patch("app.services.storage._r2_client") + async def test_upload_returns_key_on_success(self, mock_client_fn): + mock_client = MagicMock() + mock_client_fn.return_value = mock_client - result = await download_file_cloudflare("my-bucket", "folder/file.txt") + result = await upload_to_r2("/tmp/file.pdf", "documents/123/file.pdf") - assert result == b"file content" + assert result == "documents/123/file.pdf" + mock_client.upload_file.assert_called_once() @pytest.mark.asyncio - @patch("app.services.storage.s3") - async def test_download_calls_get_object_with_correct_args(self, mock_s3): - mock_body = MagicMock() - mock_body.read.return_value = b"" - mock_s3.get_object.return_value = {"Body": mock_body} + @patch("app.services.storage._r2_client") + async def test_upload_returns_none_when_not_configured(self, mock_client_fn): + mock_client_fn.return_value = None - await download_file_cloudflare("my-bucket", "folder/file.txt") + result = await upload_to_r2("/tmp/file.pdf", "documents/123/file.pdf") - mock_s3.get_object.assert_called_once_with(Bucket="my-bucket", Key="folder/file.txt") + assert result is None @pytest.mark.asyncio - @patch("app.services.storage.s3") - async def test_download_propagates_s3_exception(self, mock_s3): - mock_s3.get_object.side_effect = Exception("Key not found") + @patch("app.services.storage._r2_client") + async def test_upload_returns_none_on_exception(self, mock_client_fn): + mock_client = MagicMock() + mock_client.upload_file.side_effect = Exception("S3 upload failed") + mock_client_fn.return_value = mock_client - with pytest.raises(Exception, match="Key not found"): - await download_file_cloudflare("my-bucket", "folder/file.txt") + result = await upload_to_r2("/tmp/file.pdf", "documents/123/file.pdf") + assert result is None -# ── Supabase Tests 
-# ── Supabase Tests ─────────────────────────────────────────────────────────────
-
-class TestUploadFileSupabase:
-    @pytest.mark.asyncio
-    @patch("builtins.open", mock_open(read_data=b"file content"))
-    @patch("app.services.storage.supabase")
-    async def test_upload_returns_bucket_key_path(self, mock_supabase):
-        mock_supabase.storage.from_().upload.return_value = None
-
-        result = await upload_file_supabase("local/file.txt", "my-bucket", "folder/file.txt")
+class TestGetPresignedUrl:
+    @patch("app.services.storage._r2_client")
+    def test_returns_url_on_success(self, mock_client_fn):
+        mock_client = MagicMock()
+        mock_client.generate_presigned_url.return_value = "https://r2.example.com/signed"
+        mock_client_fn.return_value = mock_client
 
-        assert result == "my-bucket/folder/file.txt"
+        result = get_presigned_url("documents/123/file.pdf")
 
-    @pytest.mark.asyncio
-    @patch("builtins.open", mock_open(read_data=b"file content"))
-    @patch("app.services.storage.supabase")
-    async def test_upload_calls_storage_with_correct_args(self, mock_supabase):
-        mock_storage = MagicMock()
-        mock_supabase.storage.from_.return_value = mock_storage
-
-        await upload_file_supabase("local/file.txt", "my-bucket", "folder/file.txt")
-
-        mock_supabase.storage.from_.assert_called_once_with("my-bucket")
-        mock_storage.upload.assert_called_once_with(
-            path="folder/file.txt",
-            file=ANY,
-            file_options={"content-type": "application/octet-stream"},
+        assert result == "https://r2.example.com/signed"
+        mock_client.generate_presigned_url.assert_called_once_with(
+            "get_object",
+            Params={"Bucket": "cortex-documents", "Key": "documents/123/file.pdf"},
+            ExpiresIn=3600,
         )
 
-    @pytest.mark.asyncio
-    @patch("builtins.open", mock_open(read_data=b"file content"))
-    @patch("app.services.storage.supabase")
-    async def test_upload_propagates_storage_exception(self, mock_supabase):
-        mock_supabase.storage.from_().upload.side_effect = Exception("Upload failed")
-
-        with pytest.raises(Exception, match="Upload failed"):
-            await upload_file_supabase("local/file.txt", "my-bucket", "folder/file.txt")
-
+    @patch("app.services.storage._r2_client")
+    def test_returns_none_when_not_configured(self, mock_client_fn):
+        mock_client_fn.return_value = None
 
-class TestDownloadFileSupabase:
-    @pytest.mark.asyncio
-    @patch("app.services.storage.supabase")
-    async def test_download_returns_bytes(self, mock_supabase):
-        mock_supabase.storage.from_().download.return_value = b"file content"
-
-        result = await download_file_supabase("my-bucket", "folder/file.txt")
-
-        assert result == b"file content"
+        result = get_presigned_url("documents/123/file.pdf")
 
-    @pytest.mark.asyncio
-    @patch("app.services.storage.supabase")
-    async def test_download_calls_storage_with_correct_args(self, mock_supabase):
-        mock_storage = MagicMock()
-        mock_storage.download.return_value = b""
-        mock_supabase.storage.from_.return_value = mock_storage
-
-        await download_file_supabase("my-bucket", "folder/file.txt")
+        assert result is None
 
-        mock_supabase.storage.from_.assert_called_once_with("my-bucket")
-        mock_storage.download.assert_called_once_with("folder/file.txt")
+    @patch("app.services.storage._r2_client")
+    def test_returns_none_on_exception(self, mock_client_fn):
+        mock_client = MagicMock()
+        mock_client.generate_presigned_url.side_effect = Exception("Failed")
+        mock_client_fn.return_value = mock_client
 
-    @pytest.mark.asyncio
-    @patch("app.services.storage.supabase")
-    async def test_download_propagates_storage_exception(self, mock_supabase):
-        mock_supabase.storage.from_().download.side_effect = Exception("File not found")
Exception("File not found") + result = get_presigned_url("documents/123/file.pdf") - with pytest.raises(Exception, match="File not found"): - await download_file_supabase("my-bucket", "folder/file.txt") + assert result is None diff --git a/docker-compose.yml b/docker-compose.yml index 61e5b66..1ee8f65 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,8 +17,13 @@ services: DB_PASSWORD: ${DB_PASSWORD:-postgres} # Note: DB_PASSWORD must not contain URL-special characters (@, :, /, %) VECTOR_DB_URL: postgresql://${DB_USER:-postgres}:${DB_PASSWORD:-postgres}@postgres:5432/${DB_NAME:-cortex} + GRAPH_DATABASE_PROVIDER: kuzu + GRAPH_DATASET_DATABASE_HANDLER: kuzu + SYSTEM_ROOT_DIRECTORY: /app/.cognee_system + ENABLE_BACKEND_ACCESS_CONTROL: "false" volumes: - ./backend:/app + - /app/.venv - cognee-data:/app/.cognee_system depends_on: postgres: @@ -30,7 +35,7 @@ services: image: pgvector/pgvector:pg16 container_name: cortex-postgres ports: - - "127.0.0.1:5432:5432" + - "127.0.0.1:5433:5432" environment: POSTGRES_DB: ${DB_NAME:-cortex} POSTGRES_USER: ${DB_USER:-postgres} @@ -50,4 +55,3 @@ volumes: networks: default: name: cortex-network - external: true diff --git a/frontend/.gitignore b/frontend/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/frontend/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/frontend/.prettierrc b/frontend/.prettierrc new file mode 100644 index 0000000..60a7584 --- /dev/null +++ b/frontend/.prettierrc @@ -0,0 +1,9 @@ +{ + "semi": false, + "singleQuote": true, + "tabWidth": 2, + "trailingComma": "es5", + "printWidth": 80, + "bracketSpacing": true, + "arrowParens": "avoid" +} diff --git a/frontend/Dockerfile.dev b/frontend/Dockerfile.dev new file mode 100644 index 0000000..1c00415 --- /dev/null +++ b/frontend/Dockerfile.dev @@ -0,0 +1,13 @@ +FROM node:22-alpine + +WORKDIR /app + +COPY package.json package-lock.json* ./ + +RUN npm ci + +COPY . . + +EXPOSE 5173 + +CMD ["npm", "run", "dev"] \ No newline at end of file diff --git a/frontend/Dockerfile.prod b/frontend/Dockerfile.prod new file mode 100644 index 0000000..5c57c8b --- /dev/null +++ b/frontend/Dockerfile.prod @@ -0,0 +1,28 @@ +FROM node:22-alpine AS builder + +WORKDIR /app + +# Declare build arguments +ARG VITE_ENVIRONMENT +ARG VITE_SUPABASE_URL +ARG VITE_SUPABASE_PUBLISHABLE_KEY +ARG VITE_API_BASE_URL + +# Set as environment variables for Vite +ENV VITE_ENVIRONMENT=$VITE_ENVIRONMENT +ENV VITE_SUPABASE_URL=$VITE_SUPABASE_URL +ENV VITE_SUPABASE_PUBLISHABLE_KEY=$VITE_SUPABASE_PUBLISHABLE_KEY +ENV VITE_API_BASE_URL=$VITE_API_BASE_URL + +COPY package.json package-lock.json* ./ +RUN npm ci + +COPY . . 
+RUN npm ci
+
+COPY . .
+
+EXPOSE 5173
+
+CMD ["npm", "run", "dev"]
\ No newline at end of file
diff --git a/frontend/Dockerfile.prod b/frontend/Dockerfile.prod
new file mode 100644
index 0000000..5c57c8b
--- /dev/null
+++ b/frontend/Dockerfile.prod
@@ -0,0 +1,28 @@
+FROM node:22-alpine AS builder
+
+WORKDIR /app
+
+# Declare build arguments
+ARG VITE_ENVIRONMENT
+ARG VITE_SUPABASE_URL
+ARG VITE_SUPABASE_PUBLISHABLE_KEY
+ARG VITE_API_BASE_URL
+
+# Set as environment variables for Vite
+ENV VITE_ENVIRONMENT=$VITE_ENVIRONMENT
+ENV VITE_SUPABASE_URL=$VITE_SUPABASE_URL
+ENV VITE_SUPABASE_PUBLISHABLE_KEY=$VITE_SUPABASE_PUBLISHABLE_KEY
+ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
+
+COPY package.json package-lock.json* ./
+RUN npm ci
+
+COPY . .
+RUN npm run build
+
+FROM nginx:alpine
+COPY --from=builder /app/dist /usr/share/nginx/html
+COPY nginx.conf /etc/nginx/nginx.conf
+
+EXPOSE 80
+CMD ["nginx", "-g", "daemon off;"]
\ No newline at end of file
diff --git a/frontend/eslint.config.js b/frontend/eslint.config.js
new file mode 100644
index 0000000..b19330b
--- /dev/null
+++ b/frontend/eslint.config.js
@@ -0,0 +1,23 @@
+import js from '@eslint/js'
+import globals from 'globals'
+import reactHooks from 'eslint-plugin-react-hooks'
+import reactRefresh from 'eslint-plugin-react-refresh'
+import tseslint from 'typescript-eslint'
+import { defineConfig, globalIgnores } from 'eslint/config'
+
+export default defineConfig([
+  globalIgnores(['dist']),
+  {
+    files: ['**/*.{ts,tsx}'],
+    extends: [
+      js.configs.recommended,
+      tseslint.configs.recommended,
+      reactHooks.configs['recommended-latest'],
+      reactRefresh.configs.vite,
+    ],
+    languageOptions: {
+      ecmaVersion: 2020,
+      globals: globals.browser,
+    },
+  },
+])
diff --git a/frontend/index.html b/frontend/index.html
index 9567726..3286003 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -4,11 +4,17 @@
 [hunk markup lost in extraction; the surviving fragments show the page title changing to "Cortex"]
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
new file mode 100644
index 0000000..539224b
--- /dev/null
+++ b/frontend/nginx.conf
@@ -0,0 +1,74 @@
+events {
+    worker_connections 1024;
+}
+
+http {
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    # Logging
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+
+    access_log /var/log/nginx/access.log main;
+    error_log /var/log/nginx/error.log;
+
+    # Performance
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+
+    # Gzip compression
+    gzip on;
+    gzip_vary on;
+    gzip_min_length 1024;
+    gzip_types
+        text/plain
+        text/css
+        text/xml
+        text/javascript
+        application/javascript
+        application/xml+rss
+        application/json;
+
+    server {
+        listen 80;
+        listen [::]:80;
+        server_name _;
+
+        root /usr/share/nginx/html;
+        index index.html;
+
+        # Security headers
+        add_header X-Frame-Options "SAMEORIGIN" always;
+        add_header X-Content-Type-Options "nosniff" always;
+        add_header X-XSS-Protection "1; mode=block" always;
+        add_header Referrer-Policy "no-referrer-when-downgrade" always;
+
+        # Handle React Router (SPA)
+        location / {
+            try_files $uri $uri/ /index.html;
+        }
+
+        # Cache static assets
+        location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+        }
+
+        # Health check endpoint
+        location /health {
+            access_log off;
+            return 200 "healthy\n";
+            add_header Content-Type text/plain;
+        }
+
+        # Disable access to hidden files
+        location ~ /\. {
+            deny all;
+        }
+    }
+}
\ No newline at end of file
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 96e3ae2..7fc3632 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -959,9 +959,6 @@
         "arm"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -976,9 +973,6 @@
         "arm"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
      "optional": true,
       "os": [
@@ -993,9 +987,6 @@
         "arm64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1010,9 +1001,6 @@
         "arm64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1027,9 +1015,6 @@
         "loong64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1044,9 +1029,6 @@
         "loong64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1061,9 +1043,6 @@
         "ppc64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1078,9 +1057,6 @@
         "ppc64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1095,9 +1071,6 @@
         "riscv64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1112,9 +1085,6 @@
         "riscv64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1129,9 +1099,6 @@
         "s390x"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1146,9 +1113,6 @@
         "x64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1163,9 +1127,6 @@
         "x64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
diff --git a/frontend/public/favicon.ico b/frontend/public/favicon.ico
new file mode 100644
index 0000000..2ff04ae
Binary files /dev/null and b/frontend/public/favicon.ico differ
diff --git a/frontend/src/components/Navbar.tsx b/frontend/src/components/Navbar.tsx
index 4765734..e2b5e74 100644
--- a/frontend/src/components/Navbar.tsx
+++ b/frontend/src/components/Navbar.tsx
@@ -39,9 +39,7 @@ export default function Navbar() {
             key={to}
             to={to}
             className={`relative px-4 py-2 text-sm font-medium transition-colors duration-200 ${
-              active
-                ? 'text-white'
-                : 'text-zinc-400 hover:text-white'
+              active ? 'text-white' : 'text-zinc-400 hover:text-white'
             }`}
           >
             {label}
diff --git a/frontend/src/components/NodeDetailPanel.tsx b/frontend/src/components/NodeDetailPanel.tsx
new file mode 100644
index 0000000..fc86aa8
--- /dev/null
+++ b/frontend/src/components/NodeDetailPanel.tsx
@@ -0,0 +1,310 @@
+import { useEffect, useRef } from 'react'
+import { useQuery } from '@tanstack/react-query'
+import { Link } from 'react-router-dom'
+import {
+  searchChunks,
+  listDocuments,
+  type GraphNode,
+  type GraphLink,
+} from '../services/api'
+
+interface ConnectedEntity {
+  id: string
+  name: string
+  relationship: string
+  direction: 'outgoing' | 'incoming'
+}
+
+interface Props {
+  node: GraphNode
+  links: GraphLink[]
+  nodes: GraphNode[]
+  onClose: () => void
+  onSelectNode: (node: GraphNode) => void
+}
+
+export default function NodeDetailPanel({
+  node,
+  links,
+  nodes,
+  onClose,
+  onSelectNode,
+}: Props) {
+  const panelRef = useRef<HTMLDivElement>(null)
+
+  // Close on click outside
+  useEffect(() => {
+    const handler = (e: MouseEvent) => {
+      if (panelRef.current && !panelRef.current.contains(e.target as Node)) {
+        onClose()
+      }
+    }
+    const timer = setTimeout(
+      () => document.addEventListener('mousedown', handler),
+      100
+    )
+    return () => {
+      clearTimeout(timer)
+      document.removeEventListener('mousedown', handler)
+    }
+  }, [onClose])
+
+  // Close on Escape
+  useEffect(() => {
+    const handler = (e: KeyboardEvent) => {
+      if (e.key === 'Escape') onClose()
+    }
+    document.addEventListener('keydown', handler)
+    return () => document.removeEventListener('keydown', handler)
+  }, [onClose])
+
+  // Find connected entities from graph data
+  const connected: ConnectedEntity[] = []
+  const nodeMap = new Map(nodes.map(n => [n.id, n]))
+
+  for (const link of links) {
+    const src =
+      typeof link.source === 'object'
+        ? (link.source as GraphNode).id
+        : link.source
+    const tgt =
+      typeof link.target === 'object'
+        ? (link.target as GraphNode).id
+        : link.target
+
+    if (src === node.id) {
+      const target = nodeMap.get(tgt)
+      if (target) {
+        connected.push({
+          id: target.id,
+          name: target.name,
+          relationship: link.label,
+          direction: 'outgoing',
+        })
+      }
+    } else if (tgt === node.id) {
+      const source = nodeMap.get(src)
+      if (source) {
+        connected.push({
+          id: source.id,
+          name: source.name,
+          relationship: link.label,
+          direction: 'incoming',
+        })
+      }
+    }
+  }
+
+  // Search for related content
+  const isUUID = /^[0-9a-f]{8}-[0-9a-f]{4}-/i.test(node.name)
+  const { data: searchData, isLoading: searchLoading } = useQuery({
+    queryKey: ['node-chunks', node.name],
+    queryFn: () => searchChunks(node.name, 5),
+    enabled: !isUUID,
+    staleTime: 60_000,
+  })
+
+  // Find documents that might relate to this node
+  const { data: docs = [] } = useQuery({
+    queryKey: ['documents'],
+    queryFn: listDocuments,
+    staleTime: 30_000,
+  })
+
+  // Match documents that mention this entity in their entities array
+  const relatedDocs = docs.filter(
+    d =>
+      d.status === 'completed' &&
+      d.entities?.some(e => e.toLowerCase().includes(node.name.toLowerCase()))
+  )
+
+  return (
+ + + {/* Header */} +
+
+
+

+ {isUUID ? node.id.slice(0, 12) + '...' : node.name} +

+
+ + Entity + + + {node.val - 1} connection{node.val - 1 !== 1 ? 's' : ''} + +
+
+ +
+
+
+ +
+ {/* Connected Entities */} + {connected.length > 0 && ( +
+

+ Connected Entities +

+
+ {connected.map((c, i) => ( + + ))} +
+
+ )} + + {/* Related Content */} + {!isUUID && ( +
+

+ Related Content +

+ {searchLoading ? ( +
+ {[1, 2, 3].map(i => ( +
+ ))} +
+ ) : searchData && searchData.results.length > 0 ? ( +
+ {searchData.results.map((r, i) => ( +
+

+ {r.text} +

+ {r.dataset_name && ( + + {r.dataset_name} + + )} +
+ ))} +
+ ) : ( +

+ No related content found +

+ )} +
+ )} + + {/* Source Documents */} + {relatedDocs.length > 0 && ( +
+

+ Source Documents +

+
+ {relatedDocs.map(doc => ( + + + + + +
+ + {doc.original_filename} + + {doc.dataset_name && ( + + {doc.dataset_name} + + )} +
+ + ))} +
+
+ )} +
+
+ ) +} diff --git a/frontend/src/index.css b/frontend/src/index.css index d26b998..0340d71 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -80,8 +80,12 @@ /* Skeleton shimmer */ @keyframes shimmer { - 0% { background-position: -800px 0; } - 100% { background-position: 800px 0; } + 0% { + background-position: -800px 0; + } + 100% { + background-position: 800px 0; + } } .skeleton { @@ -98,6 +102,11 @@ /* Progress bar animation */ @keyframes progress-pulse { - 0%, 100% { opacity: 1; } - 50% { opacity: 0.6; } + 0%, + 100% { + opacity: 1; + } + 50% { + opacity: 0.6; + } } diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx index 92e8df4..a903d75 100644 --- a/frontend/src/main.tsx +++ b/frontend/src/main.tsx @@ -22,5 +22,5 @@ createRoot(rootElement).render( - , + ) diff --git a/frontend/src/pages/DocumentDetailPage.tsx b/frontend/src/pages/DocumentDetailPage.tsx index 7326f37..296edee 100644 --- a/frontend/src/pages/DocumentDetailPage.tsx +++ b/frontend/src/pages/DocumentDetailPage.tsx @@ -2,7 +2,12 @@ import { useState } from 'react' import { Link, useParams } from 'react-router-dom' import { useQuery } from '@tanstack/react-query' import Navbar from '../components/Navbar' -import { getDocument, getDocumentFileUrl, type Document, type ProgressStage } from '../services/api' +import { + getDocument, + getDocumentFileUrl, + type Document, + type ProgressStage, +} from '../services/api' const DOC_TYPE_COLORS: Record = { RFQ: 'bg-blue-500/15 border-blue-500/25 text-blue-300', @@ -52,10 +57,10 @@ function parseInsight(insight: string): { parts: string[]; arrows: boolean } { const sep = insight.includes(' → ') ? ' → ' : insight.includes('->') - ? '->' - : insight.includes(' - ') - ? ' - ' - : null + ? '->' + : insight.includes(' - ') + ? ' - ' + : null if (sep) { return { parts: insight.split(sep), arrows: true } } @@ -66,12 +71,16 @@ export default function DocumentDetailPage() { const { id } = useParams<{ id: string }>() const [activeTab, setActiveTab] = useState('summary') - const { data: doc, isLoading, isError } = useQuery({ + const { + data: doc, + isLoading, + isError, + } = useQuery({ queryKey: ['document', id], queryFn: () => getDocument(id!), enabled: !!id, staleTime: 5000, - refetchInterval: (query) => { + refetchInterval: query => { const d = query.state.data return d?.status === 'processing' ? 2000 : false }, @@ -103,7 +112,16 @@ export default function DocumentDetailPage() { to="/documents" className="inline-flex items-center gap-2 text-sm text-[#a1a1aa] hover:text-white transition-colors mb-8" > - + @@ -125,7 +143,9 @@ export default function DocumentDetailPage() { {/* Error */} {isError && (
-

Failed to load document

+

+ Failed to load document +

The document may not exist or there was a server error.

@@ -154,7 +174,9 @@ export default function DocumentDetailPage() { )} {doc.document_type && ( - + {doc.document_type} )} @@ -172,7 +194,9 @@ export default function DocumentDetailPage() {
@@ -186,7 +210,9 @@ export default function DocumentDetailPage() { key={key} onClick={() => setActiveTab(key)} className={`relative px-4 py-2.5 text-sm font-medium transition-colors duration-200 ${ - activeTab === key ? 'text-white' : 'text-zinc-400 hover:text-white' + activeTab === key + ? 'text-white' + : 'text-zinc-400 hover:text-white' }`} > @@ -213,8 +239,12 @@ export default function DocumentDetailPage() { {/* Content */} {activeTab === 'document' && } {activeTab === 'summary' && } - {activeTab === 'insights' && } - {activeTab === 'entities' && } + {activeTab === 'insights' && ( + + )} + {activeTab === 'entities' && ( + + )} )}
@@ -241,7 +271,8 @@ function DocumentTab({ doc }: { doc: Document }) { return (

- Raw file not stored — configure Cloudflare R2 credentials to enable document storage. + Raw file not stored — configure Cloudflare R2 credentials to enable + document storage.

) @@ -270,7 +301,16 @@ function DocumentTab({ doc }: { doc: Document }) { rel="noopener noreferrer" className="inline-flex items-center gap-1.5 text-xs text-violet-400 hover:text-violet-300 transition-colors" > - + @@ -291,7 +331,9 @@ function DocumentTab({ doc }: { doc: Document }) { {isCsv && (
-

CSV files cannot be previewed inline.

+

+ CSV files cannot be previewed inline. +

-

Preview not available for this file type.

+

+ Preview not available for this file type. +

{label} @@ -365,7 +414,9 @@ function SummaryTab({ doc }: { doc: Document }) { if (!doc.summary) { return (
-

No summary available for this document.

+

+ No summary available for this document. +

) } @@ -373,7 +424,9 @@ function SummaryTab({ doc }: { doc: Document }) { return (
-

{doc.summary}

+

+ {doc.summary} +

{doc.raw_chunks_count} chunks processed @@ -414,15 +467,21 @@ function InsightsTab({ insights }: { insights: string[] }) {
{parts.map((part, i) => ( - {part.trim()} + + {part.trim()} + {i < parts.length - 1 && ( - + + → + )} ))}
) : ( -

{insight}

+

+ {insight} +

)}
) diff --git a/frontend/src/pages/DocumentsPage.tsx b/frontend/src/pages/DocumentsPage.tsx index ffa5731..ba19e01 100644 --- a/frontend/src/pages/DocumentsPage.tsx +++ b/frontend/src/pages/DocumentsPage.tsx @@ -14,7 +14,11 @@ const DOC_TYPE_COLORS: Record = { function formatDate(iso: string): string { try { - return new Date(iso).toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric' }) + return new Date(iso).toLocaleDateString('en-US', { + month: 'short', + day: 'numeric', + year: 'numeric', + }) } catch { return iso } @@ -23,27 +27,30 @@ function formatDate(iso: string): string { export default function DocumentsPage() { const [searchParams] = useSearchParams() const [nameFilter, setNameFilter] = useState('') - const [datasetFilter, setDatasetFilter] = useState(searchParams.get('dataset') ?? '') + const [datasetFilter, setDatasetFilter] = useState( + searchParams.get('dataset') ?? '' + ) - const hasProcessing = (docs: Document[]) => docs.some((d) => d.status === 'processing') + const hasProcessing = (docs: Document[]) => + docs.some(d => d.status === 'processing') const { data: docs = [], isLoading } = useQuery({ queryKey: ['documents'], queryFn: listDocuments, staleTime: 5000, - refetchInterval: (query) => { + refetchInterval: query => { const docs = query.state.data return docs && hasProcessing(docs) ? 5000 : false }, }) const datasets = useMemo(() => { - const set = new Set(docs.map((d) => d.dataset_name).filter(Boolean)) + const set = new Set(docs.map(d => d.dataset_name).filter(Boolean)) return Array.from(set).sort() }, [docs]) const filtered = useMemo(() => { - return docs.filter((doc) => { + return docs.filter(doc => { const matchName = nameFilter ? doc.original_filename.toLowerCase().includes(nameFilter.toLowerCase()) : true @@ -70,7 +77,8 @@ export default function DocumentsPage() {

Documents

- {docs.length} document{docs.length !== 1 ? 's' : ''} in your knowledge base + {docs.length} document{docs.length !== 1 ? 's' : ''} in your + knowledge base

@@ -78,7 +86,16 @@ export default function DocumentsPage() {
- + @@ -86,7 +103,7 @@ export default function DocumentsPage() {
@@ -107,8 +126,11 @@ export default function DocumentsPage() { {/* Loading */} {isLoading && (
- {[0, 1, 2, 3, 4, 5].map((i) => ( -
+ {[0, 1, 2, 3, 4, 5].map(i => ( +
@@ -123,7 +145,7 @@ export default function DocumentsPage() { {/* Document grid */} {!isLoading && filtered.length > 0 && (
- {filtered.map((doc) => ( + {filtered.map(doc => ( ))}
@@ -133,7 +155,17 @@ export default function DocumentsPage() { {!isLoading && filtered.length === 0 && (
- + @@ -173,11 +205,17 @@ function DocumentCard({ doc }: { doc: Document }) { {/* Filename + status */}
-

+

{doc.original_filename}

- +
{/* Badges */} @@ -188,7 +226,9 @@ function DocumentCard({ doc }: { doc: Document }) { )} {doc.document_type && ( - + {doc.document_type} )} @@ -196,7 +236,10 @@ function DocumentCard({ doc }: { doc: Document }) { {/* Stats */}

- {doc.insights?.length ?? 0} insight{(doc.insights?.length ?? 0) !== 1 ? 's' : ''} · {doc.entities?.length ?? 0} entit{(doc.entities?.length ?? 0) !== 1 ? 'ies' : 'y'} + {doc.insights?.length ?? 0} insight + {(doc.insights?.length ?? 0) !== 1 ? 's' : ''} ·{' '} + {doc.entities?.length ?? 0} entit + {(doc.entities?.length ?? 0) !== 1 ? 'ies' : 'y'}

                {/* Date */}
diff --git a/frontend/src/pages/GraphPage.tsx b/frontend/src/pages/GraphPage.tsx index 6719f74..6da06e5 100644 --- a/frontend/src/pages/GraphPage.tsx +++ b/frontend/src/pages/GraphPage.tsx
@@ -1,8 +1,16 @@
 import { useRef, useEffect, useState, useCallback, useMemo } from 'react'
 import { useQuery } from '@tanstack/react-query'
+import { useSearchParams } from 'react-router-dom'
 import ForceGraph2D from 'react-force-graph-2d'
 import Navbar from '../components/Navbar'
-import { getGraphData, listDocuments, type GraphNode, type GraphLink } from '../services/api'
+import {
+  getGraphData,
+  listDocuments,
+  type GraphData,
+  type GraphNode,
+  type GraphLink,
+} from '../services/api'
+import NodeDetailPanel from '../components/NodeDetailPanel'

 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 type NodeObj = GraphNode & { x?: number; y?: number; [k: string]: any }
@@ -11,10 +19,20 @@ type LinkObj = GraphLink & { [k: string]: any }

 export default function GraphPage() {
   const wrapperRef = useRef(null)
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const fgRef = useRef(null)
+  const hasZoomed = useRef(false)
+  const appliedUrlParams = useRef(false)
+  const [searchParams] = useSearchParams()
   const [width, setWidth] = useState(800)
-  const [selectedDataset, setSelectedDataset] = useState('')
+  const [selectedDataset, setSelectedDataset] = useState(
+    searchParams.get('dataset') || ''
+  )
   const [hoveredNode, setHoveredNode] = useState(null)
   const [hoveredLink, setHoveredLink] = useState(null)
+  const [selectedNode, setSelectedNode] = useState(null)
+  const [nodeSearch, setNodeSearch] = useState('')
+  const [nodeSearchFocused, setNodeSearchFocused] = useState(false)

   const { data: docs = [] } = useQuery({
     queryKey: ['documents'],
@@ -23,20 +41,26 @@ export default function GraphPage() {
   })

   const datasets = useMemo(() => {
-    const set = new Set(docs.map((d) => d.dataset_name).filter(Boolean))
+    const set = new Set(docs.map(d => d.dataset_name).filter(Boolean))
     return Array.from(set).sort()
   }, [docs])

-  const { data: graphData, isLoading } = useQuery({
+  const { data: rawGraphData, isLoading } = useQuery({
     queryKey: ['graph', selectedDataset],
     queryFn: () => getGraphData(selectedDataset || undefined),
-    staleTime: 5000,
+    staleTime: 30_000,
   })

+  const graphData = useMemo(() => {
+    if (!rawGraphData) return undefined
+    hasZoomed.current = false
+    return { nodes: [...rawGraphData.nodes], links: [...rawGraphData.links] }
+  }, [rawGraphData])
+
   useEffect(() => {
     const el = wrapperRef.current
     if (!el) return
-    const ro = new ResizeObserver((entries) => {
+    const ro = new ResizeObserver(entries => {
       const rect = entries[0]?.contentRect
       if (rect) setWidth(rect.width)
     })
@@ -45,17 +69,217 @@ export default function GraphPage() {
     return () => ro.disconnect()
   }, [])

-  const graphHeight = typeof window !== 'undefined' ? Math.max(window.innerHeight - 260, 400) : 600
+  const graphHeight =
+    typeof window !== 'undefined'
+      ? Math.max(window.innerHeight - 260, 400)
+      : 600

   const handleNodeHover = useCallback((node: NodeObj | null) => {
     setHoveredNode(node ? (node.name ?? node.id ?? null) : null)
   }, [])

   const handleLinkHover = useCallback((link: LinkObj | null) => {
-    setHoveredLink(link ? (link.label as string | undefined) ?? null : null)
+    setHoveredLink(link ? ((link.label as string | undefined) ?? null) : null)
   }, [])

-  const hasData = graphData && (graphData.nodes.length > 0 || graphData.links.length > 0)
+  const handleNodeClick = useCallback((node: NodeObj) => {
+    setSelectedNode({
+      id: String(node.id),
+      name: node.name,
+      val: node.val ?? 1,
+    })
+    setNodeSearch('')
+    setNodeSearchFocused(false)
+  }, [])
+
+  // Neighbor IDs for highlight when a node is selected
+  const neighborIds = useMemo(() => {
+    if (!selectedNode || !graphData) return new Set()
+    const ids = new Set()
+    for (const link of graphData.links) {
+      const src =
+        typeof link.source === 'object'
+          ? (link.source as GraphNode).id
+          : link.source
+      const tgt =
+        typeof link.target === 'object'
+          ? (link.target as GraphNode).id
+          : link.target
+      if (src === selectedNode.id) ids.add(tgt)
+      else if (tgt === selectedNode.id) ids.add(src)
+    }
+    return ids
+  }, [selectedNode, graphData])
+
+  // Dynamic link color based on selection
+  const linkColorFn = useCallback(
+    (link: LinkObj) => {
+      if (!selectedNode) return 'rgba(255,255,255,0.15)'
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const src =
+        typeof link.source === 'object' ? (link.source as any).id : link.source
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const tgt =
+        typeof link.target === 'object' ? (link.target as any).id : link.target
+      if (src === selectedNode.id || tgt === selectedNode.id)
+        return 'rgba(167,139,250,0.5)'
+      return 'rgba(255,255,255,0.04)'
+    },
+    [selectedNode]
+  )
+
+  // Node search results (client-side filter)
+  const nodeSearchResults = useMemo(() => {
+    if (!nodeSearch.trim() || !graphData) return []
+    const q = nodeSearch.toLowerCase()
+    return graphData.nodes
+      .filter(
+        n => !/^[0-9a-f]{8}-/i.test(n.name) && n.name.toLowerCase().includes(q)
+      )
+      .slice(0, 8)
+  }, [nodeSearch, graphData])
+
+  // Zoom to a specific node
+  const zoomToNode = useCallback(
+    (node: GraphNode) => {
+      if (!fgRef.current || !graphData) return
+      // Find the live node object with x/y coordinates
+      const liveNode = (graphData.nodes as NodeObj[]).find(
+        n => n.id === node.id
+      )
+      if (liveNode?.x != null && liveNode?.y != null) {
+        fgRef.current.centerAt(liveNode.x, liveNode.y, 600)
+        fgRef.current.zoom(2.5, 600)
+      }
+    },
+    [graphData]
+  )
+
+  // Compute degree per node for sizing
+  const degreeMap = useMemo(() => {
+    const map = new Map()
+    if (!graphData) return map
+    for (const link of graphData.links) {
+      map.set(link.source as string, (map.get(link.source as string) || 0) + 1)
+      map.set(link.target as string, (map.get(link.target as string) || 0) + 1)
+    }
+    return map
+  }, [graphData])
+
+  const nodeCanvasObject = useCallback(
+    (node: NodeObj, ctx: CanvasRenderingContext2D, globalScale: number) => {
+      const rawLabel = node.name || String(node.id || '')
+      const isUUID = /^[0-9a-f]{8}-[0-9a-f]{4}-/i.test(rawLabel)
+      const label = isUUID ? '' : rawLabel
+      const degree = degreeMap.get(String(node.id)) || 1
+      const radius = Math.max(3, Math.sqrt(degree) * 3)
+      const x = node.x ?? 0
+      const y = node.y ?? 0
+      const nodeId = String(node.id)
+      const isHovered = hoveredNode === (node.name ?? node.id ?? null)
+      const isSelected = selectedNode?.id === nodeId
+      const isNeighbor = neighborIds.has(nodeId)
+      const hasFocus = !!selectedNode // is any node selected?
+      const isDimmed = hasFocus && !isSelected && !isNeighbor
+
+      // Node circle
+      ctx.beginPath()
+      ctx.arc(x, y, radius, 0, 2 * Math.PI)
+      if (isSelected) {
+        ctx.fillStyle = '#a78bfa'
+      } else if (isDimmed) {
+        ctx.fillStyle = 'rgba(124,58,237,0.2)'
+      } else if (isHovered) {
+        ctx.fillStyle = '#a78bfa'
+      } else {
+        ctx.fillStyle = '#7c3aed'
+      }
+      ctx.fill()
+
+      // Glow ring on selected or hovered
+      if (isSelected) {
+        ctx.strokeStyle = '#c4b5fd'
+        ctx.lineWidth = 2
+        ctx.stroke()
+        ctx.beginPath()
+        ctx.arc(x, y, radius + 3, 0, 2 * Math.PI)
+        ctx.strokeStyle = 'rgba(196,181,253,0.25)'
+        ctx.lineWidth = 1
+        ctx.stroke()
+      } else if (isHovered && !isDimmed) {
+        ctx.strokeStyle = '#c4b5fd'
+        ctx.lineWidth = 1.5
+        ctx.stroke()
+      }
+
+      // Label logic
+      const showLabel =
+        isSelected ||
+        isNeighbor ||
+        isHovered ||
+        (!isDimmed && (globalScale > 1.5 || degree >= 4))
+      if (label && showLabel) {
+        const fontSize = Math.max(10, 12 / globalScale)
+        ctx.font = `${fontSize}px sans-serif`
+        ctx.textAlign = 'center'
+        ctx.textBaseline = 'top'
+        if (isSelected) ctx.fillStyle = '#e9d5ff'
+        else if (isDimmed) ctx.fillStyle = 'rgba(255,255,255,0.15)'
+        else if (isHovered) ctx.fillStyle = '#e9d5ff'
+        else ctx.fillStyle = 'rgba(255,255,255,0.7)'
+        ctx.fillText(label, x, y + radius + 2)
+      }
+    },
+    [degreeMap, hoveredNode, selectedNode, neighborIds]
+  )
+
+  const nodePointerAreaPaint = useCallback(
+    (node: NodeObj, color: string, ctx: CanvasRenderingContext2D) => {
+      const degree = degreeMap.get(String(node.id)) || 1
+      const radius = Math.max(3, Math.sqrt(degree) * 3) + 2
+      ctx.beginPath()
+      ctx.arc(node.x ?? 0, node.y ?? 0, radius, 0, 2 * Math.PI)
+      ctx.fillStyle = color
+      ctx.fill()
+    },
+    [degreeMap]
+  )
+
+  // Apply URL params once graph data loads
+  useEffect(() => {
+    if (!graphData || appliedUrlParams.current) return
+    const nodeParam = searchParams.get('node')
+    if (nodeParam) {
+      const match = graphData.nodes.find(
+        n => n.name.toLowerCase() === nodeParam.toLowerCase()
+      )
+      if (match) {
+        setSelectedNode(match)
+        // Zoom to node after a short delay for simulation to settle
+        setTimeout(() => zoomToNode(match), 800)
+        appliedUrlParams.current = true
+      }
+    }
+  }, [graphData, searchParams, zoomToNode])
+
+  // Configure force simulation for better spread
+  useEffect(() => {
+    if (!fgRef.current) return
+    fgRef.current.d3Force('charge')?.strength(-150)
+    fgRef.current.d3Force('link')?.distance(60)
+    fgRef.current.d3Force('center')?.strength(0.05)
+  })
+
+  // Zoom to fit only on first load
+  const handleEngineStop = useCallback(() => {
+    if (fgRef.current && !hasZoomed.current) {
+      hasZoomed.current = true
+      fgRef.current.zoomToFit(400, 60)
+    }
+  }, [])
+
+  const hasData =
+    graphData && (graphData.nodes.length > 0 || graphData.links.length > 0)

   return (
@@ -70,73 +294,229 @@ export default function GraphPage() {
         />
-
+
-

Knowledge Graph

-

-            {graphData
-              ? `${graphData.nodes.length} nodes · ${graphData.links.length} relationships`
-              : 'Explore entity relationships across your documents'}
-

+

+ Knowledge Graph +

+
+ {graphData ? ( + <> + + + {graphData.nodes.length} nodes + + | + + + {graphData.links.length} relationships + + + ) : ( + + Explore entity relationships across your documents + + )} +
- {/* Controls hint */} -
- {['Scroll to zoom', 'Drag to pan', 'Click node to highlight connections'].map((hint) => ( - - {hint} - - ))} -
- - {/* Hover label */} - {(hoveredNode || hoveredLink) && ( -
- {hoveredNode ? ( - <> - - - - {hoveredNode} - - ) : ( - <> - - - - - {hoveredLink} - - )} -
- )} - {/* Graph container */}
+ {/* Controls — overlaid top-left */} +
+          {[
+            { key: 'Scroll', icon: '\u21C5', label: 'Zoom' },
+            { key: 'Drag', icon: '\u2725', label: 'Pan' },
+            { key: 'Click', icon: '\u25CB', label: 'Select' },
+          ].map(hint => (
+
+              {hint.icon}
+              {hint.label}
+
+          ))}
+
+ + {/* Node search — overlaid top-right */} +
+
+ + + + + setNodeSearch(e.target.value)} + onFocus={() => setNodeSearchFocused(true)} + onBlur={() => + setTimeout(() => setNodeSearchFocused(false), 150) + } + onKeyDown={e => { + if (e.key === 'Escape') { + setNodeSearch('') + setNodeSearchFocused(false) + ;(e.target as HTMLInputElement).blur() + } + }} + placeholder="Find node..." + className="w-full pl-8 pr-3 py-1.5 rounded-lg text-xs text-white/80 placeholder-white/20 bg-white/[0.04] border border-white/[0.06] backdrop-blur-sm outline-none focus:border-white/15 focus:bg-white/[0.07] transition-all" + /> +
+ {nodeSearchFocused && + nodeSearch && + nodeSearchResults.length > 0 && ( +
+ {nodeSearchResults.map(n => ( + + ))} +
+ )} + {nodeSearchFocused && + nodeSearch && + nodeSearchResults.length === 0 && ( +
+ + No matching nodes + +
+ )} +
+ + {/* Hover tooltip — overlaid bottom-left */} + {(hoveredNode || hoveredLink) && ( +
+ {hoveredNode ? ( + <> + + + {hoveredNode} + + + node + + + ) : ( + <> + + + + + + {hoveredLink} + + + edge + + + )} +
+ )} {isLoading && (
- - - + + +

Loading graph…

@@ -147,26 +527,117 @@ export default function GraphPage() {
- - - + + +
- - - - - - - - - - + + + + + + + + + +
-

No graph data available

+

+ No graph data available +

Upload and process documents to build your knowledge graph.

@@ -176,19 +647,39 @@ export default function GraphPage() {
         {!isLoading && hasData && width > 0 && (
          [0]['graphData']}
+          ref={fgRef}
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          graphData={graphData as any}
           width={width}
           height={graphHeight}
           backgroundColor="#000000"
-          nodeColor={() => '#7c3aed'}
-          nodeRelSize={6}
-          linkColor={() => 'rgba(255,255,255,0.2)'}
-          linkDirectionalArrowLength={4}
+          nodeCanvasObject={nodeCanvasObject}
+          nodePointerAreaPaint={nodePointerAreaPaint}
+          linkColor={linkColorFn}
+          linkWidth={1}
+          linkDirectionalArrowLength={3}
           linkDirectionalArrowRelPos={1}
-          nodeLabel="name"
+          linkDirectionalArrowColor={linkColorFn}
           linkLabel="label"
+          onNodeClick={handleNodeClick}
           onNodeHover={handleNodeHover}
           onLinkHover={handleLinkHover}
+          onEngineStop={handleEngineStop}
+          cooldownTicks={200}
+          d3AlphaDecay={0.05}
+          d3VelocityDecay={0.3}
+          warmupTicks={100}
+        />
+      )}
+
+      {/* Node detail panel */}
+      {selectedNode && graphData && (
+          setSelectedNode(null)}
+          onSelectNode={n => setSelectedNode(n)}
        />
      )}
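Review note: `neighborIds` and `linkColorFn` both branch on `typeof link.source === 'object'` because react-force-graph mutates the `links` array in place once the simulation starts, replacing string endpoint IDs with references to the live node objects. A minimal sketch of a shared helper, assuming the `GraphNode`/`GraphLink` shapes exported from `services/api.ts` (`endpointId` and `neighborsOf` are hypothetical names, not part of this PR):

```ts
import type { GraphNode, GraphLink } from '../services/api'

// After the force engine starts, link.source / link.target can be either the
// original string ID or the resolved node object.
type Endpoint = string | GraphNode

function endpointId(endpoint: Endpoint): string {
  return typeof endpoint === 'object' ? String(endpoint.id) : String(endpoint)
}

// Collect the IDs of every node directly linked to nodeId, in either direction.
function neighborsOf(nodeId: string, links: GraphLink[]): Set<string> {
  const ids = new Set<string>()
  for (const link of links) {
    const src = endpointId(link.source as unknown as Endpoint)
    const tgt = endpointId(link.target as unknown as Endpoint)
    if (src === nodeId) ids.add(tgt)
    else if (tgt === nodeId) ids.add(src)
  }
  return ids
}
```

Centralizing the check would also make `degreeMap` more robust: its `link.source as string` casts only hold because the memo is computed before the engine rewrites the endpoints.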
diff --git a/frontend/src/pages/SearchPage.tsx b/frontend/src/pages/SearchPage.tsx index c912cbe..d9449d9 100644 --- a/frontend/src/pages/SearchPage.tsx +++ b/frontend/src/pages/SearchPage.tsx
@@ -1,7 +1,12 @@
 import { useState, useCallback, useRef } from 'react'
 import { useQuery } from '@tanstack/react-query'
+import { Link } from 'react-router-dom'
 import Navbar from '../components/Navbar'
-import { searchDocuments, type SearchResult, type DocumentSource } from '../services/api'
+import {
+  searchDocuments,
+  type SearchResult,
+  type DocumentSource,
+} from '../services/api'

 const DOC_TYPE_COLORS: Record = {
   RFQ: 'bg-blue-500/15 border-blue-500/25 text-blue-300',
@@ -44,7 +49,7 @@ export default function SearchPage() {
     (e: React.KeyboardEvent) => {
       if (e.key === 'Enter') handleSubmit()
     },
-    [handleSubmit],
+    [handleSubmit]
   )

   const handleExampleClick = useCallback((q: string) => {
@@ -61,21 +66,45 @@
- - - + + +
{/* Search bar */} -
+
{!hasSubmitted && (
@@ -94,7 +123,16 @@ export default function SearchPage() {
- + @@ -103,7 +141,7 @@ export default function SearchPage() { ref={inputRef} type="text" value={query} - onChange={(e) => setQuery(e.target.value)} + onChange={e => setQuery(e.target.value)} onKeyDown={handleKeyDown} placeholder="Ask a question about your documents…" className="flex-1 bg-transparent text-white placeholder-white/25 text-base py-4 px-3 outline-none" @@ -111,11 +149,22 @@ export default function SearchPage() { /> {query.length > 0 && (
- +
-

Search failed

+

+ Search failed +

-              {error instanceof Error ? error.message : 'Something went wrong.'}
+              {error instanceof Error
+                ? error.message
+                : 'Something went wrong.'}

-
@@ -165,9 +229,13 @@ export default function SearchPage() {

- {data.total ?? data.results?.length ?? 0}{' '} + + {data.total ?? data.results?.length ?? 0} + {' '} result{data.results?.length !== 1 ? 's' : ''} for{' '} - "{submittedQuery}" + + "{submittedQuery}" +

Knowledge Graph @@ -188,7 +256,7 @@ export default function SearchPage() {

Try one of these examples

-            {EXAMPLE_QUERIES.map((q) => (
+            {EXAMPLE_QUERIES.map(q => (
@@ -273,12 +359,21 @@
           ) : (
             /* Progress section */
-

Processing files…

+

+ Processing files… +

            {progresses.map((p, idx) => (
-              {
-                setProgresses((prev) => prev.map((x, i) => i === idx ? { ...x, doc } : x))
-              }} />
+              {
+                  setProgresses(prev =>
+                    prev.map((x, i) => (i === idx ? { ...x, doc } : x))
+                  )
+                }}
+              />
+            ))}

            {allDone && (
@@ -316,8 +411,11 @@ function FileProgressCard({
   onUpdate: (doc: Document) => void
 }) {
   const { uploadedFile, doc } = progress
-  const status = doc?.status ?? 'processing'
-  const stage = doc?.progress_stage ?? 'uploading'
+  const navigate = useNavigate()
+  const isDuplicate = uploadedFile.duplicate
+
+  const status = isDuplicate ? 'completed' : (doc?.status ?? 'processing')
+  const stage = isDuplicate ? 'completed' : (doc?.progress_stage ?? 'uploading')
   const percent = STAGE_PERCENT[stage] ?? 0
   const isDone = status === 'completed'
   const isFailed = status === 'failed'
@@ -325,8 +423,8 @@
   const { data } = useQuery({
     queryKey: ['document', uploadedFile.id],
     queryFn: () => getDocument(uploadedFile.id),
-    enabled: status !== 'completed' && status !== 'failed',
-    refetchInterval: (query) => {
+    enabled: !isDuplicate && status !== 'completed' && status !== 'failed',
+    refetchInterval: query => {
       const d = query.state.data
       if (!d) return 2000
       return d.status === 'processing' ? 2000 : false
@@ -339,24 +437,70 @@
   }, [data]) // eslint-disable-line react-hooks/exhaustive-deps

   return (
-
+
{/* Status icon */} -
- {isDone ? ( - +
+ {isDuplicate ? ( + + + + + ) : isDone ? ( + ) : isFailed ? ( - + @@ -370,37 +514,66 @@ function FileProgressCard({

{uploadedFile.filename}

- {isDone && doc?.document_type && ( - + {isDuplicate && ( + + Duplicate + + )} + {!isDuplicate && isDone && doc?.document_type && ( + {doc.document_type} )} - {isDone && doc?.dataset_name && ( + {!isDuplicate && isDone && doc?.dataset_name && ( {doc.dataset_name} )}
-

-            {isFailed ? 'Processing failed. Please try re-uploading this file.' : STAGE_LABELS[stage]}
-

+ {isDuplicate ? ( +
+

Already processed

+ +
+ ) : ( +

+              {isFailed
+                ? 'Processing failed. Please try re-uploading this file.'
+                : STAGE_LABELS[stage]}

+ )} {/* Progress bar */} -
-
-
- {!isDone && !isFailed && ( -

{percent}%

+ {!isDuplicate && ( + <> +
+
+
+ {!isDone && !isFailed && ( +

+ {percent}% +

+ )} + )}
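Review note: the duplicate path short-circuits the whole progress pipeline: `status` and `stage` are forced to `'completed'`, the status-polling query is turned off via `enabled: !isDuplicate`, and the progress bar is skipped. The `useNavigate` import suggests the "Already processed" card links through to the existing record; the exact markup is not visible in this diff, so the sketch below is an assumption built on the `existing_doc_id` field added in `services/api.ts` and the existing `/documents/:id` route:

```tsx
import { useNavigate } from 'react-router-dom'
import type { UploadedFile } from '../services/api'

// Hypothetical sketch; the PR's real markup for the duplicate card is not
// visible in this diff.
function ViewExistingButton({ uploadedFile }: { uploadedFile: UploadedFile }) {
  const navigate = useNavigate()
  // existing_doc_id is null for non-duplicates, so guard before rendering.
  if (!uploadedFile.existing_doc_id) return null
  return (
    <button onClick={() => navigate(`/documents/${uploadedFile.existing_doc_id}`)}>
      View existing document
    </button>
  )
}
```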
@@ -413,12 +586,24 @@ function FileProgressCard({
 function FileTypeIcon({ filename }: { filename: string }) {
   const ext = filename.split('.').pop()?.toLowerCase()
   const color =
-    ext === 'pdf' ? 'text-red-400' :
-    ext === 'csv' ? 'text-green-400' :
-    'text-blue-400'
+    ext === 'pdf'
+      ? 'text-red-400'
+      : ext === 'csv'
+        ? 'text-green-400'
+        : 'text-blue-400'
   return (
-
+
@@ -427,9 +612,24 @@ function FileTypeIcon({ filename }: { filename: string }) {
 function Spinner() {
   return (
-
-
-
+
+
+
   )
 }
diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 120763f..e28d660 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts
@@ -9,7 +9,13 @@ const client = axios.create({

 // ─── Types ────────────────────────────────────────────────────────────────────

-export type DocumentType = 'RFQ' | 'PO' | 'CFG' | 'Client CSV' | 'Sales CSV' | null
+export type DocumentType =
+  | 'RFQ'
+  | 'PO'
+  | 'CFG'
+  | 'Client CSV'
+  | 'Sales CSV'
+  | null

 export type DocumentStatus = 'processing' | 'completed' | 'failed'

@@ -61,6 +67,8 @@ export interface SearchResponse {
 export interface UploadedFile {
   id: string
   filename: string
+  duplicate: boolean
+  existing_doc_id: string | null
 }

 export interface UploadResponse {
@@ -101,7 +109,7 @@ export async function uploadDocuments(files: File[]): Promise {
   const { data } = await client.post(
     '/api/documents/upload',
     formData,
-    { headers: { 'Content-Type': 'multipart/form-data' } },
+    { headers: { 'Content-Type': 'multipart/form-data' } }
   )
   return data
 }

@@ -116,8 +124,22 @@ export async function listDocuments(): Promise {
   return data
 }

-export async function getDocumentFileUrl(id: string): Promise<{ url: string; filename: string }> {
-  const { data } = await client.get<{ url: string; filename: string }>(`/api/documents/${id}/file-url`)
+export async function getDocumentFileUrl(
+  id: string
+): Promise<{ url: string; filename: string }> {
+  const { data } = await client.get<{ url: string; filename: string }>(
+    `/api/documents/${id}/file-url`
+  )
+  return data
+}
+
+export async function searchChunks(
+  query: string,
+  limit = 5
+): Promise {
+  const { data } = await client.get('/api/documents/search', {
+    params: { q: query, search_type: 'CHUNKS', limit },
+  })
+  return data
+}
diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js index b3991be..41aeaf0 100644 --- a/frontend/tailwind.config.js +++ b/frontend/tailwind.config.js
@@ -1,9 +1,6 @@
 /** @type {import('tailwindcss').Config} */
 export default {
-  content: [
-    "./index.html",
-    "./src/**/*.{js,ts,jsx,tsx}",
-  ],
+  content: ['./index.html', './src/**/*.{js,ts,jsx,tsx}'],
   theme: {
     extend: {
       fontFamily: {
diff --git a/frontend/tsconfig.app.json b/frontend/tsconfig.app.json new file mode 100644 index 0000000..8291c9f --- /dev/null +++ b/frontend/tsconfig.app.json
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
+    "target": "ES2022",
+    "useDefineForClassFields": true,
+    "lib": ["ES2022", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "types": []
+  },
+  "include": ["src"]
+}
diff --git a/frontend/vercel.json b/frontend/vercel.json new file mode 100644
index 0000000..3a48e56 --- /dev/null +++ b/frontend/vercel.json
@@ -0,0 +1,3 @@
+{
+  "rewrites": [{ "source": "/(.*)", "destination": "/" }]
+}
diff --git a/package-lock.json b/package-lock.json index 330018f..8bb535b 100644 --- a/package-lock.json +++ b/package-lock.json
@@ -5,10 +5,12 @@
   "requires": true,
   "packages": {
     "": {
+      "name": "cortex_s26",
       "dependencies": {
         "dotenv": "^17.2.3"
       },
       "devDependencies": {
+        "@playwright/test": "^1.59.1",
         "baseline-browser-mapping": "^2.9.19",
         "supabase": "^2.58.5"
       }
     }
@@ -26,14 +28,30 @@
         "node": ">=18.0.0"
       }
     },
+    "node_modules/@playwright/test": {
+      "version": "1.59.1",
+      "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.59.1.tgz",
+      "integrity": "sha512-PG6q63nQg5c9rIi4/Z5lR5IVF7yU5MqmKaPOe0HSc0O2cX1fPi96sUQu5j7eo4gKCkB2AnNGoWt7y4/Xx3Kcqg==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright": "1.59.1"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
     "node_modules/agent-base": {
-      "version": "7.1.4",
-      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz",
-      "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==",
+      "version": "9.0.0",
+      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-9.0.0.tgz",
+      "integrity": "sha512-TQf59BsZnytt8GdJKLPfUZ54g/iaUL2OWDSFCCvMOhsHduDQxO8xC4PNeyIkVcA5KwL2phPSv0douC0fgWzmnA==",
       "dev": true,
       "license": "MIT",
       "engines": {
-        "node": ">= 14"
+        "node": ">= 20"
       }
     },
     "node_modules/baseline-browser-mapping": {
@@ -160,18 +178,33 @@
         "node": ">=12.20.0"
       }
     },
+    "node_modules/fsevents": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
     "node_modules/https-proxy-agent": {
-      "version": "7.0.6",
-      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
-      "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==",
+      "version": "9.0.0",
+      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-9.0.0.tgz",
+      "integrity": "sha512-/MVmHp58WkOypgFhCLk4fzpPcFQvTJ/e6LBI7irpIO2HfxUbpmYoHF+KzipzJpxxzJu7aJNWQ0xojJ/dzV2G5g==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "agent-base": "^7.1.2",
-        "debug": "4"
+        "agent-base": "9.0.0",
+        "debug": "^4.3.4"
       },
       "engines": {
-        "node": ">= 14"
+        "node": ">= 20"
       }
     },
     "node_modules/imurmurhash": {
@@ -185,11 +218,11 @@
       }
     },
     "node_modules/minipass": {
-      "version": "7.1.2",
-      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
-      "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
+      "version": "7.1.3",
+      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz",
+      "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==",
       "dev": true,
-      "license": "ISC",
+      "license": "BlueOak-1.0.0",
       "engines": {
         "node": ">=16 || 14 >=14.17"
       }
     },
@@ -264,6 +297,38 @@
         "node": "^20.17.0 || >=22.9.0"
       }
     },
+    "node_modules/playwright": {
+      "version": "1.59.1",
+      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
+      "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright-core": "1.59.1"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "fsevents": "2.3.2"
+      }
+    },
+    "node_modules/playwright-core": {
+      "version": "1.59.1",
+      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
+      "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "playwright-core": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
     "node_modules/proc-log": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/proc-log/-/proc-log-6.0.0.tgz",
@@ -298,17 +363,17 @@
       }
     },
     "node_modules/supabase": {
-      "version": "2.58.5",
-      "resolved": "https://registry.npmjs.org/supabase/-/supabase-2.58.5.tgz",
-      "integrity": "sha512-mYZSkUIePTdmwlHd26Pff8wpmjfre8gcuWzrc5QqhZgZvCXugVzAQQhcjaQisw5kusbPQWNIjUwcHYEKmejhPw==",
+      "version": "2.91.2",
+      "resolved": "https://registry.npmjs.org/supabase/-/supabase-2.91.2.tgz",
+      "integrity": "sha512-tqBBPQdNuU1Snu6uFKjSfKXSsjza56ncGZWG3SOb6cGGSkmCZyLnguHPHccuRmImpsIzXKocN5FKJcyj3J8D7Q==",
       "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {
         "bin-links": "^6.0.0",
-        "https-proxy-agent": "^7.0.2",
+        "https-proxy-agent": "^9.0.0",
         "node-fetch": "^3.3.2",
-        "tar": "7.5.2"
+        "tar": "7.5.13"
       },
       "bin": {
         "supabase": "bin/supabase"
       },
@@ -318,9 +383,9 @@
       }
     },
     "node_modules/tar": {
-      "version": "7.5.2",
-      "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.2.tgz",
-      "integrity": "sha512-7NyxrTE4Anh8km8iEy7o0QYPs+0JKBTj5ZaqHg6B39erLg0qYXN3BijtShwbsNSvQ+LN75+KV+C4QR/f6Gwnpg==",
+      "version": "7.5.13",
+      "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.13.tgz",
+      "integrity": "sha512-tOG/7GyXpFevhXVh8jOPJrmtRpOTsYqUIkVdVooZYJS/z8WhfQUX8RJILmeuJNinGAMSu1veBr4asSHFt5/hng==",
       "dev": true,
       "license": "BlueOak-1.0.0",
       "dependencies": {
diff --git a/package.json b/package.json index 1dd50e7..6282718 100644 --- a/package.json +++ b/package.json
@@ -12,6 +12,7 @@
     "types:frontend": "npx supabase gen types typescript --local > frontend/src/types/database.types.ts"
   },
   "devDependencies": {
+    "@playwright/test": "^1.59.1",
     "baseline-browser-mapping": "^2.9.19",
     "supabase": "^2.58.5"
   },
diff --git a/supabase/migrations/019_add_content_hash.sql b/supabase/migrations/019_add_content_hash.sql new file mode 100644 index 0000000..2b11637 --- /dev/null +++ b/supabase/migrations/019_add_content_hash.sql
@@ -0,0 +1,5 @@
+-- Add content_hash column for upload deduplication (SHA-256 hex digest).
+ALTER TABLE cortex_documents ADD COLUMN IF NOT EXISTS content_hash TEXT;
+
+CREATE INDEX IF NOT EXISTS idx_cortex_documents_content_hash
+  ON cortex_documents(content_hash);