From b8eafd91f7dfd6e08703cd9aa2f4f2b718e4fc11 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 04:36:30 +0000 Subject: [PATCH] feat: GraphRAG retrieval + context/citation engine (P13d-1) The pure-Dart retrieval core for the local "Ask your library" GraphRAG chat (P13d): reuse the P10 semantic substrate (embed -> vector search) plus a light graph re-rank to gather the most relevant library items for a question, then assemble a bounded, cited context block + a history-aware generation prompt the chat (d-2) will drive. New lib/features/ai/data/: - rag_context.dart (pure): RagSource/RagChatTurn/RagContext, kRagSystemPrompt, buildSourceSnippet (aiSummary>description, capped), selectRagSources (dedupe/cap), fitHistory (recent-turn char budget -- the tier-aware history-depth knob), buildRagPrompt (numbered cited sources + optional bounded history). - rag_availability.dart (pure): RagAvailability {unavailable, retrievalOnly, full} + ragAvailability(...) -- the d-3 fallback gate. - rag_retriever.dart: RagRetriever + provider -- embed query -> vectorSearch -> relatedTo re-rank -> hydrate via MetadataRepository -> cited RagContext; empty-sources when retrieval isn't ready. Tests: prompt/snippet/fitHistory/selectRagSources, the ragAvailability truth table, and the retriever with fake embedder + graph + seeded in-memory metadata. No deps, no schema, no UI (it's d-2's engine). Docs: P13-PLAN.md P13d card rewritten to the multi-turn decomposition (d-1/d-2a/d-2b/d-3) + d-1 [~]; VERIFICATION.md P13d-1 note; BACKLOG.md retrieval-only persistence, per-tier history budget, RAM co-residency. 
https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T --- docs/BACKLOG.md | 11 ++ docs/VERIFICATION.md | 11 +- docs/design/P13-PLAN.md | 58 +++++--- lib/features/ai/data/rag_availability.dart | 31 ++++ lib/features/ai/data/rag_context.dart | 137 ++++++++++++++++++ lib/features/ai/data/rag_retriever.dart | 91 ++++++++++++ test/features/ai/rag_availability_test.dart | 47 ++++++ test/features/ai/rag_context_test.dart | 95 ++++++++++++ test/features/ai/rag_retriever_test.dart | 152 ++++++++++++++++++++ 9 files changed, 613 insertions(+), 20 deletions(-) create mode 100644 lib/features/ai/data/rag_availability.dart create mode 100644 lib/features/ai/data/rag_context.dart create mode 100644 lib/features/ai/data/rag_retriever.dart create mode 100644 test/features/ai/rag_availability_test.dart create mode 100644 test/features/ai/rag_context_test.dart create mode 100644 test/features/ai/rag_retriever_test.dart diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md index fc54275..f8fd346 100644 --- a/docs/BACKLOG.md +++ b/docs/BACKLOG.md @@ -8,6 +8,17 @@ _(nothing active — pick the next batch from below)_ ## Deferred / future refinements +- [ ] **GraphRAG — retrieval-only answer persistence.** On low / ineligible tiers (`ragAvailability == + retrievalOnly`) P13d falls back to an ephemeral "most relevant items" answer that **isn't** saved to the + chat history (nothing is generated to revisit). Decide at d-3 whether these should be persisted as a + special turn type or kept purely transient. *(From P13d-1.)* +- [ ] **GraphRAG — per-tier history-budget tuning.** P13d-1's `fitHistory` bounds the fed-back chat history by + a char budget (`historyCharBudget`, default 1500). The budget should scale with the device tier / model + context window (flagship → deeper) rather than a single constant; tune against real models at d-3. 
+ *(From P13d-1.)* +- [ ] **GraphRAG — LLM + Cozo HNSW RAM co-residency.** "Ask your library" runs the generation model **and** + the live HNSW vector index in RAM together. Validate co-residency (and tune retrieval `k` / source caps) + on real low/mid devices so it doesn't OOM — carried from P12d-2; verified at P13d-3. *(From P13d-1.)* - [ ] **Library "hide / filter AI tags" facet.** P13c-2 marks AI-applied tags (`media_tags.source = 'ai'`) and shows a ✦ on their chips, but the library tag facet (`watchDistinctTags`) treats them like any tag. Add a "hide AI tags" / "AI-tagged only" filter (and maybe a bulk "remove all AI tags on this item") if diff --git a/docs/VERIFICATION.md b/docs/VERIFICATION.md index fc4f115..6c815d7 100644 --- a/docs/VERIFICATION.md +++ b/docs/VERIFICATION.md @@ -985,11 +985,18 @@ entries, or verify after P11c lands.)* - [ ] A **manually-added** tag has no marker. **Default off:** downloads aren't auto-tagged; with generation off there's a one-time "finish setting up auto-tagging" nudge; the queue still drains. +### P13d-1 — GraphRAG retrieval engine *(CI-covered; no APK check)* +- No on-device check: P13d-1 ships the **pure-Dart retrieval/context engine** only (no UI, schema, or + native path). It's exercised by unit tests (fake embedder + graph + seeded in-memory metadata). The + end-to-end **"Ask your library"** flow is verified at P13d-2 (chat screen + generation). + ### P13 (later subphases) - [ ] **Transcription / summarization / translation / OCR** each work (capability-gated) and write results back to the item. -- [ ] **"Ask your library"**: a natural-language question returns a grounded answer citing real - library items — **fully offline** (airplane mode). 
+- [ ] **"Ask your library"**: a multi-turn chat answers natural-language questions with grounded answers + citing real library items — **fully offline** (airplane mode); conversations persist (list / continue + / rename / archive / delete); low / ineligible tiers fall back to a retrieval-only "most relevant + items" answer. - [ ] **Graph-clustered auto-albums**, **"Rediscover"** (centrality), and **path/bridge** discovery produce sensible results. - [ ] All P13 features gate gracefully on incapable devices. diff --git a/docs/design/P13-PLAN.md b/docs/design/P13-PLAN.md index 9206be6..f0b43cd 100644 --- a/docs/design/P13-PLAN.md +++ b/docs/design/P13-PLAN.md @@ -213,29 +213,51 @@ user-curated (they drive facets), AI tags are **marked** (provenance) rather tha 'ai' + entry; default-off no-op). **No deps.** **Pending APK spot-check** (real download → AI-marked tags + facets, offline). A library "hide/filter AI tags" facet is deferred (BACKLOG). -### `[ ]` P13d — Local GraphRAG "Ask your library" *(flagship; split into 3 PRs)* +### `[~]` P13d — Local GraphRAG "Ask your library" *(flagship; split into 4 PRs)* The headline differentiator — natural-language Q&A grounded in the private library, fully on-device (AI-SPEC §6, GRAPH-SPEC §7). Sequenced **mid-phase** so the generation patterns (P13a/c) are proven first. +**Revised target (maintainer call): a real multi-turn chat**, not single-shot — persistent conversations +(list / continue / rename / archive / delete) on capable tiers, each turn re-retrieving **fresh RAG sources** +plus a **bounded recent-history window** whose depth scales with the device tier; entry from the **Dashboard**. +Incapable / low tiers fall back to an ephemeral **retrieval-only** answer (d-3). 
-#### `[ ]` P13d-1 — Retrieval + context & citation assembly *(pure Dart; CI-verifiable)* -- A pure-Dart **retrieval/context packer** that reuses `GraphQueryService.relatedTo` (vector + graph re-rank) - and `neighborhood` to select the most relevant nodes + their graph neighborhood for a query, then assembles - a **bounded, cited** context block (node → deep-linkable item) and the generation prompt. No UI, no model — - fully unit-testable. -- **Exit / review:** for a seeded graph, the packer returns the expected relevant nodes + a well-formed, - size-bounded prompt with stable citations; covered by unit tests. +#### `[~]` P13d-1 — Retrieval + context & citation assembly *(pure Dart; CI-verifiable)* +- A pure-Dart **retrieval/context packer** that reuses the P10 semantic substrate (`embedderEngine.embed` → + `GraphQueryService.vectorSearch`) plus a light `relatedTo` graph re-rank to select the most relevant items + for a query, then assembles a **bounded, cited** context block (item → deep-linkable source) and a + **history-aware** generation prompt. No UI, no model, no schema — fully unit-testable. +- **History-aware prompt builder** (`fitHistory` char-budget knob) so d-2 multi-turn drops in cleanly and the + per-tier history depth is a graceful budget, not a hard mode switch. +- **Status:** implemented (CI-green). New `lib/features/ai/data/`: `rag_context.dart` (pure — `RagSource`, + `RagChatTurn`, `RagContext`, `kRagSystemPrompt`, `buildSourceSnippet`, `selectRagSources`, `fitHistory`, + `buildRagPrompt`), `rag_availability.dart` (pure — `RagAvailability {unavailable, retrievalOnly, full}` + + `ragAvailability(...)`, the d-3 gate), `rag_retriever.dart` (`RagRetriever` + provider: embed → vectorSearch + → `relatedTo` re-rank → hydrate via `MetadataRepository` → cited context; empty-sources when retrieval isn't + ready). 
Tests: prompt/snippet/`fitHistory`/`selectRagSources`, the `ragAvailability` truth table, and the + retriever with fake embedder + graph + seeded in-memory metadata. **No deps, no schema, no UI.** +- **Exit / review:** for seeded sources, the retriever returns the expected ordered, cited items + a + well-formed, size-bounded, history-aware prompt; degrades to empty-sources when retrieval is unavailable; + covered by unit tests. ✓ -#### `[ ]` P13d-2 — Chat UI + streaming grounded answer + citations *(native; APK)* -- A dedicated **"Ask your library"** screen (reached from Dashboard/Library) that runs P13d-1's context - through `GenerationEngine.generate()` and **streams** a grounded answer with **tappable citations** that - deep-link to the cited library items. -- **Exit / review:** ask a natural-language question on a capable device and get a streamed, grounded answer - citing real library items **offline**; citations navigate correctly. APK spot-check. +#### `[ ]` P13d-2a — Chat schema + Ask screen (single conversation) *(native; APK)* +- Drift **`chats` + `chat_messages`** schema; a dedicated **"Ask your library"** screen from the Dashboard + that runs P13d-1's per-turn fresh retrieval + bounded history through `GenerationEngine.generate()` and + **streams** a grounded answer with **tappable citations** deep-linking to the cited items. Generation-gated + via `aiSummaryAction` (on-ramp when no model). +- **Exit / review:** ask a natural-language question on a capable device → a streamed, grounded, cited answer + **offline**; the turn persists; citations navigate. APK spot-check. -#### `[ ]` P13d-3 — Low-tier fallback + RAM co-residency validation *(native; APK)* -- On ineligible / low tiers, fall back to **retrieval-only** ("here are the most relevant items") plus the - extractive summary — no generation, clearly framed. Validate **LLM + Cozo HNSW RAM co-residency** on real - devices (the index lives in RAM with the model — BACKLOG from P12d-2) and tune limits. 
+#### `[ ]` P13d-2b — Conversation list + manage *(native)* +- A conversation **list** with **continue / rename / archive / delete**; resuming a chat re-feeds the bounded + history into each new turn's prompt. +- **Exit / review:** prior chats list, reopen and continue with retained context, and archive/delete/rename + behave; covered where CI can (provider/repository) + an APK spot-check for the flow. + +#### `[ ]` P13d-3 — Low-tier fallback + tier-aware depth + RAM co-residency *(native; APK)* +- On ineligible / low tiers (`ragAvailability == retrievalOnly`), fall back to an ephemeral **retrieval-only** + answer ("here are the most relevant items") — no generation, clearly framed, nothing persisted. Tune the + **tier-aware history-depth** budget. Validate **LLM + Cozo HNSW RAM co-residency** on real devices (the index + lives in RAM with the model — BACKLOG from P12d-2) and tune limits. - **Exit / review:** a low-end device gives a useful retrieval-only answer without OOM; a capable device runs generation + the live HNSW index together within memory budget (verified on real hardware). diff --git a/lib/features/ai/data/rag_availability.dart b/lib/features/ai/data/rag_availability.dart new file mode 100644 index 0000000..31d559b --- /dev/null +++ b/lib/features/ai/data/rag_availability.dart @@ -0,0 +1,31 @@ +/// Pure decision for whether "Ask your library" (P13d) can run, and at what +/// level — so the UI gates consistently and d-3's retrieval-only fallback has a +/// single source of truth. +library; + +/// What the Ask feature can do on this device right now. +enum RagAvailability { + /// No retrieval index (no embedder / graph) — the feature can't run. + unavailable, + + /// Retrieval works but there's no generation model (low/ineligible tier) → + /// answer with "most relevant items" only (d-3 fallback), no LLM. + retrievalOnly, + + /// Full GraphRAG: retrieve + generate a grounded, cited answer. 
+  full,
+}
+
+/// Retrieval needs [embedderReady] (the query embedder) and [graphAvailable]
+/// (the on-device graph/vector store); [generationEligible] (the device tier
+/// offers a generation model) then decides full vs. retrieval-only.
+RagAvailability ragAvailability({
+  required bool generationEligible,
+  required bool embedderReady,
+  required bool graphAvailable,
+}) {
+  final canRetrieve = embedderReady && graphAvailable;
+  if (!canRetrieve) return RagAvailability.unavailable;
+  if (!generationEligible) return RagAvailability.retrievalOnly;
+  return RagAvailability.full;
+}
diff --git a/lib/features/ai/data/rag_context.dart b/lib/features/ai/data/rag_context.dart
new file mode 100644
index 0000000..9e11efc
--- /dev/null
+++ b/lib/features/ai/data/rag_context.dart
@@ -0,0 +1,137 @@
+/// Pure, engine-free building blocks for the local GraphRAG "Ask your library"
+/// retrieval (P13d-1): the grounding-source + context types, the prompt builder,
+/// source selection, and history-window fitting. Kept out of the retriever/UI so
+/// the prompt shape + bounds are unit-testable in isolation.
+library;
+
+/// System instruction: answer only from the provided sources, cite them, and
+/// admit ignorance rather than invent. On-device; nothing leaves the device.
+const String kRagSystemPrompt =
+    "You answer questions about the user's personal media library using ONLY "
+    'the numbered sources provided. Cite the sources you use inline as [n]. If '
+    'the sources do not contain the answer, say you do not know — never invent '
+    'items, facts, or citations.';
+
+/// One retrieved library item used to ground an answer. [index] is its 1-based
+/// citation number; [snippet] is the compact, capped text the model sees.
+class RagSource {
+  const RagSource({
+    required this.index,
+    required this.itemId,
+    required this.title,
+    required this.snippet,
+  });
+
+  /// 1-based citation number, rendered as `[index]` in the prompt.
+  final int index;
+
+  /// Id of the library item this source stands for.
+  final String itemId;
+
+  /// Item title, shown right after the citation number.
+  final String title;
+
+  /// Capped grounding text; may be empty when the item has no usable signals.
+  final String snippet;
+}
+
+/// A prior question/answer turn, for multi-turn history (fed back, bounded).
+class RagChatTurn {
+  const RagChatTurn({required this.question, required this.answer});
+  final String question;
+  final String answer;
+}
+
+/// The assembled retrieval context + prompt for one question.
+class RagContext {
+  const RagContext({
+    required this.question,
+    required this.sources,
+    required this.systemPrompt,
+    required this.prompt,
+  });
+
+  final String question;
+  final List<RagSource> sources;
+  final String systemPrompt;
+  final String prompt;
+
+  /// Whether retrieval produced at least one grounding source.
+  bool get hasSources => sources.isNotEmpty;
+}
+
+/// Builds a compact, capped grounding snippet for one item from its signals.
+///
+/// Parts are joined with ` · ` in this order: uploader, tags, the distilled
+/// [aiSummary] (falling back to the raw [description]), [transcript], and
+/// [ocrText]. Blank values and blank tags are skipped. The joined text is then
+/// truncated to at most [maxChars] UTF-16 code units, so late parts (transcript,
+/// OCR) only appear as far as the budget allows. Truncation never splits a
+/// surrogate pair, and a non-positive [maxChars] yields an empty string.
+String buildSourceSnippet({
+  String? uploader,
+  List<String> tags = const [],
+  String? description,
+  String? transcript,
+  String? aiSummary,
+  String? ocrText,
+  int maxChars = 400,
+}) {
+  if (maxChars <= 0) return '';
+
+  // Trimmed value, or null when there is nothing to say.
+  String? clean(String? s) {
+    final trimmed = s?.trim();
+    return (trimmed == null || trimmed.isEmpty) ? null : trimmed;
+  }
+
+  final by = clean(uploader);
+  final cleanTags = [
+    for (final t in tags)
+      if (t.trim().isNotEmpty) t.trim(),
+  ];
+  final gist = clean(aiSummary) ?? clean(description);
+  final spoken = clean(transcript);
+  final seenText = clean(ocrText);
+
+  final parts = <String>[
+    if (by != null) 'by $by',
+    if (cleanTags.isNotEmpty) 'tags: ${cleanTags.join(', ')}',
+    if (gist != null) gist,
+    if (spoken != null) spoken,
+    if (seenText != null) 'text in image: $seenText',
+  ];
+  final joined = parts.join(' · ');
+  if (joined.length <= maxChars) return joined;
+
+  // Back off one code unit if the cut would leave a lone high surrogate, which
+  // would make the snippet an ill-formed string.
+  var end = maxChars;
+  final last = joined.codeUnitAt(end - 1);
+  if (last >= 0xD800 && last <= 0xDBFF) end--;
+  return joined.substring(0, end).trimRight();
+}
+
+/// De-duplicates [orderedIds] (preserving order) and caps to [max] — the final
+/// source set, most-relevant first.
+///
+/// A non-positive [max] yields an empty list.
+List<String> selectRagSources(List<String> orderedIds, {int max = 6}) {
+  final seen = <String>{};
+  final out = <String>[];
+  for (final id in orderedIds) {
+    // Check the cap before adding, so `max: 0` returns nothing instead of one id.
+    if (out.length >= max) break;
+    if (seen.add(id)) out.add(id);
+  }
+  return out;
+}
+
+/// Keeps the most **recent** history turns that fit within [charBudget]
+/// (oldest dropped first), returned chronologically. The tier knob's mechanism:
+/// a smaller budget on smaller models feeds back less history.
+///
+/// A turn's cost is `question.length + answer.length`. The latest turn is always
+/// kept, even when it alone exceeds [charBudget], so a follow-up never loses its
+/// immediate context.
+List<RagChatTurn> fitHistory(List<RagChatTurn> turns, int charBudget) {
+  final kept = <RagChatTurn>[];
+  var used = 0;
+  // Walk newest → oldest and stop at the first turn that would overflow.
+  for (final t in turns.reversed) {
+    final cost = t.question.length + t.answer.length;
+    if (used + cost > charBudget && kept.isNotEmpty) break;
+    kept.add(t);
+    used += cost;
+    if (used >= charBudget) break;
+  }
+  return kept.reversed.toList();
+}
+
+/// Assembles the user prompt: a bounded slice of prior turns (if any), the
+/// numbered sources, and the question.
+///
+/// [history] is trimmed by [fitHistory] to [historyCharBudget]. Each source is
+/// rendered as `[index] title — snippet`; when a source has an empty snippet the
+/// separator is omitted so the line does not end in a dangling dash.
+String buildRagPrompt(
+  String question,
+  List<RagSource> sources, {
+  List<RagChatTurn> history = const [],
+  int historyCharBudget = 1500,
+}) {
+  final b = StringBuffer();
+  final fitted = fitHistory(history, historyCharBudget);
+  if (fitted.isNotEmpty) {
+    b.writeln('Conversation so far:');
+    for (final t in fitted) {
+      b
+        ..writeln('Q: ${t.question}')
+        ..writeln('A: ${t.answer}');
+    }
+    b.writeln();
+  }
+  b.writeln('Sources:');
+  for (final s in sources) {
+    final head = '[${s.index}] ${s.title}';
+    b.writeln(s.snippet.isEmpty ? head : '$head — ${s.snippet}');
+  }
+  b
+    ..writeln()
+    ..write('Question: $question');
+  return b.toString();
+}
diff --git a/lib/features/ai/data/rag_retriever.dart b/lib/features/ai/data/rag_retriever.dart
new file mode 100644
index 0000000..04e1f46
--- /dev/null
+++ b/lib/features/ai/data/rag_retriever.dart
@@ -0,0 +1,91 @@
+import 'package:flutter_riverpod/flutter_riverpod.dart';
+import 'package:grabbit/core/ai/embedder_engine_provider.dart';
+import 'package:grabbit/core/graph/graph_query_provider.dart';
+import 'package:grabbit/features/ai/data/rag_context.dart';
+import
+    'package:grabbit/features/library/data/metadata_repository.dart';
+import 'package:grabbit/features/library/presentation/semantic_search_provider.dart';
+
+/// Retrieves the most relevant library items for a question and assembles the
+/// grounding context + prompt for the local LLM (P13d-1) — the engine the Ask
+/// chat (d-2) drives. Reuses the existing semantic-search substrate (embed →
+/// vector search) + a light graph expansion; degrades to an empty-sources
+/// context when retrieval isn't available (no embedder / empty index) so the
+/// caller can fall back gracefully. No generation here — that's d-2.
+class RagRetriever {
+  RagRetriever(this._ref);
+
+  final Ref _ref;
+
+  /// Retrieves sources for [question] and builds the prompt. [history] (prior
+  /// turns) is folded in, bounded by [historyCharBudget] (the tier knob).
+  ///
+  /// At most [maxSources] sources are returned, numbered from 1 in relevance
+  /// order. [k] is the number of vector-search hits requested. Up to
+  /// [relatedSlots] of the source slots are reserved for items connected to the
+  /// top hit in the graph; the top hit itself always keeps a slot.
+  ///
+  /// Returns a context with no sources and an empty prompt when the trimmed
+  /// question is empty, [maxSources] is not positive, semantic search is not
+  /// ready, the vector search finds nothing, or none of the candidate ids can
+  /// be loaded from the metadata repository.
+  Future<RagContext> retrieve(
+    String question, {
+    List<RagChatTurn> history = const [],
+    int historyCharBudget = 1500,
+    int maxSources = 6,
+    int k = 30,
+    int relatedSlots = 2,
+  }) async {
+    final q = question.trim();
+    final empty = RagContext(
+      question: q,
+      sources: const [],
+      systemPrompt: kRagSystemPrompt,
+      prompt: '',
+    );
+    if (q.isEmpty || maxSources <= 0) return empty;
+    // Retrieval needs the query embedder ready; the vector search itself returns
+    // [] when the graph/index is unavailable.
+    if (!await _ref.read(semanticSearchReadyProvider.future)) return empty;
+
+    final vector = await _ref.read(embedderEngineProvider).embed(q);
+    final query = _ref.read(graphQueryServiceProvider);
+    final hits = await query.vectorSearch(vector, k: k);
+    if (hits.isEmpty) return empty;
+
+    // Light graph re-rank: reserve a few slots for items connected to the top
+    // hit so context isn't purely vector-nearest (bounded; cheap on modest
+    // libraries). The neighbours must sit *before* the tail of the vector hits:
+    // appended after all k hits they would be cut off by the maxSources cap.
+    var reserved = relatedSlots < 0 ? 0 : relatedSlots;
+    if (reserved > maxSources - 1) reserved = maxSources - 1;
+    final related = reserved == 0
+        ? const <String>[]
+        : await query.relatedTo(hits.first.id, limit: 4);
+
+    final hitIds = [for (final h in hits) h.id];
+    final lead = maxSources - reserved;
+    final ordered = [
+      ...hitIds.take(lead),
+      ...related,
+      // Remaining hits back-fill slots left by duplicates or missing items.
+      ...hitIds.skip(lead),
+    ];
+    // De-duplicate only; the cap is applied while hydrating, so ids that no
+    // longer resolve to an item don't shrink the source set.
+    final ids = selectRagSources(ordered, max: ordered.length);
+
+    final repo = _ref.read(metadataRepositoryProvider);
+    final sources = <RagSource>[];
+    for (final id in ids) {
+      if (sources.length >= maxSources) break;
+      final item = await repo.mediaItemById(id);
+      if (item == null) continue;
+      final meta = await repo.metadataForItem(id);
+      final tags = await repo.tagNamesForItem(id);
+      sources.add(
+        RagSource(
+          // Numbered by position among the hydrated sources, so citations stay
+          // contiguous even when some ids were skipped.
+          index: sources.length + 1,
+          itemId: id,
+          title: item.title,
+          snippet: buildSourceSnippet(
+            uploader: meta?.uploader,
+            tags: tags,
+            description: meta?.description,
+            transcript: meta?.transcript,
+            aiSummary: meta?.aiSummary,
+            ocrText: meta?.ocrText,
+          ),
+        ),
+      );
+    }
+    if (sources.isEmpty) return empty;
+    return RagContext(
+      question: q,
+      sources: sources,
+      systemPrompt: kRagSystemPrompt,
+      prompt: buildRagPrompt(
+        q,
+        sources,
+        history: history,
+        historyCharBudget: historyCharBudget,
+      ),
+    );
+  }
+}
+
+final ragRetrieverProvider = Provider<RagRetriever>(RagRetriever.new);
diff --git a/test/features/ai/rag_availability_test.dart b/test/features/ai/rag_availability_test.dart
new file mode 100644
index 0000000..72f08ae
--- /dev/null
+++ b/test/features/ai/rag_availability_test.dart
@@ -0,0 +1,47 @@
+import 'package:flutter_test/flutter_test.dart';
+import 'package:grabbit/features/ai/data/rag_availability.dart';
+
+void main() {
+  group('ragAvailability (P13d-1)', () {
+    test('no embedder or no graph → unavailable', () {
+      expect(
+        ragAvailability(
+          generationEligible: true,
+          embedderReady: false,
+          graphAvailable: true,
+        ),
+        RagAvailability.unavailable,
+      );
+      expect(
+        ragAvailability(
+          generationEligible: true,
+          embedderReady: true,
+          graphAvailable: false,
+        ),
+        RagAvailability.unavailable,
+      );
+    });
+
+    test('retrieval works but no generation model → retrievalOnly', () {
+      expect(
+        ragAvailability(
+          generationEligible: false,
+          embedderReady: true,
+          graphAvailable: true,
+        ),
+
RagAvailability.retrievalOnly,
+      );
+    });
+
+    test('all three → full', () {
+      expect(
+        ragAvailability(
+          generationEligible: true,
+          embedderReady: true,
+          graphAvailable: true,
+        ),
+        RagAvailability.full,
+      );
+    });
+  });
+}
diff --git a/test/features/ai/rag_context_test.dart b/test/features/ai/rag_context_test.dart
new file mode 100644
index 0000000..e0f305d
--- /dev/null
+++ b/test/features/ai/rag_context_test.dart
@@ -0,0 +1,95 @@
+import 'package:flutter_test/flutter_test.dart';
+import 'package:grabbit/features/ai/data/rag_context.dart';
+
+/// Builds a source with citation number [i] and a derived item id (`id<i>`).
+RagSource _src(int i, String title, String snippet) =>
+    RagSource(index: i, itemId: 'id$i', title: title, snippet: snippet);
+
+void main() {
+  group('buildRagPrompt (P13d-1)', () {
+    test('numbers sources and includes the question', () {
+      final prompt = buildRagPrompt('what live shows do I have?', [
+        _src(1, 'Concert A', 'by Band · tags: live'),
+        _src(2, 'Concert B', 'by Other'),
+      ]);
+      expect(prompt, contains('[1] Concert A — by Band · tags: live'));
+      expect(prompt, contains('[2] Concert B — by Other'));
+      expect(prompt, contains('Question: what live shows do I have?'));
+      // No history was passed, so the history header must be absent.
+      expect(prompt, isNot(contains('Conversation so far')));
+    });
+
+    test('folds in bounded history, oldest dropped first', () {
+      // Turn costs are question.length + answer.length: the old turn costs
+      // 5 + 5 = 10, the recent one 8 + 8 = 16.
+      final history = [
+        const RagChatTurn(question: 'old q', answer: 'old a'),
+        const RagChatTurn(question: 'recent q', answer: 'recent a'),
+      ];
+      final prompt = buildRagPrompt(
+        'follow up',
+        [_src(1, 'X', 'y')],
+        history: history,
+        historyCharBudget: 20, // only the most recent turn fits
+      );
+      // 16 fits in 20; adding the old turn would make 26, so it is dropped.
+      expect(prompt, contains('Conversation so far'));
+      expect(prompt, contains('recent q'));
+      expect(prompt, isNot(contains('old q')));
+    });
+  });
+
+  group('fitHistory (P13d-1)', () {
+    test('keeps the most recent turns within budget, chronological', () {
+      final turns = [
+        const RagChatTurn(question: 'a', answer: '1'), // cost 2
+        const RagChatTurn(question: 'b', answer: '2'), // cost 2
+        const RagChatTurn(question: 'c', answer: '3'), // cost 2
+      ];
+      // Budget 4 holds exactly the two newest turns, returned oldest-first.
+      final kept = fitHistory(turns, 4);
+      expect(kept.map((t) => t.question), ['b', 'c']);
+    });
+
+    test('always keeps at least the latest turn even if over budget', () {
+      // The single turn costs far more than the budget of 1, yet is still kept.
+      final kept = fitHistory(const [
+        RagChatTurn(question: 'long question', answer: 'long answer'),
+      ], 1);
+      expect(kept, hasLength(1));
+    });
+
+    test('empty history → empty', () {
+      expect(fitHistory(const [], 100), isEmpty);
+    });
+  });
+
+  group('selectRagSources (P13d-1)', () {
+    test('dedupes preserving order and caps to max', () {
+      // The second 'a' is a duplicate; 'd' falls outside the cap of 3.
+      expect(selectRagSources(['a', 'b', 'a', 'c', 'd'], max: 3), [
+        'a',
+        'b',
+        'c',
+      ]);
+    });
+  });
+
+  group('buildSourceSnippet (P13d-1)', () {
+    test('prefers aiSummary over description and includes tags/uploader', () {
+      final s = buildSourceSnippet(
+        uploader: 'Chef',
+        tags: ['food', 'pasta'],
+        description: 'raw description',
+        aiSummary: 'a tidy summary',
+      );
+      expect(s, contains('by Chef'));
+      expect(s, contains('tags: food, pasta'));
+      expect(s, contains('a tidy summary'));
+      expect(s, isNot(contains('raw description')));
+    });
+
+    test('falls back to description when no summary, and caps length', () {
+      // 2000 characters of 'x' as the description, capped to 100.
+      final long = List.filled(2000, 'x').join();
+      final s = buildSourceSnippet(description: long, maxChars: 100);
+      expect(s, contains('x'));
+      expect(s.length, lessThanOrEqualTo(100));
+    });
+
+    test('empty when there is nothing to say', () {
+      expect(buildSourceSnippet(), isEmpty);
+    });
+  });
+}
diff --git a/test/features/ai/rag_retriever_test.dart b/test/features/ai/rag_retriever_test.dart
new file mode 100644
index 0000000..3e94ce4
--- /dev/null
+++ b/test/features/ai/rag_retriever_test.dart
@@ -0,0 +1,152 @@
+import 'package:drift/drift.dart' show Value;
+import 'package:drift/native.dart';
+import 'package:flutter_riverpod/flutter_riverpod.dart';
+import 'package:flutter_test/flutter_test.dart';
+import 'package:grabbit/core/ai/embedder_engine.dart';
+import 'package:grabbit/core/ai/embedder_engine_provider.dart';
+import 'package:grabbit/core/ai/model_catalog.dart';
+import 'package:grabbit/core/db/database.dart';
+import 'package:grabbit/core/db/database_provider.dart';
+import 'package:grabbit/core/graph/graph_query_provider.dart';
+import 'package:grabbit/core/graph/graph_query_service.dart';
+import 'package:grabbit/core/graph/unavailable_graph_store.dart';
+import 'package:grabbit/features/ai/data/rag_retriever.dart';
+import 'package:grabbit/features/library/presentation/semantic_search_provider.dart';
+
+/// Embedder that returns a fixed vector; always ready.
+// NOTE(review): the type arguments below were restored from usage (they were
+// missing in the patch text) — confirm against the EmbedderEngine interface.
+class FakeEmbedderEngine implements EmbedderEngine {
+  @override
+  EmbedderModel get model => defaultEmbedder;
+  @override
+  bool get isAvailable => true;
+  @override
+  int get dimension => 3;
+  @override
+  Future<bool> ensureReady() async => true;
+  @override
+  Future<void> downloadModel({void Function(double)? onProgress}) async {}
+  @override
+  Future<List<double>> embed(String text) async => const [0.1, 0.2, 0.3];
+  @override
+  Future<List<List<double>>> embedBatch(List<String> texts) async => [
+    for (final _ in texts) const [0.1, 0.2, 0.3],
+  ];
+  @override
+  Future<void> close() async {}
+}
+
+/// GraphQueryService with canned vector + related results (the underlying store
+/// is the no-op one; we override the two methods the retriever uses).
+// NOTE(review): parameter/return type arguments restored from usage — confirm
+// against GraphQueryService.
+class FakeGraphQueryService extends GraphQueryService {
+  FakeGraphQueryService(this.hits, {this.related = const []})
+    : super(const UnavailableGraphStore());
+
+  /// Returned verbatim by [vectorSearch], whatever the query vector.
+  final List<VectorHit> hits;
+
+  /// Returned verbatim by [relatedTo], whatever the seed id.
+  final List<String> related;
+
+  @override
+  Future<List<VectorHit>> vectorSearch(
+    List<double> query, {
+    int k = 50,
+    int ef = 100,
+  }) async => hits;
+
+  @override
+  Future<List<String>> relatedTo(
+    String id, {
+    int k = 50,
+    int limit = 12,
+  }) async => related;
+}
+
+void main() {
+  late AppDatabase db;
+  // A fresh in-memory database per test keeps the seeded rows isolated.
+  setUp(() => db = AppDatabase(NativeDatabase.memory()));
+  tearDown(() => db.close());
+
+  /// Inserts one media item plus its metadata row (only [description] varies).
+  Future<void> seedItem(String id, String title, {String? description}) async {
+    await db
+        .into(db.mediaItems)
+        .insert(
+          MediaItemsCompanion.insert(
+            id: id,
+            title: title,
+            sourceUrl: 'u',
+            site: 'youtube',
+            filePath: '/m/$id',
+            type: 'video',
+            createdAt: DateTime.utc(2026),
+            storageState: 'private',
+          ),
+        );
+    await db
+        .into(db.mediaMetadata)
+        .insert(
+          MediaMetadataCompanion.insert(
+            itemId: id,
+            description: Value(description),
+          ),
+        );
+  }
+
+  /// Container wired to the in-memory db, the fake embedder and [graph];
+  /// [ready] is the value the semantic-search readiness provider resolves to.
+  ProviderContainer makeContainer({
+    required FakeGraphQueryService graph,
+    bool ready = true,
+  }) {
+    final c = ProviderContainer(
+      overrides: [
+        appDatabaseProvider.overrideWithValue(db),
+        embedderEngineProvider.overrideWithValue(FakeEmbedderEngine()),
+        graphQueryServiceProvider.overrideWithValue(graph),
+        semanticSearchReadyProvider.overrideWith((ref) async => ready),
+      ],
+    );
+    addTearDown(c.dispose);
+    return c;
+  }
+
+  test('retrieves cited sources + a grounded prompt (P13d-1)', () async {
+    await seedItem('a', 'Live in Tokyo', description: 'a great concert');
+    await seedItem('b', 'Studio session', description: 'recording');
+    final c = makeContainer(
+      graph: FakeGraphQueryService([
+        const VectorHit('a', 0.1),
+        const VectorHit('b', 0.4),
+      ]),
+    );
+
+    final ctx = await c.read(ragRetrieverProvider).retrieve('what concerts?');
+
+    expect(ctx.hasSources, isTrue);
+    expect(ctx.sources.map((s) => s.itemId), ['a', 'b']);
+    expect(ctx.sources.first.index, 1);
+    expect(ctx.prompt, contains('[1] Live in Tokyo'));
+    expect(ctx.prompt, contains('a great concert'));
+    expect(ctx.prompt, contains('Question: what concerts?'));
+    expect(ctx.systemPrompt, isNotEmpty);
+  });
+
+  test('empty question or unready retrieval → no sources (P13d-1)', () async {
+    await seedItem('a', 'X');
+    final graph = FakeGraphQueryService([const VectorHit('a', 0.1)]);
+
+    // A whitespace-only question short-circuits even though a hit exists.
+    final c1 = makeContainer(graph: graph);
+    expect(
+      (await c1.read(ragRetrieverProvider).retrieve(' ')).hasSources,
+      isFalse,
+    );
+
+    // The readiness provider resolves to false, so nothing is retrieved.
+    final c2 = makeContainer(graph: graph, ready: false);
+    expect(
+      (await c2.read(ragRetrieverProvider).retrieve('q')).hasSources,
+      isFalse,
+    );
+  });
+
+  test('empty vector index → no sources (P13d-1)', () async {
+    final c = makeContainer(graph: FakeGraphQueryService(const []));
+    expect(
+      (await c.read(ragRetrieverProvider).retrieve('q')).hasSources,
+      isFalse,
+    );
+  });
+}