From 1418091c5678392ba3dae619bebc59e501501f50 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 17:45:08 +0000 Subject: [PATCH 1/3] fix: image-only downloads become image items, not just a thumbnail (P13b-3 precursor) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit classifyDownloadOutputs routed every image extension to `thumb`, so a single-image download (a photo or carousel) produced an empty media list — no library item at all. Now image files are tentative thumbnails: alongside a video/audio download they stay thumbnails, but when a download has no video/audio the images ARE the media (→ image items). Reuses mediaTypeForExt so classification matches the insert loop's type assignment. This unblocks auto-OCR-on-download (P13b-3) and fixes image downloads generally (they now appear in the library, with dimensions/OCR/etc.). https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T --- .../queue/data/completed_outputs.dart | 28 ++++++++++++++---- .../queue/completed_outputs_test.dart | 29 +++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/lib/features/queue/data/completed_outputs.dart b/lib/features/queue/data/completed_outputs.dart index 6ade0e8..29ca98e 100644 --- a/lib/features/queue/data/completed_outputs.dart +++ b/lib/features/queue/data/completed_outputs.dart @@ -1,18 +1,24 @@ import 'dart:io'; +import 'package:grabbit/core/utils/media_type.dart'; + /// The classified files produced by a finished download in its per-task folder. typedef DownloadOutputs = ({List media, File? thumb, File? info}); const _subtitleExts = {'srt', 'vtt', 'ass', 'ssa', 'lrc', 'sub'}; -const _thumbExts = {'jpg', 'jpeg', 'png', 'webp'}; /// Sorts a download folder's files into the media file(s), the thumbnail, and /// the `.info.json` sidecar. Subtitle sidecars (`.srt`/`.vtt`/`.srv*`/…) and /// other JSON sidecars are excluded so they're never mistaken for the media — /// and multiple media files (yt-dlp `--split-chapters`) are all returned. +/// +/// Image files are **tentative thumbnails**: alongside a video/audio download an +/// image is the thumbnail sidecar, but an **image-only** download (a photo or a +/// carousel of photos) has no video/audio — there the images *are* the media +/// (P13b-3), so they become image library items rather than being discarded. DownloadOutputs classifyDownloadOutputs(Iterable files) { - final media = []; - File? thumb; + final media = []; // video / audio + final images = []; // image files — thumbnail(s) or the media itself File? info; for (final f in files) { final lower = f.path.toLowerCase(); @@ -23,12 +29,22 @@ DownloadOutputs classifyDownloadOutputs(Iterable files) { // Other yt-dlp sidecars (e.g. live chat) — ignore. } else if (_subtitleExts.contains(ext) || ext.startsWith('srv')) { // Subtitle sidecars — not media. - } else if (_thumbExts.contains(ext)) { - thumb ??= f; + } else if (mediaTypeForExt(ext) == 'image') { + images.add(f); } else { media.add(f); } } media.sort((a, b) => a.path.compareTo(b.path)); - return (media: media, thumb: thumb, info: info); + images.sort((a, b) => a.path.compareTo(b.path)); + // Video/audio present → images are thumbnail sidecars (keep the first). + // Otherwise it's an image download → the images are the media. + if (media.isNotEmpty) { + return ( + media: media, + thumb: images.isEmpty ? null : images.first, + info: info, + ); + } + return (media: images, thumb: null, info: info); } diff --git a/test/features/queue/completed_outputs_test.dart b/test/features/queue/completed_outputs_test.dart index 396deda..fd58a57 100644 --- a/test/features/queue/completed_outputs_test.dart +++ b/test/features/queue/completed_outputs_test.dart @@ -50,5 +50,34 @@ void main() { final out = classifyDownloadOutputs(_files(['/d/clip.en.srt'])); expect(out.media, isEmpty); }); + + test('image-only download → the image is the media, no thumb (P13b-3)', () { + final out = classifyDownloadOutputs( + _files(['/d/photo.jpg', '/d/photo.info.json']), + ); + expect(out.media.map((f) => f.path), ['/d/photo.jpg']); + expect(out.thumb, isNull); + expect(out.info?.path, '/d/photo.info.json'); + }); + + test('photo carousel → every image is media (sorted) (P13b-3)', () { + final out = classifyDownloadOutputs( + _files(['/d/post 2.jpg', '/d/post 1.png', '/d/post 3.webp']), + ); + expect(out.media.map((f) => f.path), [ + '/d/post 1.png', + '/d/post 2.jpg', + '/d/post 3.webp', + ]); + expect(out.thumb, isNull); + }); + + test('video + image keeps the image as the thumbnail (unchanged)', () { + final out = classifyDownloadOutputs( + _files(['/d/clip.mp4', '/d/clip.jpg']), + ); + expect(out.media.map((f) => f.path), ['/d/clip.mp4']); + expect(out.thumb?.path, '/d/clip.jpg'); + }); }); } From 26b5023bdaf5df9b9188e655b857ee96231fc37d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 17:45:18 +0000 Subject: [PATCH 2/3] feat: opt-in auto-OCR on image download (P13b-3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to P13b-1: optionally auto-scan image downloads for text on completion, so search coverage grows automatically. Opt-in (default off); mirrors P13a-2 auto-summarize. OCR is free + offline (bundled ML Kit), so there's no model download or "needs setup" nudge. - Settings: `autoOcrOnDownload` (default false) + setter. - Pure `shouldAutoOcr` (enabled & engine-available & image & not-yet-scanned). - Queue: gated block in `_persistCompleted` (after auto-summary) scans each image item via ocrEngine → `updateOcrText` (FTS reindexes); `ocrCount` in `_PersistResult`; an `ai` success inbox entry when text is found. - Settings UI: an "Image text (OCR)" auto-scan card in AI & graph settings, shown only where ML Kit OCR runs. - Tests: shouldAutoOcr truth table, settings round-trip, and queue cases (image+text → ocrText + entry; default-off no-op; video skipped). The realistic image test relies on the precursor classifier fix. - Docs: P13-PLAN P13b-3 status, VERIFICATION P13b-3 + image-download fix. No schema/deps change. https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T --- docs/VERIFICATION.md | 10 ++ docs/design/P13-PLAN.md | 17 ++- lib/features/library/presentation/ocr.dart | 15 +++ .../queue/presentation/queue_controller.dart | 55 +++++++++ .../settings/data/settings_model.dart | 5 + .../presentation/ai_settings_screen.dart | 47 ++++++++ .../presentation/settings_controller.dart | 4 + test/features/library/ocr_test.dart | 61 ++++++++++ .../features/queue/queue_controller_test.dart | 109 ++++++++++++++++++ test/features/settings/settings_test.dart | 24 ++++ 10 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 lib/features/library/presentation/ocr.dart create mode 100644 test/features/library/ocr_test.dart diff --git a/docs/VERIFICATION.md b/docs/VERIFICATION.md index 8744ec2..18057ce 100644 --- a/docs/VERIFICATION.md +++ b/docs/VERIFICATION.md @@ -954,6 +954,16 @@ entries, or verify after P11c lands.)* "Couldn't detect the language". - [ ] On a host without ML Kit, the **Translate…** action is absent (graceful). +### P13b-3 — Auto-OCR on download (+ image-download fix) *(install `app-arm64-v8a-debug.apk`)* +- [ ] **Image download fix:** download a single image (e.g. an Instagram/X photo, or a photo carousel) → + it now appears in the library as an **image item** (previously it produced nothing). The video case is + unchanged (the video is the item; its thumbnail is still a thumbnail). +- [ ] AI & graph settings → enable **Image text (OCR) · Auto-scan new image downloads**. Download an image + with legible text → its text becomes **searchable** + a "Text found in image" Activity Inbox entry, + **fully offline**. +- [ ] **Default off:** with the toggle off, image downloads are not auto-scanned (on-demand "Scan text" + still works). A **video** download is never auto-OCR'd. The queue still drains normally. + ### P13 (later subphases) - [ ] **Transcription / summarization / translation / OCR** each work (capability-gated) and write results back to the item. diff --git a/docs/design/P13-PLAN.md b/docs/design/P13-PLAN.md index 93a10a2..927b953 100644 --- a/docs/design/P13-PLAN.md +++ b/docs/design/P13-PLAN.md @@ -150,12 +150,23 @@ target-language UX + GMS nuance). Measure APK-size impact in the first ML Kit bu BCP mapping, `translateReadiness` truth table, controller with a fake engine. **Pending APK spot-check** (the native ML Kit translate/language-id + the pack download); the widget flow is APK-verified. -#### `[ ]` P13b-3 — Auto-OCR on download *(follow-up; native; APK)* +#### `[~]` P13b-3 — Auto-OCR on download (+ image-download fix) *(follow-up; native; APK)* - Opt-in (default off) auto-scan of **image** downloads, mirroring P13a-2 auto-summarize: a settings toggle + a gated block in `queue_controller._persistCompleted` (runs inline; OCR is cheap + offline) → `updateOcrText` → an Activity Inbox entry. Grows search coverage automatically. -- **Exit / review:** with auto-OCR on, a finished image download is scanned + becomes searchable offline; - default-off does nothing; the queue still drains. +- **Precursor fix (maintainer call):** `classifyDownloadOutputs` routed **all** image extensions to `thumb`, + so a single-image download (a photo/carousel) produced **no media item** — auto-OCR would never fire. + Fixed: image files are tentative thumbnails, but when a download has **no video/audio**, the images **are** + the media (→ `image` items). Reuses `mediaTypeForExt` for consistency. This also fixes image downloads + generally (they now appear in the library, with dimensions, OCR, etc.). +- **Exit / review:** an image-only download becomes an `image` item; with auto-OCR on, it's scanned + becomes + searchable offline; default-off / video items do nothing; the queue still drains. +- **Status:** implemented (CI-green) — classifier fix (+ tests); `autoOcrOnDownload` setting + setter; pure + `shouldAutoOcr`; gated auto-OCR block in `_persistCompleted` (`ocrCount` in `_PersistResult`) + an `ai` + success inbox entry when text is found; an "Image text (OCR)" auto-scan card in AI settings (shown where ML + Kit runs). Tests: classifier image cases, `shouldAutoOcr` truth table, settings round-trip, and queue cases + (image+text → `ocrText` + entry; default-off no-op; video skipped). **No schema/deps change.** **Pending + APK spot-check** (real image download → image item + searchable text + inbox entry, offline). ### `[ ]` P13c — Smart auto-tagging *(generation; APK)* LLM-suggested tags feeding the **existing** tag system — builds directly on the P13a generation patterns. diff --git a/lib/features/library/presentation/ocr.dart b/lib/features/library/presentation/ocr.dart new file mode 100644 index 0000000..936bc17 --- /dev/null +++ b/lib/features/library/presentation/ocr.dart @@ -0,0 +1,15 @@ +/// Pure, engine-free helper for auto-OCR-on-download (P13b-3). Kept out of the +/// queue controller so the gating decision is unit-testable in isolation +/// (mirrors `autoSummaryDecision`). +library; + +/// Whether a freshly downloaded item should be auto-scanned for text now. +/// [enabled] is `autoOcrOnDownload`; [engineAvailable] is whether ML Kit OCR can +/// run on this host; [isImage] is whether the item is an image; [alreadyScanned] +/// is whether OCR text is already stored. +bool shouldAutoOcr({ + required bool enabled, + required bool engineAvailable, + required bool isImage, + required bool alreadyScanned, +}) => enabled && engineAvailable && isImage && !alreadyScanned; diff --git a/lib/features/queue/presentation/queue_controller.dart b/lib/features/queue/presentation/queue_controller.dart index d59657d..4b1a3e4 100644 --- a/lib/features/queue/presentation/queue_controller.dart +++ b/lib/features/queue/presentation/queue_controller.dart @@ -6,6 +6,7 @@ import 'package:drift/drift.dart' show Value; import 'package:flutter/widgets.dart' show AppLifecycleState; import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:grabbit/core/ai/generation_provider.dart'; +import 'package:grabbit/core/ai/ocr_provider.dart'; import 'package:grabbit/core/ai/transcription_provider.dart'; import 'package:grabbit/core/db/database.dart'; import 'package:grabbit/core/db/database_provider.dart'; @@ -26,6 +27,7 @@ import 'package:grabbit/features/library/data/library_repository.dart'; import 'package:grabbit/features/library/data/metadata_repository.dart'; import 'package:grabbit/features/library/data/transcript_service.dart'; import 'package:grabbit/features/library/presentation/ai_summary.dart'; +import 'package:grabbit/features/library/presentation/ocr.dart'; import 'package:grabbit/features/notifications/data/notification_enums.dart'; import 'package:grabbit/features/notifications/data/notifications_repository.dart'; import 'package:grabbit/features/notifications/data/system_notification_service.dart'; @@ -51,6 +53,8 @@ typedef _PersistResult = ({ // in but the generation model isn't downloaded → prompt to finish setup. int summaryCount, bool summaryNeedsModel, + // P13b-3: count of image items auto-scanned for text (OCR). + int ocrCount, }); class QueueConfig { @@ -418,6 +422,20 @@ class QueueController extends _$QueueController { dedupeKey: 'summary_needs_model', ); } + // P13b-3: auto-OCR found text in a downloaded image (now searchable). + if (result.ocrCount > 0) { + await center.post( + category: NotificationCategory.ai, + severity: NotificationSeverity.success, + title: queued.title, + body: result.ocrCount > 1 + ? 'Text found in ${result.ocrCount} images' + : 'Text found in image', + targetRoute: route, + itemId: single ? result.primaryId : null, + dedupeKey: 'ocr_$id', + ); + } await _maybeNotifyOs( taskId: id, title: queued.title, @@ -578,6 +596,7 @@ class QueueController extends _$QueueController { transcriptionNeedsModel: false, summaryCount: 0, summaryNeedsModel: false, + ocrCount: 0, ); // Files land in a per-task subfolder (see YtDlpHost `-o`): the task id names // the folder, the user's template names the file inside it. @@ -773,6 +792,41 @@ class QueueController extends _$QueueController { } } + // P13b-3: auto-scan freshly downloaded images for text (OCR) when opted in, + // so they become searchable. On-device + offline (bundled ML Kit, no + // download); images only; skips ones already scanned. + var ocrCount = 0; + if (settings.autoOcrOnDownload) { + final ocr = ref.read(ocrEngineProvider); + final metadata = ref.read(metadataRepositoryProvider); + for (final (i, mediaFile) in outputs.media.indexed) { + final itemId = single ? id : '${id}__$i'; + final ext = mediaFile.path.split('.').last.toLowerCase(); + final isImage = + !queued.request.audioOnly && mediaTypeForExt(ext) == 'image'; + final meta = await (db.select( + db.mediaMetadata, + )..where((m) => m.itemId.equals(itemId))).getSingleOrNull(); + if (!shouldAutoOcr( + enabled: settings.autoOcrOnDownload, + engineAvailable: ocr.isAvailable, + isImage: isImage, + alreadyScanned: meta?.ocrText?.trim().isNotEmpty ?? false, + )) { + continue; + } + try { + final text = (await ocr.recognizeText(mediaFile.path)).trim(); + if (text.isNotEmpty) { + await metadata.updateOcrText(itemId, text); + ocrCount++; + } + } catch (_) { + // A per-item OCR failure must not fail the download. + } + } + } + return ( primaryId: single ? id : '${id}__0', itemCount: outputs.media.length, @@ -780,6 +834,7 @@ class QueueController extends _$QueueController { transcriptionNeedsModel: transcriptionNeedsModel, summaryCount: summaryCount, summaryNeedsModel: summaryNeedsModel, + ocrCount: ocrCount, ); } diff --git a/lib/features/settings/data/settings_model.dart b/lib/features/settings/data/settings_model.dart index f0c774e..c4cb842 100644 --- a/lib/features/settings/data/settings_model.dart +++ b/lib/features/settings/data/settings_model.dart @@ -97,6 +97,11 @@ abstract class SettingsModel with _$SettingsModel { // fetch — mirrors `autoTranscribe`). The on-demand summary on item detail // (P13a) works regardless. @Default(false) bool autoSummarizeOnDownload, + // P13b-3: auto-extract text (OCR) from a newly downloaded image in the + // background, so it becomes searchable. Opt-in (defaults off); runs only on + // images, on-device + offline (bundled ML Kit, no download). The on-demand + // "Scan text" on item detail (P13b-1) works regardless. + @Default(false) bool autoOcrOnDownload, // On-device speech transcription (P12e). Opt-in (defaults off); the whisper // model is downloaded only when the user enables it + picks a model. // `selectedTranscriptionModelId` empty = the device-tier recommendation; diff --git a/lib/features/settings/presentation/ai_settings_screen.dart b/lib/features/settings/presentation/ai_settings_screen.dart index b49cae0..48b017f 100644 --- a/lib/features/settings/presentation/ai_settings_screen.dart +++ b/lib/features/settings/presentation/ai_settings_screen.dart @@ -12,6 +12,7 @@ import 'package:grabbit/core/ai/inference_error.dart'; import 'package:grabbit/core/ai/model_capability_matrix.dart'; import 'package:grabbit/core/ai/model_catalog.dart'; import 'package:grabbit/core/ai/model_download_service.dart'; +import 'package:grabbit/core/ai/ocr_provider.dart'; import 'package:grabbit/core/ai/transcription_model.dart'; import 'package:grabbit/core/ai/transcription_provider.dart'; import 'package:grabbit/core/device/device_profile.dart'; @@ -70,11 +71,57 @@ class AiSettingsScreen extends ConsumerWidget { ), const _GenerationCard(), const _TranscriptionCard(), + const _OcrCard(), ], ); } } +/// On-device image OCR (P13b-3). Image text is always scannable by hand from an +/// image's detail screen (P13b-1); this card just offers the opt-in to do it +/// automatically on download. Shown only where ML Kit OCR can run (Android). +class _OcrCard extends ConsumerWidget { + const _OcrCard(); + + @override + Widget build(BuildContext context, WidgetRef ref) { + if (!ref.watch(ocrEngineProvider).isAvailable) { + return const SizedBox.shrink(); + } + final auto = ref.watch( + settingsControllerProvider.select( + (s) => s.value?.autoOcrOnDownload ?? false, + ), + ); + return Padding( + padding: const EdgeInsets.only(top: 8), + child: SettingsCard( + children: [ + SwitchListTile( + secondary: const InfoHintButton( + InfoHint( + title: 'Auto-scan images for text', + body: + 'Automatically read text inside each downloaded image so ' + 'you can search for it — all on-device and offline. You can ' + 'always scan an image by hand from its detail screen.', + ), + ), + title: const Text('Image text (OCR)'), + subtitle: const Text( + 'Scan new image downloads for searchable text', + ), + value: auto, + onChanged: (v) => ref + .read(settingsControllerProvider.notifier) + .setAutoOcrOnDownload(v), + ), + ], + ), + ); + } +} + /// Compact banner framing the AI screen with the device's capability tier (P12g) /// — so a user understands *why* some AI options are offered or gated. Reads the /// live tier (probed at startup); the InfoHint explains on-device scaling. diff --git a/lib/features/settings/presentation/settings_controller.dart b/lib/features/settings/presentation/settings_controller.dart index 7a71f0d..8e24d20 100644 --- a/lib/features/settings/presentation/settings_controller.dart +++ b/lib/features/settings/presentation/settings_controller.dart @@ -147,6 +147,10 @@ class SettingsController extends _$SettingsController { Future setAutoSummarizeOnDownload(bool value) async => _update((await future).copyWith(autoSummarizeOnDownload: value)); + /// Auto-scan newly downloaded images for text (OCR) in the background (P13b-3). + Future setAutoOcrOnDownload(bool value) async => + _update((await future).copyWith(autoOcrOnDownload: value)); + /// On-device transcription opt-in (P12e). Future setTranscriptionEnabled(bool value) async => _update((await future).copyWith(transcriptionEnabled: value)); diff --git a/test/features/library/ocr_test.dart b/test/features/library/ocr_test.dart new file mode 100644 index 0000000..83a516f --- /dev/null +++ b/test/features/library/ocr_test.dart @@ -0,0 +1,61 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:grabbit/features/library/presentation/ocr.dart'; + +void main() { + group('shouldAutoOcr (P13b-3)', () { + test('all favourable → true', () { + expect( + shouldAutoOcr( + enabled: true, + engineAvailable: true, + isImage: true, + alreadyScanned: false, + ), + isTrue, + ); + }); + + test('any unfavourable condition → false', () { + expect( + shouldAutoOcr( + enabled: false, + engineAvailable: true, + isImage: true, + alreadyScanned: false, + ), + isFalse, + reason: 'disabled', + ); + expect( + shouldAutoOcr( + enabled: true, + engineAvailable: false, + isImage: true, + alreadyScanned: false, + ), + isFalse, + reason: 'engine unavailable', + ); + expect( + shouldAutoOcr( + enabled: true, + engineAvailable: true, + isImage: false, + alreadyScanned: false, + ), + isFalse, + reason: 'not an image', + ); + expect( + shouldAutoOcr( + enabled: true, + engineAvailable: true, + isImage: true, + alreadyScanned: true, + ), + isFalse, + reason: 'already scanned', + ); + }); + }); +} diff --git a/test/features/queue/queue_controller_test.dart b/test/features/queue/queue_controller_test.dart index 80857e4..8668c7d 100644 --- a/test/features/queue/queue_controller_test.dart +++ b/test/features/queue/queue_controller_test.dart @@ -8,6 +8,8 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:grabbit/core/ai/generation_engine.dart'; import 'package:grabbit/core/ai/generation_model.dart'; import 'package:grabbit/core/ai/generation_provider.dart'; +import 'package:grabbit/core/ai/ocr_engine.dart'; +import 'package:grabbit/core/ai/ocr_provider.dart'; import 'package:grabbit/core/ai/structured_generation.dart'; import 'package:grabbit/core/ai/transcription_engine.dart'; import 'package:grabbit/core/ai/transcription_model.dart'; @@ -275,6 +277,27 @@ class FakeGenerationEngine implements GenerationEngine { Future close() async {} } +/// In-memory OCR engine (no native ML Kit) for the auto-OCR tests (P13b-3). +/// [available] simulates whether OCR can run; records the paths it scanned. +class FakeOcrEngine implements OcrEngine { + FakeOcrEngine({this.available = true, this.result = 'fake ocr text'}); + + bool available; + String result; + final List scanned = []; + + @override + bool get isAvailable => available; + @override + Future recognizeText(String imagePath) async { + scanned.add(imagePath); + return result; + } + + @override + Future close() async {} +} + QueuedDownload _qd( String id, { String outputDir = '/tmp', @@ -323,6 +346,7 @@ void main() { late FakeSystemNotificationService fakeOsNotifier; late FakeTranscriptionEngine fakeTranscriber; late FakeGenerationEngine fakeGenerator; + late FakeOcrEngine fakeOcr; late Directory mediaDir; ProviderContainer makeContainer() => ProviderContainer( @@ -337,6 +361,7 @@ void main() { mediaStorageProvider.overrideWithValue(FakeMediaStorage(mediaDir)), transcriptionEngineProvider.overrideWithValue(fakeTranscriber), generationEngineProvider.overrideWithValue(fakeGenerator), + ocrEngineProvider.overrideWithValue(fakeOcr), queueConfigProvider.overrideWithValue( const QueueConfig(baseRetryDelay: Duration(milliseconds: 5)), ), @@ -353,6 +378,7 @@ void main() { fakeOsNotifier = FakeSystemNotificationService(); fakeTranscriber = FakeTranscriptionEngine(); fakeGenerator = FakeGenerationEngine(); + fakeOcr = FakeOcrEngine(); mediaDir = Directory.systemTemp.createTempSync('grabbit_qmedia_'); container = makeContainer(); repo = container.read(queueRepositoryProvider); @@ -470,6 +496,20 @@ void main() { db.mediaMetadata, )..where((m) => m.itemId.equals(id))).getSingleOrNull())?.aiSummary; + Future ocrOf(String id) async => (await (db.select( + db.mediaMetadata, + )..where((m) => m.itemId.equals(id))).getSingleOrNull())?.ocrText; + + /// An image-only download (no video/audio) — after the P13b-3 classifier fix + /// this becomes an `image` library item. + Future imageDownload(String id) async { + final dir = await Directory.systemTemp.createTemp('grabbit_img_'); + addTearDown(() => dir.delete(recursive: true)); + await Directory('${dir.path}/$id').create(); + await File('${dir.path}/$id/Photo.jpg').writeAsString('imgdata'); + return dir; + } + /// A normal completed download with a description (so the auto-summary source /// — `transcript ?? description` — is non-empty), no caption sidecar. Future describedDownload(String id) async { @@ -642,6 +682,75 @@ void main() { }, ); + // --- P13b-3: auto-OCR on image download --- + + test( + 'auto-OCR: enabled + image + text → ocrText + ai entry (P13b-3)', + () async { + await container + .read(settingsControllerProvider.notifier) + .setAutoOcrOnDownload(true); + fakeOcr.available = true; + final dir = await imageDownload('img1'); + + await controller.enqueue(_qd('img1', outputDir: dir.path)); + await waitFor(() async => engine.running.contains('img1')); + engine.complete('img1'); + await waitFor( + () async => (await repo.byId('img1'))?.status == TaskStatus.done, + ); + + // The classifier fix makes the image the media item; auto-OCR scans it. + final item = await (db.select( + db.mediaItems, + )..where((t) => t.id.equals('img1'))).getSingle(); + expect(item.type, 'image'); + expect(fakeOcr.scanned, hasLength(1)); + expect(await ocrOf('img1'), 'fake ocr text'); + final ai = await (db.select( + db.notifications, + )..where((n) => n.category.equals(NotificationCategory.ai))).get(); + expect(ai, hasLength(1)); + expect(ai.single.severity, NotificationSeverity.success); + }, + ); + + test('auto-OCR: default off → no scan, no entry (P13b-3)', () async { + fakeOcr.available = true; // engine present, but the toggle is off + final dir = await imageDownload('img1'); + + await controller.enqueue(_qd('img1', outputDir: dir.path)); + await waitFor(() async => engine.running.contains('img1')); + engine.complete('img1'); + await waitFor( + () async => (await repo.byId('img1'))?.status == TaskStatus.done, + ); + + expect(fakeOcr.scanned, isEmpty); + expect(await ocrOf('img1'), isNull); + final ai = await (db.select( + db.notifications, + )..where((n) => n.category.equals(NotificationCategory.ai))).get(); + expect(ai, isEmpty); + }); + + test('auto-OCR: enabled but item is a video → skipped (P13b-3)', () async { + await container + .read(settingsControllerProvider.notifier) + .setAutoOcrOnDownload(true); + final dir = await captionlessDownload('vid1'); // a .mp4 + + await controller.enqueue(_qd('vid1', outputDir: dir.path)); + await waitFor(() async => engine.running.contains('vid1')); + engine.complete('vid1'); + await waitFor( + () async => (await repo.byId('vid1'))?.status == TaskStatus.done, + ); + + expect(fakeOcr.scanned, isEmpty); + expect(await ocrOf('vid1'), isNull); + }); + test('a completed download posts a success activity entry (P11c)', () async { final dir = await Directory.systemTemp.createTemp('grabbit_ntf_done_'); addTearDown(() => dir.delete(recursive: true)); diff --git a/test/features/settings/settings_test.dart b/test/features/settings/settings_test.dart index 558df31..5718639 100644 --- a/test/features/settings/settings_test.dart +++ b/test/features/settings/settings_test.dart @@ -274,6 +274,30 @@ void main() { expect(saved.aiSetupSeen, isTrue); }); + test( + 'auto-process setters default off and persist (P13a-2/P13b-3)', + () async { + final db = AppDatabase(NativeDatabase.memory()); + addTearDown(db.close); + final container = ProviderContainer( + overrides: [appDatabaseProvider.overrideWithValue(db)], + ); + addTearDown(container.dispose); + + final loaded = await container.read(settingsControllerProvider.future); + expect(loaded.autoSummarizeOnDownload, isFalse); + expect(loaded.autoOcrOnDownload, isFalse); + + final notifier = container.read(settingsControllerProvider.notifier); + await notifier.setAutoSummarizeOnDownload(true); + await notifier.setAutoOcrOnDownload(true); + + final saved = await SettingsRepository(db).read(); + expect(saved.autoSummarizeOnDownload, isTrue); + expect(saved.autoOcrOnDownload, isTrue); + }, + ); + test( 'autoCheckEngineUpdate defaults on and persists when toggled', () async { From 0d572bc7185abd2e14efdf9e357221ca6958377d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 19:35:43 +0000 Subject: [PATCH 3/3] fix: image thumbnails + de-duplicate image downloads; queue quick wins (P13b-3 sweep) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-merge sweep of the image-download/classification work: - MediaThumb now falls back to the image FILE for `image` items with a null thumbnail (they were showing a movie-icon placeholder in the grid, dashboard, collections, hero shuttle, and related strips). Typed fallback icon for images is now image_outlined. - classifyDownloadOutputs collapses an image + its yt-dlp `--write-thumbnail` sidecar to ONE item: with no video/audio, the largest image is the media and the next-largest is its thumbnail (carousels expand to one task/folder per photo, so multiple images here = photo + thumbnail). Prevents a duplicate image item and gives image items a real thumbnail. - Quick wins in _persistCompleted: auto-transcribe skips image items (no wasted whisper transcode of a photo); durationSec gated to non-image. - Tests: classifier photo+thumbnail collapse (real temp files), MediaThumb image null-thumb renders Image.file (not the movie icon), queue cases (image+thumbnail → one item with a thumbnail; whisper skipped on images). - Docs: VERIFICATION (thumbnail rendering, single-item, export), BACKLOG (unconditional --write-thumbnail; non-mediaTypeForExt image formats), P13-PLAN P13b-3 sweep note. https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T --- docs/BACKLOG.md | 9 +++ docs/VERIFICATION.md | 8 ++- docs/design/P13-PLAN.md | 6 ++ .../library/presentation/media_grid.dart | 12 +++- .../queue/data/completed_outputs.dart | 23 +++++++- .../queue/presentation/queue_controller.dart | 8 ++- test/features/library/media_grid_test.dart | 38 +++++++++++++ .../queue/completed_outputs_test.dart | 31 +++++++---- .../features/queue/queue_controller_test.dart | 55 +++++++++++++++++++ 9 files changed, 171 insertions(+), 19 deletions(-) diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md index b128fa3..765bd05 100644 --- a/docs/BACKLOG.md +++ b/docs/BACKLOG.md @@ -16,6 +16,15 @@ _(nothing active — pick the next batch from below)_ - [ ] **OCR — non-Latin scripts.** P13b-1 ships the **bundled Latin** ML Kit recognizer (no Google Play Services, offline). Chinese/Japanese/Korean/Devanagari need their own ML Kit script models (extra APK size or a download). Add a script choice if users want non-Latin OCR. *(From P13b-1.)* +- [ ] **Unconditional `--write-thumbnail` for image downloads.** `YtDlpHost.kt` passes + `--write-thumbnail --convert-thumbnails jpg` for every download, so an image download wastes a fetch + writing a thumbnail of the photo. P13b-3 handles this defensively in Dart (the classifier keeps the + largest image as the photo and the smaller as its thumbnail), but a cleaner fix would gate the flag off + at request time for image downloads (needs an `isImage`/`writeThumbnail` hint through the Pigeon + `DownloadRequest`). *(From P13b-3 sweep.)* +- [ ] **Image formats outside `mediaTypeForExt`.** `.heic`/`.heif`/`.avif`/`.tiff` aren't in the image set, + so such a download is classified as a `video` item. Add them (+ confirm the player/thumbnail handle + them) if real downloads produce them. *(From P13b-3 sweep.)* - [ ] **Auto-summarize — queue-decoupled background run.** P13a-2 generates the auto-summary **inline** in `_persistCompleted` before the next download pumps (gated on "model present" so it can't stall on a fetch), exactly like `autoTranscribe`. Generation is heavier than whisper-tiny, so a fuller design diff --git a/docs/VERIFICATION.md b/docs/VERIFICATION.md index 18057ce..dfd4347 100644 --- a/docs/VERIFICATION.md +++ b/docs/VERIFICATION.md @@ -956,8 +956,12 @@ entries, or verify after P11c lands.)* ### P13b-3 — Auto-OCR on download (+ image-download fix) *(install `app-arm64-v8a-debug.apk`)* - [ ] **Image download fix:** download a single image (e.g. an Instagram/X photo, or a photo carousel) → - it now appears in the library as an **image item** (previously it produced nothing). The video case is - unchanged (the video is the item; its thumbnail is still a thumbnail). + it now appears in the library as an **image item** (previously it produced nothing), shows **its own + picture as the thumbnail** in the grid/dashboard/collections (not a movie-icon placeholder), and is + exactly **one** item even though yt-dlp also writes a thumbnail sidecar. The video case is unchanged + (the video is the item; its thumbnail is still a thumbnail). +- [ ] **Export:** export a downloaded image item to the gallery → it lands in the **Images** collection + and opens in the device gallery. - [ ] AI & graph settings → enable **Image text (OCR) · Auto-scan new image downloads**. Download an image with legible text → its text becomes **searchable** + a "Text found in image" Activity Inbox entry, **fully offline**. diff --git a/docs/design/P13-PLAN.md b/docs/design/P13-PLAN.md index 927b953..113517f 100644 --- a/docs/design/P13-PLAN.md +++ b/docs/design/P13-PLAN.md @@ -167,6 +167,12 @@ target-language UX + GMS nuance). Measure APK-size impact in the first ML Kit bu Kit runs). Tests: classifier image cases, `shouldAutoOcr` truth table, settings round-trip, and queue cases (image+text → `ocrText` + entry; default-off no-op; video skipped). **No schema/deps change.** **Pending APK spot-check** (real image download → image item + searchable text + inbox entry, offline). +- **Pre-merge sweep refinements (same PR):** (a) `MediaThumb` now falls back to the image **file** for + `image` items with a null thumbnail (they were showing a movie-icon placeholder in grid/dashboard/ + collections/hero/related); (b) the classifier collapses an image + its yt-dlp `--write-thumbnail` sidecar + to **one** item (largest = photo, smaller = thumbnail) so a single image download isn't double-counted; + (c) quick wins — auto-transcribe skips image items, and `durationSec` is gated to non-image. The + unconditional `--write-thumbnail` and non-`mediaTypeForExt` image formats are logged in `BACKLOG.md`. ### `[ ]` P13c — Smart auto-tagging *(generation; APK)* LLM-suggested tags feeding the **existing** tag system — builds directly on the P13a generation patterns. diff --git a/lib/features/library/presentation/media_grid.dart b/lib/features/library/presentation/media_grid.dart index ae80853..7e2dbd7 100644 --- a/lib/features/library/presentation/media_grid.dart +++ b/lib/features/library/presentation/media_grid.dart @@ -241,12 +241,20 @@ class MediaThumb extends StatelessWidget { final fallback = ColoredBox( color: scheme.surfaceContainerHighest, child: Icon( - item.type == 'audio' ? Icons.music_note : Icons.movie_outlined, + switch (item.type) { + 'audio' => Icons.music_note, + 'image' => Icons.image_outlined, + _ => Icons.movie_outlined, + }, color: scheme.onSurfaceVariant, size: 40, ), ); - final thumbPath = item.thumbPath; + // Image items often have no separate thumbnail (the photo is its own + // thumbnail) — render the image file directly. Everything else needs a + // generated thumbnail. + final thumbPath = + item.thumbPath ?? (item.type == 'image' ? item.filePath : null); if (thumbPath == null) return fallback; return Image.file( File(thumbPath), diff --git a/lib/features/queue/data/completed_outputs.dart b/lib/features/queue/data/completed_outputs.dart index 29ca98e..c0c6562 100644 --- a/lib/features/queue/data/completed_outputs.dart +++ b/lib/features/queue/data/completed_outputs.dart @@ -36,15 +36,32 @@ DownloadOutputs classifyDownloadOutputs(Iterable files) { } } media.sort((a, b) => a.path.compareTo(b.path)); - images.sort((a, b) => a.path.compareTo(b.path)); // Video/audio present → images are thumbnail sidecars (keep the first). - // Otherwise it's an image download → the images are the media. if (media.isNotEmpty) { + images.sort((a, b) => a.path.compareTo(b.path)); return ( media: media, thumb: images.isEmpty ? null : images.first, info: info, ); } - return (media: images, thumb: null, info: info); + // Image download → the image is the media. A carousel expands to one task + // (folder) per photo, so multiple images here means the photo PLUS yt-dlp's + // `--write-thumbnail` sidecar — keep the largest as the photo and the next as + // its thumbnail (rather than minting a duplicate item). + if (images.length <= 1) { + return (media: images, thumb: null, info: info); + } + images.sort((a, b) => _sizeOf(b).compareTo(_sizeOf(a))); + return (media: [images.first], thumb: images[1], info: info); +} + +/// File size in bytes, or 0 when it can't be read (e.g. a missing path in a +/// unit test) — used only to pick the largest image as the media. +int _sizeOf(File f) { + try { + return f.lengthSync(); + } on FileSystemException { + return 0; + } } diff --git a/lib/features/queue/presentation/queue_controller.dart b/lib/features/queue/presentation/queue_controller.dart index 4b1a3e4..0e61a3e 100644 --- a/lib/features/queue/presentation/queue_controller.dart +++ b/lib/features/queue/presentation/queue_controller.dart @@ -653,7 +653,9 @@ class QueueController extends _$QueueController { type: type, createdAt: DateTime.now(), storageState: 'private', - durationSec: Value(single ? queued.durationSec : null), + durationSec: Value( + single && type != 'image' ? queued.durationSec : null, + ), sizeBytes: Value(await mediaFile.length()), thumbPath: Value(outputs.thumb?.path), width: Value(width), @@ -703,6 +705,10 @@ class QueueController extends _$QueueController { : null; final whisperReady = whisper != null && await whisper.ensureReady(); for (final (i, mediaFile) in outputs.media.indexed) { + // Images have no audio to transcribe — skip (avoids a wasted whisper + // transcode of a photo). + final ext = mediaFile.path.split('.').last.toLowerCase(); + if (mediaTypeForExt(ext) == 'image') continue; final itemId = single ? id : '${id}__$i'; final timed = await transcripts.extractTimed( mediaFile.path, diff --git a/test/features/library/media_grid_test.dart b/test/features/library/media_grid_test.dart index be0b9fd..c474271 100644 --- a/test/features/library/media_grid_test.dart +++ b/test/features/library/media_grid_test.dart @@ -46,6 +46,44 @@ void main() { ); }); + testWidgets( + 'image item with no thumbnail renders the image file, not a movie icon (P13b-3)', + (tester) async { + await tester.pumpWidget( + ProviderScope( + child: MaterialApp( + home: Scaffold( + body: MediaThumb( + item: _item(id: 'i', type: 'image'), + ), + ), + ), + ), + ); + // Falls back to Image.file(filePath) — never the video placeholder. + expect(find.byType(Image), findsOneWidget); + expect(find.byIcon(Icons.movie_outlined), findsNothing); + }, + ); + + testWidgets('video item with no thumbnail shows the movie placeholder', ( + tester, + ) async { + await tester.pumpWidget( + ProviderScope( + child: MaterialApp( + home: Scaffold( + body: MediaThumb( + item: _item(id: 'v2', type: 'video'), + ), + ), + ), + ), + ); + expect(find.byIcon(Icons.movie_outlined), findsOneWidget); + expect(find.byType(Image), findsNothing); + }); + testWidgets('tapping the star favorites the item', (tester) async { final db = AppDatabase(NativeDatabase.memory()); addTearDown(db.close); diff --git a/test/features/queue/completed_outputs_test.dart b/test/features/queue/completed_outputs_test.dart index fd58a57..6ee0b8f 100644 --- a/test/features/queue/completed_outputs_test.dart +++ b/test/features/queue/completed_outputs_test.dart @@ -60,17 +60,26 @@ void main() { expect(out.info?.path, '/d/photo.info.json'); }); - test('photo carousel → every image is media (sorted) (P13b-3)', () { - final out = classifyDownloadOutputs( - _files(['/d/post 2.jpg', '/d/post 1.png', '/d/post 3.webp']), - ); - expect(out.media.map((f) => f.path), [ - '/d/post 1.png', - '/d/post 2.jpg', - '/d/post 3.webp', - ]); - expect(out.thumb, isNull); - }); + test( + 'image + its written thumbnail → largest is media, smaller is thumb (P13b-3)', + () async { + // yt-dlp `--write-thumbnail` lands a second image beside the photo; + // the larger file is the real photo, the smaller is its thumbnail. + final dir = await Directory.systemTemp.createTemp('grabbit_cls_'); + addTearDown(() => dir.delete(recursive: true)); + final photo = File('${dir.path}/post.webp') + ..writeAsBytesSync(List.filled(5000, 0)); + final thumb = File('${dir.path}/post.jpg') + ..writeAsBytesSync(List.filled(300, 0)); + + final out = classifyDownloadOutputs([ + thumb, + photo, + ]); // order shouldn't matter + expect(out.media.map((f) => f.path), [photo.path]); + expect(out.thumb?.path, thumb.path); + }, + ); test('video + image keeps the image as the thumbnail (unchanged)', () { final out = classifyDownloadOutputs( diff --git a/test/features/queue/queue_controller_test.dart b/test/features/queue/queue_controller_test.dart index 8668c7d..6da2efe 100644 --- a/test/features/queue/queue_controller_test.dart +++ b/test/features/queue/queue_controller_test.dart @@ -510,6 +510,21 @@ void main() { return dir; } + /// An image download plus yt-dlp's `--write-thumbnail` sidecar (a smaller + /// second image) — should still yield exactly one image item. + Future imageWithThumbDownload(String id) async { + final dir = await Directory.systemTemp.createTemp('grabbit_imgt_'); + addTearDown(() => dir.delete(recursive: true)); + await Directory('${dir.path}/$id').create(); + await File( + '${dir.path}/$id/Photo.webp', + ).writeAsBytes(List.filled(5000, 0)); // the real photo (larger) + await File( + '${dir.path}/$id/Photo.jpg', + ).writeAsBytes(List.filled(300, 0)); // the written thumbnail (smaller) + return dir; + } + /// A normal completed download with a description (so the auto-summary source /// — `transcript ?? description` — is non-empty), no caption sidecar. Future describedDownload(String id) async { @@ -751,6 +766,46 @@ void main() { expect(await ocrOf('vid1'), isNull); }); + test('image + written thumbnail → exactly one image item (P13b-3)', () async { + final dir = await imageWithThumbDownload('img1'); + + await controller.enqueue(_qd('img1', outputDir: dir.path)); + await waitFor(() async => engine.running.contains('img1')); + engine.complete('img1'); + await waitFor( + () async => (await repo.byId('img1'))?.status == TaskStatus.done, + ); + + final items = await (db.select( + db.mediaItems, + )..where((t) => t.id.equals('img1'))).get(); + expect(items, hasLength(1)); // not two (the thumbnail isn't its own item) + expect(items.single.type, 'image'); + // The smaller image became the thumbnail. + expect(items.single.thumbPath, endsWith('Photo.jpg')); + expect(items.single.filePath, endsWith('Photo.webp')); + }); + + test('auto-transcribe skips image downloads (P13b-3)', () async { + await container + .read(settingsControllerProvider.notifier) + .setAutoTranscribe(true); + await container + .read(settingsControllerProvider.notifier) + .setTranscriptionEnabled(true); + fakeTranscriber.ready = true; + final dir = await imageDownload('img1'); + + await controller.enqueue(_qd('img1', outputDir: dir.path)); + await waitFor(() async => engine.running.contains('img1')); + engine.complete('img1'); + await waitFor( + () async => (await repo.byId('img1'))?.status == TaskStatus.done, + ); + + expect(fakeTranscriber.transcribed, isEmpty); // no whisper on a photo + }); + test('a completed download posts a success activity entry (P11c)', () async { final dir = await Directory.systemTemp.createTemp('grabbit_ntf_done_'); addTearDown(() => dir.delete(recursive: true));