From 1418091c5678392ba3dae619bebc59e501501f50 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 2 Jun 2026 17:45:08 +0000
Subject: [PATCH 1/3] fix: image-only downloads become image items, not just a
 thumbnail (P13b-3 precursor)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

classifyDownloadOutputs routed every image extension to `thumb`, so an
image-only download (a photo or a carousel) produced an empty media list —
no library item at all. Now image files are tentative thumbnails: alongside
a video/audio download they stay thumbnails, but when a download has no
video/audio the images ARE the media (→ image items). Reuses mediaTypeForExt
so classification matches the insert loop's type assignment.

This unblocks auto-OCR-on-download (P13b-3) and fixes image downloads
generally (they now appear in the library, with dimensions/OCR/etc.).

https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T
---
 .../queue/data/completed_outputs.dart         | 28 ++++++++++++++----
 .../queue/completed_outputs_test.dart         | 29 +++++++++++++++++++
 2 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/lib/features/queue/data/completed_outputs.dart b/lib/features/queue/data/completed_outputs.dart
index 6ade0e8..29ca98e 100644
--- a/lib/features/queue/data/completed_outputs.dart
+++ b/lib/features/queue/data/completed_outputs.dart
@@ -1,18 +1,24 @@
 import 'dart:io';
 
+import 'package:grabbit/core/utils/media_type.dart';
+
 /// The classified files produced by a finished download in its per-task folder.
 typedef DownloadOutputs = ({List<File> media, File? thumb, File? info});
 
 const _subtitleExts = {'srt', 'vtt', 'ass', 'ssa', 'lrc', 'sub'};
-const _thumbExts = {'jpg', 'jpeg', 'png', 'webp'};
 
 /// Sorts a download folder's files into the media file(s), the thumbnail, and
 /// the `.info.json` sidecar. Subtitle sidecars (`.srt`/`.vtt`/`.srv*`/…) and
 /// other JSON sidecars are excluded so they're never mistaken for the media —
 /// and multiple media files (yt-dlp `--split-chapters`) are all returned.
+///
+/// Image files are **tentative thumbnails**: alongside a video/audio download an
+/// image is the thumbnail sidecar, but an **image-only** download (a photo or a
+/// carousel of photos) has no video/audio — there the images *are* the media
+/// (P13b-3), so they become image library items rather than being discarded.
 DownloadOutputs classifyDownloadOutputs(Iterable<File> files) {
-  final media = <File>[];
-  File? thumb;
+  final media = <File>[]; // video / audio
+  final images = <File>[]; // image files — thumbnail(s) or the media itself
   File? info;
   for (final f in files) {
     final lower = f.path.toLowerCase();
@@ -23,12 +29,22 @@ DownloadOutputs classifyDownloadOutputs(Iterable<File> files) {
       // Other yt-dlp sidecars (e.g. live chat) — ignore.
     } else if (_subtitleExts.contains(ext) || ext.startsWith('srv')) {
       // Subtitle sidecars — not media.
-    } else if (_thumbExts.contains(ext)) {
-      thumb ??= f;
+    } else if (mediaTypeForExt(ext) == 'image') {
+      images.add(f);
     } else {
       media.add(f);
     }
   }
   media.sort((a, b) => a.path.compareTo(b.path));
-  return (media: media, thumb: thumb, info: info);
+  images.sort((a, b) => a.path.compareTo(b.path));
+  // Video/audio present → images are thumbnail sidecars (keep the first).
+  // Otherwise it's an image download → the images are the media.
+  if (media.isNotEmpty) {
+    return (
+      media: media,
+      thumb: images.isEmpty ? null : images.first,
+      info: info,
+    );
+  }
+  return (media: images, thumb: null, info: info);
 }
diff --git a/test/features/queue/completed_outputs_test.dart b/test/features/queue/completed_outputs_test.dart
index 396deda..fd58a57 100644
--- a/test/features/queue/completed_outputs_test.dart
+++ b/test/features/queue/completed_outputs_test.dart
@@ -50,5 +50,34 @@ void main() {
       final out = classifyDownloadOutputs(_files(['/d/clip.en.srt']));
       expect(out.media, isEmpty);
     });
+
+    test('image-only download → the image is the media, no thumb (P13b-3)', () {
+      final out = classifyDownloadOutputs(
+        _files(['/d/photo.jpg', '/d/photo.info.json']),
+      );
+      expect(out.media.map((f) => f.path), ['/d/photo.jpg']);
+      expect(out.thumb, isNull);
+      expect(out.info?.path, '/d/photo.info.json');
+    });
+
+    test('photo carousel → every image is media (sorted) (P13b-3)', () {
+      final out = classifyDownloadOutputs(
+        _files(['/d/post 2.jpg', '/d/post 1.png', '/d/post 3.webp']),
+      );
+      expect(out.media.map((f) => f.path), [
+        '/d/post 1.png',
+        '/d/post 2.jpg',
+        '/d/post 3.webp',
+      ]);
+      expect(out.thumb, isNull);
+    });
+
+    test('video + image keeps the image as the thumbnail (unchanged)', () {
+      final out = classifyDownloadOutputs(
+        _files(['/d/clip.mp4', '/d/clip.jpg']),
+      );
+      expect(out.media.map((f) => f.path), ['/d/clip.mp4']);
+      expect(out.thumb?.path, '/d/clip.jpg');
+    });
   });
 }

From 26b5023bdaf5df9b9188e655b857ee96231fc37d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 2 Jun 2026 17:45:18 +0000
Subject: [PATCH 2/3] feat: opt-in auto-OCR on image download (P13b-3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to P13b-1: optionally auto-scan image downloads for text on
completion, so search coverage grows automatically. Opt-in (default off);
mirrors P13a-2 auto-summarize. OCR is free + offline (bundled ML Kit), so
there's no model download or "needs setup" nudge.

- Settings: `autoOcrOnDownload` (default false) + setter.
- Pure `shouldAutoOcr` (enabled & engine-available & image & not-yet-scanned).
- Queue: gated block in `_persistCompleted` (after auto-summary) scans each
  image item via ocrEngine → `updateOcrText` (FTS reindexes); `ocrCount` in
  `_PersistResult`; an `ai` success inbox entry when text is found.
- Settings UI: an "Image text (OCR)" auto-scan card in AI & graph settings,
  shown only where ML Kit OCR runs.
- Tests: shouldAutoOcr gating cases (all favourable + each single blocker),
  settings round-trip, and queue cases (image+text → ocrText + entry;
  default-off no-op; video skipped). The realistic image test relies on the
  precursor classifier fix.
- Docs: P13-PLAN P13b-3 status, VERIFICATION P13b-3 + image-download fix.
  No schema/deps change.

https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T
---
 docs/VERIFICATION.md                          |  10 ++
 docs/design/P13-PLAN.md                       |  17 ++-
 lib/features/library/presentation/ocr.dart    |  15 +++
 .../queue/presentation/queue_controller.dart  |  55 +++++++++
 .../settings/data/settings_model.dart         |   5 +
 .../presentation/ai_settings_screen.dart      |  47 ++++++++
 .../presentation/settings_controller.dart     |   4 +
 test/features/library/ocr_test.dart           |  61 ++++++++++
 .../features/queue/queue_controller_test.dart | 109 ++++++++++++++++++
 test/features/settings/settings_test.dart     |  24 ++++
 10 files changed, 344 insertions(+), 3 deletions(-)
 create mode 100644 lib/features/library/presentation/ocr.dart
 create mode 100644 test/features/library/ocr_test.dart

diff --git a/docs/VERIFICATION.md b/docs/VERIFICATION.md
index 8744ec2..18057ce 100644
--- a/docs/VERIFICATION.md
+++ b/docs/VERIFICATION.md
@@ -954,6 +954,16 @@ entries, or verify after P11c lands.)*
       "Couldn't detect the language".
 - [ ] On a host without ML Kit, the **Translate…** action is absent (graceful).
 
+### P13b-3 — Auto-OCR on download (+ image-download fix)  *(install `app-arm64-v8a-debug.apk`)*
+- [ ] **Image download fix:** download a single image (e.g. an Instagram/X photo, or a photo carousel) →
+      it now appears in the library as an **image item** (previously it produced nothing). The video case is
+      unchanged (the video is the item; its thumbnail is still a thumbnail).
+- [ ] AI & graph settings → enable **Image text (OCR) · Auto-scan new image downloads**. Download an image
+      with legible text → its text becomes **searchable** + a "Text found in image" Activity Inbox entry,
+      **fully offline**.
+- [ ] **Default off:** with the toggle off, image downloads are not auto-scanned (on-demand "Scan text"
+      still works). A **video** download is never auto-OCR'd. The queue still drains normally.
+
 ### P13 (later subphases)
 - [ ] **Transcription / summarization / translation / OCR** each work (capability-gated) and write
       results back to the item.
diff --git a/docs/design/P13-PLAN.md b/docs/design/P13-PLAN.md
index 93a10a2..927b953 100644
--- a/docs/design/P13-PLAN.md
+++ b/docs/design/P13-PLAN.md
@@ -150,12 +150,23 @@ target-language UX + GMS nuance). Measure APK-size impact in the first ML Kit bu
   BCP mapping, `translateReadiness` truth table, controller with a fake engine. **Pending APK spot-check**
   (the native ML Kit translate/language-id + the pack download); the widget flow is APK-verified.
 
-#### `[ ]` P13b-3 — Auto-OCR on download *(follow-up; native; APK)*
+#### `[~]` P13b-3 — Auto-OCR on download (+ image-download fix) *(follow-up; native; APK)*
 - Opt-in (default off) auto-scan of **image** downloads, mirroring P13a-2 auto-summarize: a settings toggle +
   a gated block in `queue_controller._persistCompleted` (runs inline; OCR is cheap + offline) → `updateOcrText`
   → an Activity Inbox entry. Grows search coverage automatically.
-- **Exit / review:** with auto-OCR on, a finished image download is scanned + becomes searchable offline;
-  default-off does nothing; the queue still drains.
+- **Precursor fix (maintainer call):** `classifyDownloadOutputs` routed **all** image extensions to `thumb`,
+  so a single-image download (a photo/carousel) produced **no media item** — auto-OCR would never fire.
+  Fixed: image files are tentative thumbnails, but when a download has **no video/audio**, the images **are**
+  the media (→ `image` items). Reuses `mediaTypeForExt` for consistency. This also fixes image downloads
+  generally (they now appear in the library, with dimensions, OCR, etc.).
+- **Exit / review:** an image-only download becomes an `image` item; with auto-OCR on, it's scanned + becomes
+  searchable offline; default-off / video items do nothing; the queue still drains.
+- **Status:** implemented (CI-green) — classifier fix (+ tests); `autoOcrOnDownload` setting + setter; pure
+  `shouldAutoOcr`; gated auto-OCR block in `_persistCompleted` (`ocrCount` in `_PersistResult`) + an `ai`
+  success inbox entry when text is found; an "Image text (OCR)" auto-scan card in AI settings (shown where ML
+  Kit runs). Tests: classifier image cases, `shouldAutoOcr` truth table, settings round-trip, and queue cases
+  (image+text → `ocrText` + entry; default-off no-op; video skipped). **No schema/deps change.** **Pending
+  APK spot-check** (real image download → image item + searchable text + inbox entry, offline).
 
 ### `[ ]` P13c — Smart auto-tagging *(generation; APK)*
 LLM-suggested tags feeding the **existing** tag system — builds directly on the P13a generation patterns.
diff --git a/lib/features/library/presentation/ocr.dart b/lib/features/library/presentation/ocr.dart
new file mode 100644
index 0000000..936bc17
--- /dev/null
+++ b/lib/features/library/presentation/ocr.dart
@@ -0,0 +1,15 @@
+/// Pure, engine-free helper for auto-OCR-on-download (P13b-3). Kept out of the
+/// queue controller so the gating decision is unit-testable in isolation
+/// (mirrors `autoSummaryDecision`).
+library;
+
+/// Whether a just-downloaded item should get an automatic OCR pass right now.
+/// All four must hold: [enabled] (the `autoOcrOnDownload` setting),
+/// [engineAvailable] (OCR can run on this host), [isImage] (the item is an
+/// image), and not [alreadyScanned] (no OCR text is stored for it yet).
+bool shouldAutoOcr({
+  required bool enabled,
+  required bool engineAvailable,
+  required bool isImage,
+  required bool alreadyScanned,
+}) => !alreadyScanned && isImage && engineAvailable && enabled;
diff --git a/lib/features/queue/presentation/queue_controller.dart b/lib/features/queue/presentation/queue_controller.dart
index d59657d..4b1a3e4 100644
--- a/lib/features/queue/presentation/queue_controller.dart
+++ b/lib/features/queue/presentation/queue_controller.dart
@@ -6,6 +6,7 @@ import 'package:drift/drift.dart' show Value;
 import 'package:flutter/widgets.dart' show AppLifecycleState;
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:grabbit/core/ai/generation_provider.dart';
+import 'package:grabbit/core/ai/ocr_provider.dart';
 import 'package:grabbit/core/ai/transcription_provider.dart';
 import 'package:grabbit/core/db/database.dart';
 import 'package:grabbit/core/db/database_provider.dart';
@@ -26,6 +27,7 @@ import 'package:grabbit/features/library/data/library_repository.dart';
 import 'package:grabbit/features/library/data/metadata_repository.dart';
 import 'package:grabbit/features/library/data/transcript_service.dart';
 import 'package:grabbit/features/library/presentation/ai_summary.dart';
+import 'package:grabbit/features/library/presentation/ocr.dart';
 import 'package:grabbit/features/notifications/data/notification_enums.dart';
 import 'package:grabbit/features/notifications/data/notifications_repository.dart';
 import 'package:grabbit/features/notifications/data/system_notification_service.dart';
@@ -51,6 +53,8 @@ typedef _PersistResult = ({
   // in but the generation model isn't downloaded → prompt to finish setup.
   int summaryCount,
   bool summaryNeedsModel,
+  // P13b-3: count of image items auto-scanned for text (OCR).
+  int ocrCount,
 });
 
 class QueueConfig {
@@ -418,6 +422,20 @@ class QueueController extends _$QueueController {
         dedupeKey: 'summary_needs_model',
       );
     }
+    // P13b-3: auto-OCR found text in a downloaded image (now searchable).
+    if (result.ocrCount > 0) {
+      await center.post(
+        category: NotificationCategory.ai,
+        severity: NotificationSeverity.success,
+        title: queued.title,
+        body: result.ocrCount > 1
+            ? 'Text found in ${result.ocrCount} images'
+            : 'Text found in image',
+        targetRoute: route,
+        itemId: single ? result.primaryId : null,
+        dedupeKey: 'ocr_$id',
+      );
+    }
     await _maybeNotifyOs(
       taskId: id,
       title: queued.title,
@@ -578,6 +596,7 @@ class QueueController extends _$QueueController {
       transcriptionNeedsModel: false,
       summaryCount: 0,
       summaryNeedsModel: false,
+      ocrCount: 0,
     );
     // Files land in a per-task subfolder (see YtDlpHost `-o`): the task id names
     // the folder, the user's template names the file inside it.
@@ -773,6 +792,41 @@ class QueueController extends _$QueueController {
       }
     }
 
+    // P13b-3: auto-scan freshly downloaded images for text (OCR) when opted in,
+    // so they become searchable. On-device + offline (bundled ML Kit, no
+    // download); images only; skips ones already scanned.
+    var ocrCount = 0;
+    if (settings.autoOcrOnDownload) {
+      final ocr = ref.read(ocrEngineProvider);
+      final metadata = ref.read(metadataRepositoryProvider);
+      for (final (i, mediaFile) in outputs.media.indexed) {
+        final itemId = single ? id : '${id}__$i';
+        final ext = mediaFile.path.split('.').last.toLowerCase();
+        final isImage =
+            !queued.request.audioOnly && mediaTypeForExt(ext) == 'image';
+        final meta = await (db.select(
+          db.mediaMetadata,
+        )..where((m) => m.itemId.equals(itemId))).getSingleOrNull();
+        if (!shouldAutoOcr(
+          enabled: settings.autoOcrOnDownload,
+          engineAvailable: ocr.isAvailable,
+          isImage: isImage,
+          alreadyScanned: meta?.ocrText?.trim().isNotEmpty ?? false,
+        )) {
+          continue;
+        }
+        try {
+          final text = (await ocr.recognizeText(mediaFile.path)).trim();
+          if (text.isNotEmpty) {
+            await metadata.updateOcrText(itemId, text);
+            ocrCount++;
+          }
+        } catch (_) {
+          // A per-item OCR failure must not fail the download.
+        }
+      }
+    }
+
     return (
       primaryId: single ? id : '${id}__0',
       itemCount: outputs.media.length,
@@ -780,6 +834,7 @@ class QueueController extends _$QueueController {
       transcriptionNeedsModel: transcriptionNeedsModel,
       summaryCount: summaryCount,
       summaryNeedsModel: summaryNeedsModel,
+      ocrCount: ocrCount,
     );
   }
 
diff --git a/lib/features/settings/data/settings_model.dart b/lib/features/settings/data/settings_model.dart
index f0c774e..c4cb842 100644
--- a/lib/features/settings/data/settings_model.dart
+++ b/lib/features/settings/data/settings_model.dart
@@ -97,6 +97,11 @@ abstract class SettingsModel with _$SettingsModel {
     // fetch — mirrors `autoTranscribe`). The on-demand summary on item detail
     // (P13a) works regardless.
     @Default(false) bool autoSummarizeOnDownload,
+    // P13b-3: auto-extract text (OCR) from a newly downloaded image in the
+    // background, so it becomes searchable. Opt-in (defaults off); runs only on
+    // images, on-device + offline (bundled ML Kit, no download). The on-demand
+    // "Scan text" on item detail (P13b-1) works regardless.
+    @Default(false) bool autoOcrOnDownload,
     // On-device speech transcription (P12e). Opt-in (defaults off); the whisper
     // model is downloaded only when the user enables it + picks a model.
     // `selectedTranscriptionModelId` empty = the device-tier recommendation;
diff --git a/lib/features/settings/presentation/ai_settings_screen.dart b/lib/features/settings/presentation/ai_settings_screen.dart
index b49cae0..48b017f 100644
--- a/lib/features/settings/presentation/ai_settings_screen.dart
+++ b/lib/features/settings/presentation/ai_settings_screen.dart
@@ -12,6 +12,7 @@ import 'package:grabbit/core/ai/inference_error.dart';
 import 'package:grabbit/core/ai/model_capability_matrix.dart';
 import 'package:grabbit/core/ai/model_catalog.dart';
 import 'package:grabbit/core/ai/model_download_service.dart';
+import 'package:grabbit/core/ai/ocr_provider.dart';
 import 'package:grabbit/core/ai/transcription_model.dart';
 import 'package:grabbit/core/ai/transcription_provider.dart';
 import 'package:grabbit/core/device/device_profile.dart';
@@ -70,11 +71,57 @@ class AiSettingsScreen extends ConsumerWidget {
         ),
         const _GenerationCard(),
         const _TranscriptionCard(),
+        const _OcrCard(),
       ],
     );
   }
 }
 
+/// Settings card holding the opt-in switch for auto-OCR on download (P13b-3).
+/// Scanning an image by hand from its detail screen (P13b-1) works regardless;
+/// the card collapses to an empty box when the OCR engine is unavailable.
+class _OcrCard extends ConsumerWidget {
+  const _OcrCard();
+
+  @override
+  Widget build(BuildContext context, WidgetRef ref) {
+    final engine = ref.watch(ocrEngineProvider);
+    if (!engine.isAvailable) return const SizedBox.shrink();
+    final autoScanEnabled = ref.watch(
+      settingsControllerProvider.select(
+        (settings) => settings.value?.autoOcrOnDownload ?? false,
+      ),
+    );
+    const hint = InfoHintButton(
+      InfoHint(
+        title: 'Auto-scan images for text',
+        body:
+            'Automatically read text inside each downloaded image so '
+            'you can search for it — all on-device and offline. You can '
+            'always scan an image by hand from its detail screen.',
+      ),
+    );
+    return Padding(
+      padding: const EdgeInsets.only(top: 8),
+      child: SettingsCard(
+        children: [
+          SwitchListTile(
+            secondary: hint,
+            title: const Text('Image text (OCR)'),
+            subtitle: const Text(
+              'Scan new image downloads for searchable text',
+            ),
+            value: autoScanEnabled,
+            onChanged: (enabled) => ref
+                .read(settingsControllerProvider.notifier)
+                .setAutoOcrOnDownload(enabled),
+          ),
+        ],
+      ),
+    );
+  }
+}
+
 /// Compact banner framing the AI screen with the device's capability tier (P12g)
 /// — so a user understands *why* some AI options are offered or gated. Reads the
 /// live tier (probed at startup); the InfoHint explains on-device scaling.
diff --git a/lib/features/settings/presentation/settings_controller.dart b/lib/features/settings/presentation/settings_controller.dart
index 7a71f0d..8e24d20 100644
--- a/lib/features/settings/presentation/settings_controller.dart
+++ b/lib/features/settings/presentation/settings_controller.dart
@@ -147,6 +147,10 @@ class SettingsController extends _$SettingsController {
   Future<void> setAutoSummarizeOnDownload(bool value) async =>
       _update((await future).copyWith(autoSummarizeOnDownload: value));
 
+  /// Sets whether new image downloads are auto-scanned for text (OCR, P13b-3).
+  Future<void> setAutoOcrOnDownload(bool value) async =>
+      _update((await future).copyWith(autoOcrOnDownload: value));
+
   /// On-device transcription opt-in (P12e).
   Future<void> setTranscriptionEnabled(bool value) async =>
       _update((await future).copyWith(transcriptionEnabled: value));
diff --git a/test/features/library/ocr_test.dart b/test/features/library/ocr_test.dart
new file mode 100644
index 0000000..83a516f
--- /dev/null
+++ b/test/features/library/ocr_test.dart
@@ -0,0 +1,61 @@
+import 'package:flutter_test/flutter_test.dart';
+import 'package:grabbit/features/library/presentation/ocr.dart';
+
+void main() {
+  group('shouldAutoOcr (P13b-3)', () {
+    test('all favourable → true', () {
+      expect(
+        shouldAutoOcr(
+          enabled: true,
+          engineAvailable: true,
+          isImage: true,
+          alreadyScanned: false,
+        ),
+        isTrue,
+      );
+    });
+
+    test('any unfavourable condition → false', () {
+      expect(
+        shouldAutoOcr(
+          enabled: false,
+          engineAvailable: true,
+          isImage: true,
+          alreadyScanned: false,
+        ),
+        isFalse,
+        reason: 'disabled',
+      );
+      expect(
+        shouldAutoOcr(
+          enabled: true,
+          engineAvailable: false,
+          isImage: true,
+          alreadyScanned: false,
+        ),
+        isFalse,
+        reason: 'engine unavailable',
+      );
+      expect(
+        shouldAutoOcr(
+          enabled: true,
+          engineAvailable: true,
+          isImage: false,
+          alreadyScanned: false,
+        ),
+        isFalse,
+        reason: 'not an image',
+      );
+      expect(
+        shouldAutoOcr(
+          enabled: true,
+          engineAvailable: true,
+          isImage: true,
+          alreadyScanned: true,
+        ),
+        isFalse,
+        reason: 'already scanned',
+      );
+    });
+  });
+}
diff --git a/test/features/queue/queue_controller_test.dart b/test/features/queue/queue_controller_test.dart
index 80857e4..8668c7d 100644
--- a/test/features/queue/queue_controller_test.dart
+++ b/test/features/queue/queue_controller_test.dart
@@ -8,6 +8,8 @@ import 'package:flutter_test/flutter_test.dart';
 import 'package:grabbit/core/ai/generation_engine.dart';
 import 'package:grabbit/core/ai/generation_model.dart';
 import 'package:grabbit/core/ai/generation_provider.dart';
+import 'package:grabbit/core/ai/ocr_engine.dart';
+import 'package:grabbit/core/ai/ocr_provider.dart';
 import 'package:grabbit/core/ai/structured_generation.dart';
 import 'package:grabbit/core/ai/transcription_engine.dart';
 import 'package:grabbit/core/ai/transcription_model.dart';
@@ -275,6 +277,27 @@ class FakeGenerationEngine implements GenerationEngine {
   Future<void> close() async {}
 }
 
+/// In-memory OCR engine (no native ML Kit) for the auto-OCR tests (P13b-3).
+/// [available] simulates whether OCR can run; records the paths it scanned.
+class FakeOcrEngine implements OcrEngine {
+  FakeOcrEngine({this.available = true, this.result = 'fake ocr text'});
+
+  bool available;
+  String result;
+  final List<String> scanned = [];
+
+  @override
+  bool get isAvailable => available;
+  @override
+  Future<String> recognizeText(String imagePath) async {
+    scanned.add(imagePath);
+    return result;
+  }
+
+  @override
+  Future<void> close() async {}
+}
+
 QueuedDownload _qd(
   String id, {
   String outputDir = '/tmp',
@@ -323,6 +346,7 @@ void main() {
   late FakeSystemNotificationService fakeOsNotifier;
   late FakeTranscriptionEngine fakeTranscriber;
   late FakeGenerationEngine fakeGenerator;
+  late FakeOcrEngine fakeOcr;
   late Directory mediaDir;
 
   ProviderContainer makeContainer() => ProviderContainer(
@@ -337,6 +361,7 @@ void main() {
       mediaStorageProvider.overrideWithValue(FakeMediaStorage(mediaDir)),
       transcriptionEngineProvider.overrideWithValue(fakeTranscriber),
       generationEngineProvider.overrideWithValue(fakeGenerator),
+      ocrEngineProvider.overrideWithValue(fakeOcr),
       queueConfigProvider.overrideWithValue(
         const QueueConfig(baseRetryDelay: Duration(milliseconds: 5)),
       ),
@@ -353,6 +378,7 @@ void main() {
     fakeOsNotifier = FakeSystemNotificationService();
     fakeTranscriber = FakeTranscriptionEngine();
     fakeGenerator = FakeGenerationEngine();
+    fakeOcr = FakeOcrEngine();
     mediaDir = Directory.systemTemp.createTempSync('grabbit_qmedia_');
     container = makeContainer();
     repo = container.read(queueRepositoryProvider);
@@ -470,6 +496,20 @@ void main() {
     db.mediaMetadata,
   )..where((m) => m.itemId.equals(id))).getSingleOrNull())?.aiSummary;
 
+  Future<String?> ocrOf(String id) async => (await (db.select(
+    db.mediaMetadata,
+  )..where((m) => m.itemId.equals(id))).getSingleOrNull())?.ocrText;
+
+  /// An image-only download (no video/audio) — after the P13b-3 classifier fix
+  /// this becomes an `image` library item.
+  Future<Directory> imageDownload(String id) async {
+    final dir = await Directory.systemTemp.createTemp('grabbit_img_');
+    addTearDown(() => dir.delete(recursive: true));
+    await Directory('${dir.path}/$id').create();
+    await File('${dir.path}/$id/Photo.jpg').writeAsString('imgdata');
+    return dir;
+  }
+
   /// A normal completed download with a description (so the auto-summary source
   /// — `transcript ?? description` — is non-empty), no caption sidecar.
   Future<Directory> describedDownload(String id) async {
@@ -642,6 +682,75 @@ void main() {
     },
   );
 
+  // --- P13b-3: auto-OCR on image download ---
+
+  test(
+    'auto-OCR: enabled + image + text → ocrText + ai entry (P13b-3)',
+    () async {
+      await container
+          .read(settingsControllerProvider.notifier)
+          .setAutoOcrOnDownload(true);
+      fakeOcr.available = true;
+      final dir = await imageDownload('img1');
+
+      await controller.enqueue(_qd('img1', outputDir: dir.path));
+      await waitFor(() async => engine.running.contains('img1'));
+      engine.complete('img1');
+      await waitFor(
+        () async => (await repo.byId('img1'))?.status == TaskStatus.done,
+      );
+
+      // The classifier fix makes the image the media item; auto-OCR scans it.
+      final item = await (db.select(
+        db.mediaItems,
+      )..where((t) => t.id.equals('img1'))).getSingle();
+      expect(item.type, 'image');
+      expect(fakeOcr.scanned, hasLength(1));
+      expect(await ocrOf('img1'), 'fake ocr text');
+      final ai = await (db.select(
+        db.notifications,
+      )..where((n) => n.category.equals(NotificationCategory.ai))).get();
+      expect(ai, hasLength(1));
+      expect(ai.single.severity, NotificationSeverity.success);
+    },
+  );
+
+  test('auto-OCR: default off → no scan, no entry (P13b-3)', () async {
+    fakeOcr.available = true; // engine present, but the toggle is off
+    final dir = await imageDownload('img1');
+
+    await controller.enqueue(_qd('img1', outputDir: dir.path));
+    await waitFor(() async => engine.running.contains('img1'));
+    engine.complete('img1');
+    await waitFor(
+      () async => (await repo.byId('img1'))?.status == TaskStatus.done,
+    );
+
+    expect(fakeOcr.scanned, isEmpty);
+    expect(await ocrOf('img1'), isNull);
+    final ai = await (db.select(
+      db.notifications,
+    )..where((n) => n.category.equals(NotificationCategory.ai))).get();
+    expect(ai, isEmpty);
+  });
+
+  test('auto-OCR: enabled but item is a video → skipped (P13b-3)', () async {
+    await container
+        .read(settingsControllerProvider.notifier)
+        .setAutoOcrOnDownload(true);
+    final dir = await captionlessDownload('vid1'); // a .mp4
+
+    await controller.enqueue(_qd('vid1', outputDir: dir.path));
+    await waitFor(() async => engine.running.contains('vid1'));
+    engine.complete('vid1');
+    await waitFor(
+      () async => (await repo.byId('vid1'))?.status == TaskStatus.done,
+    );
+
+    expect(fakeOcr.scanned, isEmpty);
+    expect(await ocrOf('vid1'), isNull);
+  });
+
   test('a completed download posts a success activity entry (P11c)', () async {
     final dir = await Directory.systemTemp.createTemp('grabbit_ntf_done_');
     addTearDown(() => dir.delete(recursive: true));
diff --git a/test/features/settings/settings_test.dart b/test/features/settings/settings_test.dart
index 558df31..5718639 100644
--- a/test/features/settings/settings_test.dart
+++ b/test/features/settings/settings_test.dart
@@ -274,6 +274,30 @@ void main() {
       expect(saved.aiSetupSeen, isTrue);
     });
 
+    test(
+      'auto-process setters default off and persist (P13a-2/P13b-3)',
+      () async {
+        final db = AppDatabase(NativeDatabase.memory());
+        addTearDown(db.close);
+        final container = ProviderContainer(
+          overrides: [appDatabaseProvider.overrideWithValue(db)],
+        );
+        addTearDown(container.dispose);
+
+        final loaded = await container.read(settingsControllerProvider.future);
+        expect(loaded.autoSummarizeOnDownload, isFalse);
+        expect(loaded.autoOcrOnDownload, isFalse);
+
+        final notifier = container.read(settingsControllerProvider.notifier);
+        await notifier.setAutoSummarizeOnDownload(true);
+        await notifier.setAutoOcrOnDownload(true);
+
+        final saved = await SettingsRepository(db).read();
+        expect(saved.autoSummarizeOnDownload, isTrue);
+        expect(saved.autoOcrOnDownload, isTrue);
+      },
+    );
+
     test(
       'autoCheckEngineUpdate defaults on and persists when toggled',
       () async {

From 0d572bc7185abd2e14efdf9e357221ca6958377d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 2 Jun 2026 19:35:43 +0000
Subject: [PATCH 3/3] fix: image thumbnails + de-duplicate image downloads;
 queue quick wins (P13b-3 sweep)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-merge sweep of the image-download/classification work:

- MediaThumb now falls back to the image FILE for `image` items with a null
  thumbnail (they were showing a movie-icon placeholder in the grid,
  dashboard, collections, hero shuttle, and related strips). Typed fallback
  icon for images is now image_outlined.
- classifyDownloadOutputs collapses an image + its yt-dlp `--write-thumbnail`
  sidecar to ONE item: with no video/audio, the largest image is the media
  and the next-largest is its thumbnail (carousels expand to one task/folder
  per photo, so multiple images here = photo + thumbnail). Prevents a
  duplicate image item and gives image items a real thumbnail.
- Quick wins in _persistCompleted: auto-transcribe skips image items (no
  wasted whisper transcode of a photo); durationSec gated to non-image.
- Tests: classifier photo+thumbnail collapse (real temp files), MediaThumb
  image null-thumb renders Image.file (not the movie icon), queue cases
  (image+thumbnail → one item with a thumbnail; whisper skipped on images).
- Docs: VERIFICATION (thumbnail rendering, single-item, export), BACKLOG
  (unconditional --write-thumbnail; non-mediaTypeForExt image formats),
  P13-PLAN P13b-3 sweep note.

https://claude.ai/code/session_013JoYmLCosYt5tQ8qwdbL1T
---
 docs/BACKLOG.md                               |  9 +++
 docs/VERIFICATION.md                          |  8 ++-
 docs/design/P13-PLAN.md                       |  6 ++
 .../library/presentation/media_grid.dart      | 12 +++-
 .../queue/data/completed_outputs.dart         | 23 +++++++-
 .../queue/presentation/queue_controller.dart  |  8 ++-
 test/features/library/media_grid_test.dart    | 38 +++++++++++++
 .../queue/completed_outputs_test.dart         | 31 +++++++----
 .../features/queue/queue_controller_test.dart | 55 +++++++++++++++++++
 9 files changed, 171 insertions(+), 19 deletions(-)

diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md
index b128fa3..765bd05 100644
--- a/docs/BACKLOG.md
+++ b/docs/BACKLOG.md
@@ -16,6 +16,15 @@ _(nothing active — pick the next batch from below)_
 - [ ] **OCR — non-Latin scripts.** P13b-1 ships the **bundled Latin** ML Kit recognizer (no Google Play
       Services, offline). Chinese/Japanese/Korean/Devanagari need their own ML Kit script models (extra APK
       size or a download). Add a script choice if users want non-Latin OCR. *(From P13b-1.)*
+- [ ] **Unconditional `--write-thumbnail` for image downloads.** `YtDlpHost.kt` passes
+      `--write-thumbnail --convert-thumbnails jpg` for every download, so an image download wastes a fetch
+      writing a thumbnail of the photo. P13b-3 handles this defensively in Dart (the classifier keeps the
+      largest image as the photo and the smaller as its thumbnail), but a cleaner fix would gate the flag off
+      at request time for image downloads (needs an `isImage`/`writeThumbnail` hint through the Pigeon
+      `DownloadRequest`). *(From P13b-3 sweep.)*
+- [ ] **Image formats outside `mediaTypeForExt`.** `.heic`/`.heif`/`.avif`/`.tiff` aren't in the image set,
+      so such a download is classified as a `video` item. Add them (+ confirm the player/thumbnail handle
+      them) if real downloads produce them. *(From P13b-3 sweep.)*
 - [ ] **Auto-summarize — queue-decoupled background run.** P13a-2 generates the auto-summary **inline** in
       `_persistCompleted` before the next download pumps (gated on "model present" so it can't stall on a
       fetch), exactly like `autoTranscribe`. Generation is heavier than whisper-tiny, so a fuller design
diff --git a/docs/VERIFICATION.md b/docs/VERIFICATION.md
index 18057ce..dfd4347 100644
--- a/docs/VERIFICATION.md
+++ b/docs/VERIFICATION.md
@@ -956,8 +956,12 @@ entries, or verify after P11c lands.)*
 
 ### P13b-3 — Auto-OCR on download (+ image-download fix)  *(install `app-arm64-v8a-debug.apk`)*
 - [ ] **Image download fix:** download a single image (e.g. an Instagram/X photo, or a photo carousel) →
-      it now appears in the library as an **image item** (previously it produced nothing). The video case is
-      unchanged (the video is the item; its thumbnail is still a thumbnail).
+      it now appears in the library as an **image item** (previously it produced nothing), shows **its own
+      picture as the thumbnail** in the grid/dashboard/collections (not a movie-icon placeholder), and is
+      exactly **one** item even though yt-dlp also writes a thumbnail sidecar. The video case is unchanged
+      (the video is the item; its thumbnail is still a thumbnail).
+- [ ] **Export:** export a downloaded image item to the gallery → it lands in the **Images** collection
+      and opens in the device gallery.
 - [ ] AI & graph settings → enable **Image text (OCR) · Auto-scan new image downloads**. Download an image
       with legible text → its text becomes **searchable** + a "Text found in image" Activity Inbox entry,
       **fully offline**.
diff --git a/docs/design/P13-PLAN.md b/docs/design/P13-PLAN.md
index 927b953..113517f 100644
--- a/docs/design/P13-PLAN.md
+++ b/docs/design/P13-PLAN.md
@@ -167,6 +167,12 @@ target-language UX + GMS nuance). Measure APK-size impact in the first ML Kit bu
   Kit runs). Tests: classifier image cases, `shouldAutoOcr` truth table, settings round-trip, and queue cases
   (image+text → `ocrText` + entry; default-off no-op; video skipped). **No schema/deps change.** **Pending
   APK spot-check** (real image download → image item + searchable text + inbox entry, offline).
+- **Pre-merge sweep refinements (same PR):** (a) `MediaThumb` now falls back to the image **file** for
+  `image` items with a null thumbnail (they were showing a movie-icon placeholder in grid/dashboard/
+  collections/hero/related); (b) the classifier collapses an image + its yt-dlp `--write-thumbnail` sidecar
+  to **one** item (largest = photo, smaller = thumbnail) so a single image download isn't double-counted;
+  (c) quick wins — auto-transcribe skips image items, and `durationSec` is gated to non-image. The
+  unconditional `--write-thumbnail` and non-`mediaTypeForExt` image formats are logged in `BACKLOG.md`.
 
 ### `[ ]` P13c — Smart auto-tagging *(generation; APK)*
 LLM-suggested tags feeding the **existing** tag system — builds directly on the P13a generation patterns.
diff --git a/lib/features/library/presentation/media_grid.dart b/lib/features/library/presentation/media_grid.dart
index ae80853..7e2dbd7 100644
--- a/lib/features/library/presentation/media_grid.dart
+++ b/lib/features/library/presentation/media_grid.dart
@@ -241,12 +241,20 @@ class MediaThumb extends StatelessWidget {
     final fallback = ColoredBox(
       color: scheme.surfaceContainerHighest,
       child: Icon(
-        item.type == 'audio' ? Icons.music_note : Icons.movie_outlined,
+        switch (item.type) {
+          'audio' => Icons.music_note,
+          'image' => Icons.image_outlined,
+          _ => Icons.movie_outlined,
+        },
         color: scheme.onSurfaceVariant,
         size: 40,
       ),
     );
-    final thumbPath = item.thumbPath;
+    // Image items often have no separate thumbnail (the photo is its own
+    // thumbnail) — render the image file directly. Everything else needs a
+    // generated thumbnail.
+    final thumbPath =
+        item.thumbPath ?? (item.type == 'image' ? item.filePath : null);
     if (thumbPath == null) return fallback;
     return Image.file(
       File(thumbPath),
diff --git a/lib/features/queue/data/completed_outputs.dart b/lib/features/queue/data/completed_outputs.dart
index 29ca98e..c0c6562 100644
--- a/lib/features/queue/data/completed_outputs.dart
+++ b/lib/features/queue/data/completed_outputs.dart
@@ -36,15 +36,32 @@ DownloadOutputs classifyDownloadOutputs(Iterable<File> files) {
     }
   }
   media.sort((a, b) => a.path.compareTo(b.path));
-  images.sort((a, b) => a.path.compareTo(b.path));
   // Video/audio present → images are thumbnail sidecars (keep the first).
-  // Otherwise it's an image download → the images are the media.
   if (media.isNotEmpty) {
+    images.sort((a, b) => a.path.compareTo(b.path));
     return (
       media: media,
       thumb: images.isEmpty ? null : images.first,
       info: info,
     );
   }
-  return (media: images, thumb: null, info: info);
+  // Image download → the image is the media. A carousel expands to one task
+  // (folder) per photo, so multiple images here means the photo PLUS yt-dlp's
+  // `--write-thumbnail` sidecar — keep the largest as the photo and the next as
+  // its thumbnail (rather than minting a duplicate item).
+  if (images.length <= 1) {
+    return (media: images, thumb: null, info: info);
+  }
+  images.sort((a, b) => _sizeOf(b).compareTo(_sizeOf(a)));
+  return (media: [images.first], thumb: images[1], info: info);
+}
+
+/// File size in bytes, or 0 when it can't be read (e.g. a missing path in a
+/// unit test) — used only to pick the largest image as the media.
+int _sizeOf(File f) {
+  try {
+    return f.lengthSync();
+  } on FileSystemException {
+    return 0;
+  }
 }
diff --git a/lib/features/queue/presentation/queue_controller.dart b/lib/features/queue/presentation/queue_controller.dart
index 4b1a3e4..0e61a3e 100644
--- a/lib/features/queue/presentation/queue_controller.dart
+++ b/lib/features/queue/presentation/queue_controller.dart
@@ -653,7 +653,9 @@ class QueueController extends _$QueueController {
                 type: type,
                 createdAt: DateTime.now(),
                 storageState: 'private',
-                durationSec: Value(single ? queued.durationSec : null),
+                durationSec: Value(
+                  single && type != 'image' ? queued.durationSec : null,
+                ),
                 sizeBytes: Value(await mediaFile.length()),
                 thumbPath: Value(outputs.thumb?.path),
                 width: Value(width),
@@ -703,6 +705,10 @@ class QueueController extends _$QueueController {
           : null;
       final whisperReady = whisper != null && await whisper.ensureReady();
       for (final (i, mediaFile) in outputs.media.indexed) {
+        // Images have no audio to transcribe — skip (avoids a wasted whisper
+        // transcode of a photo).
+        final ext = mediaFile.path.split('.').last.toLowerCase();
+        if (mediaTypeForExt(ext) == 'image') continue;
         final itemId = single ? id : '${id}__$i';
         final timed = await transcripts.extractTimed(
           mediaFile.path,
diff --git a/test/features/library/media_grid_test.dart b/test/features/library/media_grid_test.dart
index be0b9fd..c474271 100644
--- a/test/features/library/media_grid_test.dart
+++ b/test/features/library/media_grid_test.dart
@@ -46,6 +46,44 @@ void main() {
     );
   });
 
+  testWidgets(
+    'image item with no thumbnail renders the image file, not a movie icon (P13b-3)',
+    (tester) async {
+      await tester.pumpWidget(
+        ProviderScope(
+          child: MaterialApp(
+            home: Scaffold(
+              body: MediaThumb(
+                item: _item(id: 'i', type: 'image'),
+              ),
+            ),
+          ),
+        ),
+      );
+      // Falls back to Image.file(filePath) — never the video placeholder.
+      expect(find.byType(Image), findsOneWidget);
+      expect(find.byIcon(Icons.movie_outlined), findsNothing);
+    },
+  );
+
+  testWidgets('video item with no thumbnail shows the movie placeholder', (
+    tester,
+  ) async {
+    await tester.pumpWidget(
+      ProviderScope(
+        child: MaterialApp(
+          home: Scaffold(
+            body: MediaThumb(
+              item: _item(id: 'v2', type: 'video'),
+            ),
+          ),
+        ),
+      ),
+    );
+    expect(find.byIcon(Icons.movie_outlined), findsOneWidget);
+    expect(find.byType(Image), findsNothing);
+  });
+
   testWidgets('tapping the star favorites the item', (tester) async {
     final db = AppDatabase(NativeDatabase.memory());
     addTearDown(db.close);
diff --git a/test/features/queue/completed_outputs_test.dart b/test/features/queue/completed_outputs_test.dart
index fd58a57..6ee0b8f 100644
--- a/test/features/queue/completed_outputs_test.dart
+++ b/test/features/queue/completed_outputs_test.dart
@@ -60,17 +60,26 @@ void main() {
       expect(out.info?.path, '/d/photo.info.json');
     });
 
-    test('photo carousel → every image is media (sorted) (P13b-3)', () {
-      final out = classifyDownloadOutputs(
-        _files(['/d/post 2.jpg', '/d/post 1.png', '/d/post 3.webp']),
-      );
-      expect(out.media.map((f) => f.path), [
-        '/d/post 1.png',
-        '/d/post 2.jpg',
-        '/d/post 3.webp',
-      ]);
-      expect(out.thumb, isNull);
-    });
+    test(
+      'image + its written thumbnail → largest is media, smaller is thumb (P13b-3)',
+      () async {
+        // yt-dlp `--write-thumbnail` lands a second image beside the photo;
+        // the larger file is the real photo, the smaller is its thumbnail.
+        final dir = await Directory.systemTemp.createTemp('grabbit_cls_');
+        addTearDown(() => dir.delete(recursive: true));
+        final photo = File('${dir.path}/post.webp')
+          ..writeAsBytesSync(List.filled(5000, 0));
+        final thumb = File('${dir.path}/post.jpg')
+          ..writeAsBytesSync(List.filled(300, 0));
+
+        final out = classifyDownloadOutputs([
+          thumb,
+          photo,
+        ]); // order shouldn't matter
+        expect(out.media.map((f) => f.path), [photo.path]);
+        expect(out.thumb?.path, thumb.path);
+      },
+    );
 
     test('video + image keeps the image as the thumbnail (unchanged)', () {
       final out = classifyDownloadOutputs(
diff --git a/test/features/queue/queue_controller_test.dart b/test/features/queue/queue_controller_test.dart
index 8668c7d..6da2efe 100644
--- a/test/features/queue/queue_controller_test.dart
+++ b/test/features/queue/queue_controller_test.dart
@@ -510,6 +510,21 @@ void main() {
     return dir;
   }
 
+  /// An image download plus yt-dlp's `--write-thumbnail` sidecar (a smaller
+  /// second image) — should still yield exactly one image item.
+  Future<Directory> imageWithThumbDownload(String id) async {
+    final dir = await Directory.systemTemp.createTemp('grabbit_imgt_');
+    addTearDown(() => dir.delete(recursive: true));
+    await Directory('${dir.path}/$id').create();
+    await File(
+      '${dir.path}/$id/Photo.webp',
+    ).writeAsBytes(List.filled(5000, 0)); // the real photo (larger)
+    await File(
+      '${dir.path}/$id/Photo.jpg',
+    ).writeAsBytes(List.filled(300, 0)); // the written thumbnail (smaller)
+    return dir;
+  }
+
   /// A normal completed download with a description (so the auto-summary source
   /// — `transcript ?? description` — is non-empty), no caption sidecar.
   Future<Directory> describedDownload(String id) async {
@@ -751,6 +766,46 @@ void main() {
     expect(await ocrOf('vid1'), isNull);
   });
 
+  test('image + written thumbnail → exactly one image item (P13b-3)', () async {
+    final dir = await imageWithThumbDownload('img1');
+
+    await controller.enqueue(_qd('img1', outputDir: dir.path));
+    await waitFor(() async => engine.running.contains('img1'));
+    engine.complete('img1');
+    await waitFor(
+      () async => (await repo.byId('img1'))?.status == TaskStatus.done,
+    );
+
+    final items = await (db.select(
+      db.mediaItems,
+    )..where((t) => t.id.equals('img1'))).get();
+    expect(items, hasLength(1)); // not two (the thumbnail isn't its own item)
+    expect(items.single.type, 'image');
+    // The smaller image became the thumbnail.
+    expect(items.single.thumbPath, endsWith('Photo.jpg'));
+    expect(items.single.filePath, endsWith('Photo.webp'));
+  });
+
+  test('auto-transcribe skips image downloads (P13b-3)', () async {
+    await container
+        .read(settingsControllerProvider.notifier)
+        .setAutoTranscribe(true);
+    await container
+        .read(settingsControllerProvider.notifier)
+        .setTranscriptionEnabled(true);
+    fakeTranscriber.ready = true;
+    final dir = await imageDownload('img1');
+
+    await controller.enqueue(_qd('img1', outputDir: dir.path));
+    await waitFor(() async => engine.running.contains('img1'));
+    engine.complete('img1');
+    await waitFor(
+      () async => (await repo.byId('img1'))?.status == TaskStatus.done,
+    );
+
+    expect(fakeTranscriber.transcribed, isEmpty); // no whisper on a photo
+  });
+
   test('a completed download posts a success activity entry (P11c)', () async {
     final dir = await Directory.systemTemp.createTemp('grabbit_ntf_done_');
     addTearDown(() => dir.delete(recursive: true));