From 5ffd48d3a4cf8e8a4fc5cbb6d229c6657211d701 Mon Sep 17 00:00:00 2001
From: Jhin Lee <leehack@gmail.com>
Date: Sat, 4 Jul 2026 12:24:24 -0400
Subject: [PATCH 1/2] Add source-based multimodal projector loading

---
 CHANGELOG.md                              |   6 +
 README.md                                 |  33 +-
 lib/src/core/engine/engine.dart           | 105 ++++++-
 test/unit/core/engine/engine_test.dart    | 347 +++++++++++++++++++++-
 website/docs/changelog/recent-releases.md |   4 +
 website/docs/guides/model-lifecycle.md    |  20 ++
 website/docs/guides/multimodal.md         |  28 +-
 website/docs/platforms/support-matrix.md  |   8 +
 8 files changed, 533 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 220124ca..a7776a4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@
   helpers for strict JSON-object / JSON-schema generation with final-output
   validation and typed decoding.
 
+* Added `LlamaEngine.loadMultimodalProjectorSource(...)` so GGUF
+  multimodal projector files can use the same `ModelSource` resolver,
+  native download/cache manager, authentication, checksum, and progress
+  options as `loadModelSource(...)`, while preserving the existing
+  `loadMultimodalProjector(...)` path/string API.
+
 ## 0.8.12
 
 * Updated the default LiteRT-LM native runtime pin to
diff --git a/README.md b/README.md
index 166e4f3c..b9861686 100644
--- a/README.md
+++ b/README.md
@@ -422,7 +422,25 @@ For LiteRT-LM bundles, use the same `loadModelSource(...)` path with a
 CPU, GPU, or Android NPU execution after the file is cached.
 `llamadart` does not list Hugging Face files or expand sharded GGUF manifests;
 pick the exact `.gguf` file path from the repository, and use separate model and
-`mmproj` sources for multimodal assets.
+`mmproj` sources for multimodal assets. After the GGUF model is loaded, call
+`loadMultimodalProjectorSource(...)` to resolve, download, cache, and load the
+projector through the same source/cache layer:
+
+```dart
+await engine.loadModelSource(
+  ModelSource.parse('hf://owner/repo/model-Q4_K_M.gguf'),
+);
+await engine.loadMultimodalProjectorSource(
+  ModelSource.parse('hf://owner/repo/mmproj.gguf'),
+  options: ModelLoadOptions(cachePolicy: ModelCachePolicy.preferCached),
+);
+```
+
+Native/file-backed backends load the cached local projector path. URL-loading
+web backends pass unauthenticated remote projector URLs directly to the bridge;
+authenticated headers, checksum verification, cancellation tokens, explicit
+cache policy changes, custom cache directories, disabled resume, custom retry
+counts, and local filesystem sources require a file-backed native cache manager.
 
 ### 7. Generate embeddings
 
@@ -1139,6 +1157,10 @@ void main() async {
   try {
     await engine.loadModel('vision-model.gguf');
     await engine.loadMultimodalProjector('mmproj.gguf');
+    // Or use ModelSource when the projector should be downloaded/cached:
+    // await engine.loadMultimodalProjectorSource(
+    //   ModelSource.parse('hf://owner/repo/mmproj.gguf'),
+    // );
 
     final session = ChatSession(engine);
 
@@ -1166,7 +1188,9 @@ void main() async {
 
 Web-specific note:
 
-- Load model/mmproj with URL-based assets (`loadModelFromUrl` + URL projector).
+- Load model/mmproj with URL-based assets (`loadModelSource` /
+  `loadModelFromUrl` + URL projector). `loadMultimodalProjectorSource` supports
+  remote unauthenticated projector URLs on URL-loading web backends.
 - For user-picked browser files, send media as bytes (`LlamaImageContent(bytes: ...)`,
   `LlamaAudioContent(bytes: ...)`) rather than local file paths.
 
@@ -1178,8 +1202,9 @@ LiteRT-LM native note:
 - Native LiteRT-LM supports local paths and encoded media bytes (`blob`) for
   media parts. Remote image URLs and raw PCM `Float32List` samples fail before
   native generation with clear errors.
-- `loadMultimodalProjector`, `supportsVision`, and `supportsAudio` remain
-  projector-oriented APIs for llama.cpp/WebGPU multimodal paths.
+- `loadMultimodalProjector`, `loadMultimodalProjectorSource`,
+  `supportsVision`, and `supportsAudio` remain projector-oriented APIs for
+  llama.cpp/WebGPU multimodal paths.
 
 ### 💡 Model-Specific Notes
 
diff --git a/lib/src/core/engine/engine.dart b/lib/src/core/engine/engine.dart
index 6a1bb755..5151661d 100644
--- a/lib/src/core/engine/engine.dart
+++ b/lib/src/core/engine/engine.dart
@@ -293,6 +293,85 @@ class LlamaEngine {
     return _withMmLifecycle(() => _loadMultimodalProjectorLocked(mmProjPath));
   }
 
+  /// Loads a multimodal projector from a structured [source].
+  ///
+  /// A model must already be loaded with [loadModel], [loadModelSource], or
+  /// [loadModelFromUrl]. Calling this before the model is ready throws a
+  /// [LlamaContextException].
+  ///
+  /// This method is lifecycle-compatible with [loadMultimodalProjector]:
+  /// source resolution, package-managed download/cache work, and the final
+  /// backend projector load are serialized with direct path projector loads and
+  /// unloads. Concurrent projector lifecycle calls are applied in call order,
+  /// and loading a new projector replaces any active projector.
+  ///
+  /// Local path sources are validated by the configured
+  /// [modelDownloadManager], then loaded from their local file path. Remote
+  /// sources use the native download/cache manager on file-backed backends. On
+  /// URL-loading backends, remote unauthenticated sources are passed directly to
+  /// the backend; package-managed auth, headers, checksum verification, cache
+  /// policy changes, cache directories, cancellation, retry/resume settings,
+  /// and progress reporting are not available because the backend/browser owns
+  /// the network and cache behavior.
+  ///
+  /// Throws [LlamaUnsupportedException] when the active backend cannot load
+  /// multimodal projectors, when a local path is used with a URL-loading
+  /// backend, when the resolver returns a remote target that disallows
+  /// browser/backend caching, or when URL-backend loading is requested with
+  /// options that require the package-managed download/cache manager.
+  Future<void> loadMultimodalProjectorSource(
+    ModelSource source, {
+    ModelLoadOptions options = ModelLoadOptions.defaults,
+    ModelDownloadProgressCallback? onProgress,
+  }) {
+    return _withMmLifecycle(() async {
+      _ensureReady(requireContext: false);
+
+      final target = await modelResolver.resolve(
+        source,
+        ModelResolveRequest(options: options, onProgress: onProgress),
+      );
+
+      switch (target) {
+        case LocalModelFile(:final path):
+          if (backend.supportsUrlLoading) {
+            throw LlamaUnsupportedException(
+              'Explicit local multimodal projector paths are not supported by URL-loading backends.',
+            );
+          }
+          final localSource = ModelSource.path(path);
+          final entry = await modelDownloadManager.ensureModel(
+            localSource,
+            options: options,
+            onProgress: onProgress,
+          );
+          return _loadMultimodalProjectorLocked(entry.filePath);
+        case RemoteModelUrl(:final url, :final useBrowserCache):
+          if (!useBrowserCache) {
+            throw LlamaUnsupportedException(
+              'Remote multimodal projector loading without browser/backend cache is not supported yet.',
+            );
+          }
+          if (!backend.supportsUrlLoading) {
+            final downloadSource = source.isRemote
+                ? source.withResolvedUri(url)
+                : ModelSource.url(url, fileName: source.fileName);
+            final entry = await modelDownloadManager.ensureModel(
+              downloadSource,
+              options: options,
+              onProgress: onProgress,
+            );
+            return _loadMultimodalProjectorLocked(entry.filePath);
+          }
+          _rejectUnsupportedUrlBackendOptions(
+            options,
+            assetType: 'multimodal projector',
+          );
+          return _loadMultimodalProjectorLocked(url.toString());
+      }
+    });
+  }
+
   Future<void> _loadMultimodalProjectorLocked(String mmProjPath) async {
     final mmProjName = _displayNameForSource(mmProjPath);
     LlamaLogger.instance.info('Loading multimodal projector: $mmProjName');
@@ -1148,15 +1227,19 @@ class LlamaEngine {
     _isReady = false;
   }
 
-  void _rejectUnsupportedUrlBackendOptions(ModelLoadOptions options) {
+  void _rejectUnsupportedUrlBackendOptions(
+    ModelLoadOptions options, {
+    String assetType = 'model',
+  }) {
+    final isModel = assetType == 'model';
     if (options.cachePolicy != ModelCachePolicy.preferCached) {
       throw LlamaUnsupportedException(
-        '${options.cachePolicy.name} model loading requires the native download/cache manager.',
+        '${options.cachePolicy.name} $assetType loading requires the native download/cache manager.',
       );
     }
     if (options.bearerToken != null || options.headers.isNotEmpty) {
       throw LlamaUnsupportedException(
-        'Authenticated model URL loading requires the native download/cache manager.',
+        'Authenticated $assetType URL loading requires the native download/cache manager.',
       );
     }
     if (options.cancelToken != null) {
@@ -1166,22 +1249,30 @@ class LlamaEngine {
     }
     if (options.sha256 != null) {
       throw LlamaUnsupportedException(
-        'Checksum verification requires the native download/cache manager.',
+        isModel
+            ? 'Checksum verification requires the native download/cache manager.'
+            : 'Checksum verification for $assetType loading requires the native download/cache manager.',
       );
     }
     if (options.cacheDirectory != null) {
       throw LlamaUnsupportedException(
-        'cacheDirectory is not supported by URL-loading backends.',
+        isModel
+            ? 'cacheDirectory is not supported by URL-loading backends.'
+            : 'cacheDirectory is not supported for $assetType loading by URL-loading backends.',
       );
     }
     if (!options.resume) {
       throw LlamaUnsupportedException(
-        'Disabling resume is not supported by URL-loading backends.',
+        isModel
+            ? 'Disabling resume is not supported by URL-loading backends.'
+            : 'Disabling resume is not supported for $assetType loading by URL-loading backends.',
       );
     }
     if (options.maxRetries != ModelLoadOptions.defaults.maxRetries) {
       throw LlamaUnsupportedException(
-        'Custom maxRetries is not supported by URL-loading backends.',
+        isModel
+            ? 'Custom maxRetries is not supported by URL-loading backends.'
+            : 'Custom maxRetries is not supported for $assetType loading by URL-loading backends.',
       );
     }
   }
diff --git a/test/unit/core/engine/engine_test.dart b/test/unit/core/engine/engine_test.dart
index 21de2619..77d2273a 100644
--- a/test/unit/core/engine/engine_test.dart
+++ b/test/unit/core/engine/engine_test.dart
@@ -19,12 +19,15 @@ class MockLlamaBackend
   String? lastModelPath;
   String? lastLoraPath;
   String? lastModelUrl;
+  String? lastMultimodalProjectorPath;
   double? lastLoraScale;
   int resolvedGpuLayers = 0;
   int modelLoadCalls = 0;
   int modelLoadFromUrlCalls = 0;
   int modelFreeCalls = 0;
   int contextFreeCalls = 0;
+  int multimodalContextCreateCalls = 0;
+  final List<String> multimodalProjectorPaths = <String>[];
   int tokenizeCalls = 0;
   int modelMetadataCalls = 0;
   String generationText = 'response';
@@ -185,7 +188,12 @@ class MockLlamaBackend
   Future<int?> multimodalContextCreate(
     int modelHandle,
     String mmProjPath,
-  ) async => 2;
+  ) async {
+    multimodalContextCreateCalls += 1;
+    lastMultimodalProjectorPath = mmProjPath;
+    multimodalProjectorPaths.add(mmProjPath);
+    return 2;
+  }
 
   @override
   Future<void> multimodalContextFree(int mmContextHandle) async {}
@@ -327,13 +335,23 @@ class MockModelResolver implements ModelResolver {
 }
 
 class MockModelDownloadManager implements ModelDownloadManager {
-  MockModelDownloadManager(this.entry);
+  MockModelDownloadManager(ModelCacheEntry entry)
+    : entriesByCacheKey = <String, ModelCacheEntry>{entry.cacheKey: entry};
+
+  MockModelDownloadManager.forEntries(Iterable<ModelCacheEntry> entries)
+    : entriesByCacheKey = <String, ModelCacheEntry>{
+        for (final entry in entries) entry.cacheKey: entry,
+      };
 
-  final ModelCacheEntry entry;
+  final Map<String, ModelCacheEntry> entriesByCacheKey;
   ModelSource? lastSource;
   ModelLoadOptions? lastOptions;
+  final List<ModelSource> sources = <ModelSource>[];
+  final List<ModelLoadOptions> options = <ModelLoadOptions>[];
   int ensureModelCalls = 0;
 
+  ModelCacheEntry get entry => entriesByCacheKey.values.first;
+
   @override
   Future<ModelCacheEntry> ensureModel(
     ModelSource source, {
@@ -343,10 +361,12 @@ class MockModelDownloadManager implements ModelDownloadManager {
     ensureModelCalls += 1;
     lastSource = source;
     lastOptions = options;
+    sources.add(source);
+    this.options.add(options);
     onProgress?.call(
       const ModelDownloadProgress(receivedBytes: 1, totalBytes: 2),
     );
-    return entry;
+    return entriesByCacheKey[source.cacheKey] ?? entry;
   }
 
   @override
@@ -373,6 +393,42 @@ class MockModelDownloadManager implements ModelDownloadManager {
   Future<void> remove(String cacheKey, {String? cacheDirectory}) async {}
 }
 
+class ControlledModelDownloadManager extends MockModelDownloadManager {
+  ControlledModelDownloadManager({
+    required Iterable<ModelCacheEntry> entries,
+    this.gatesByCacheKey = const <String, Completer<void>>{},
+    this.startedByCacheKey = const <String, Completer<void>>{},
+  }) : super.forEntries(entries);
+
+  final Map<String, Completer<void>> gatesByCacheKey;
+  final Map<String, Completer<void>> startedByCacheKey;
+
+  @override
+  Future<ModelCacheEntry> ensureModel(
+    ModelSource source, {
+    ModelLoadOptions options = ModelLoadOptions.defaults,
+    ModelDownloadProgressCallback? onProgress,
+  }) async {
+    ensureModelCalls += 1;
+    lastSource = source;
+    lastOptions = options;
+    sources.add(source);
+    this.options.add(options);
+    final started = startedByCacheKey[source.cacheKey];
+    if (started != null && !started.isCompleted) {
+      started.complete();
+    }
+    final gate = gatesByCacheKey[source.cacheKey];
+    if (gate != null) {
+      await gate.future;
+    }
+    onProgress?.call(
+      const ModelDownloadProgress(receivedBytes: 1, totalBytes: 2),
+    );
+    return entriesByCacheKey[source.cacheKey] ?? entry;
+  }
+}
+
 class MockEmbeddingBackend extends MockLlamaBackend
     implements BackendEmbeddings {
   int embedCalls = 0;
@@ -713,6 +769,289 @@ void main() {
       },
     );
 
+    test(
+      'native loadMultimodalProjectorSource supports local and remote model/projector combinations',
+      () async {
+        ModelCacheEntry entryFor(ModelSource source, String filePath) {
+          return ModelCacheEntry(
+            sourceCanonicalKey: source.metadataSourceKey,
+            cacheKey: source.cacheKey,
+            fileName: source.fileName,
+            filePath: filePath,
+            createdAt: DateTime.utc(2026),
+            updatedAt: DateTime.utc(2026),
+          );
+        }
+
+        final cases =
+            <
+              ({
+                String label,
+                ModelSource modelSource,
+                String modelPath,
+                ModelSource projectorSource,
+                String projectorPath,
+              })
+            >[
+              (
+                label: 'local model + local projector',
+                modelSource: ModelSource.path('/models/local-model.gguf'),
+                modelPath: '/models/local-model.gguf',
+                projectorSource: ModelSource.path('/models/local-mmproj.gguf'),
+                projectorPath: '/models/local-mmproj.gguf',
+              ),
+              (
+                label: 'local model + remote projector',
+                modelSource: ModelSource.path('/models/local-model.gguf'),
+                modelPath: '/models/local-model.gguf',
+                projectorSource: ModelSource.url(
+                  Uri.parse('https://example.com/remote-mmproj.gguf'),
+                ),
+                projectorPath: '/cache/remote-mmproj.gguf',
+              ),
+              (
+                label: 'remote model + local projector',
+                modelSource: ModelSource.url(
+                  Uri.parse('https://example.com/remote-model.gguf'),
+                ),
+                modelPath: '/cache/remote-model.gguf',
+                projectorSource: ModelSource.path('/models/local-mmproj.gguf'),
+                projectorPath: '/models/local-mmproj.gguf',
+              ),
+              (
+                label: 'remote model + remote projector',
+                modelSource: ModelSource.url(
+                  Uri.parse('https://example.com/remote-model.gguf'),
+                ),
+                modelPath: '/cache/remote-model.gguf',
+                projectorSource: ModelSource.url(
+                  Uri.parse('https://example.com/remote-mmproj.gguf'),
+                ),
+                projectorPath: '/cache/remote-mmproj.gguf',
+              ),
+            ];
+
+        for (final testCase in cases) {
+          final nativeBackend = MockLlamaBackend();
+          final downloadManager = MockModelDownloadManager.forEntries([
+            entryFor(testCase.modelSource, testCase.modelPath),
+            entryFor(testCase.projectorSource, testCase.projectorPath),
+          ]);
+          final nativeEngine = LlamaEngine(
+            nativeBackend,
+            modelDownloadManager: downloadManager,
+          );
+
+          await nativeEngine.loadModelSource(testCase.modelSource);
+          await nativeEngine.loadMultimodalProjectorSource(
+            testCase.projectorSource,
+          );
+
+          expect(
+            nativeBackend.lastModelPath,
+            testCase.modelPath,
+            reason: testCase.label,
+          );
+          expect(
+            nativeBackend.lastMultimodalProjectorPath,
+            testCase.projectorPath,
+            reason: testCase.label,
+          );
+          expect(downloadManager.ensureModelCalls, 2, reason: testCase.label);
+          expect(
+            downloadManager.sources.map((source) => source.cacheKey),
+            [testCase.modelSource.cacheKey, testCase.projectorSource.cacheKey],
+            reason: testCase.label,
+          );
+        }
+      },
+    );
+
+    test(
+      'native loadMultimodalProjectorSource forwards options and progress',
+      () async {
+        final source = ModelSource.url(
+          Uri.parse('https://example.com/mmproj.gguf'),
+        );
+        final entry = ModelCacheEntry(
+          sourceCanonicalKey: source.metadataSourceKey,
+          cacheKey: source.cacheKey,
+          fileName: source.fileName,
+          filePath: '/cache/mmproj.gguf',
+          createdAt: DateTime.utc(2026),
+          updatedAt: DateTime.utc(2026),
+        );
+        final downloadManager = MockModelDownloadManager(entry);
+        final nativeBackend = MockLlamaBackend();
+        final nativeEngine = LlamaEngine(
+          nativeBackend,
+          modelDownloadManager: downloadManager,
+        );
+        final options = ModelLoadOptions(
+          cachePolicy: ModelCachePolicy.refresh,
+          bearerToken: 'secret-token',
+        );
+        final progressEvents = <ModelDownloadProgress>[];
+
+        await nativeEngine.loadModel('model.gguf');
+        await nativeEngine.loadMultimodalProjectorSource(
+          source,
+          options: options,
+          onProgress: progressEvents.add,
+        );
+
+        expect(downloadManager.ensureModelCalls, 1);
+        expect(downloadManager.lastSource?.resolvedUri, source.resolvedUri);
+        expect(downloadManager.lastOptions, same(options));
+        expect(nativeBackend.lastMultimodalProjectorPath, '/cache/mmproj.gguf');
+        expect(progressEvents.single.fraction, 0.5);
+      },
+    );
+
+    test(
+      'loadMultimodalProjectorSource serializes source work before backend load',
+      () async {
+        ModelCacheEntry entryFor(ModelSource source, String filePath) {
+          return ModelCacheEntry(
+            sourceCanonicalKey: source.metadataSourceKey,
+            cacheKey: source.cacheKey,
+            fileName: source.fileName,
+            filePath: filePath,
+            createdAt: DateTime.utc(2026),
+            updatedAt: DateTime.utc(2026),
+          );
+        }
+
+        final firstSource = ModelSource.url(
+          Uri.parse('https://example.com/first-mmproj.gguf'),
+        );
+        final secondSource = ModelSource.url(
+          Uri.parse('https://example.com/second-mmproj.gguf'),
+        );
+        final firstGate = Completer<void>();
+        final firstStarted = Completer<void>();
+        final secondStarted = Completer<void>();
+        final downloadManager = ControlledModelDownloadManager(
+          entries: [
+            entryFor(firstSource, '/cache/first-mmproj.gguf'),
+            entryFor(secondSource, '/cache/second-mmproj.gguf'),
+          ],
+          gatesByCacheKey: {firstSource.cacheKey: firstGate},
+          startedByCacheKey: {
+            firstSource.cacheKey: firstStarted,
+            secondSource.cacheKey: secondStarted,
+          },
+        );
+        final nativeBackend = MockLlamaBackend();
+        final nativeEngine = LlamaEngine(
+          nativeBackend,
+          modelDownloadManager: downloadManager,
+        );
+
+        await nativeEngine.loadModel('model.gguf');
+
+        final firstLoad = nativeEngine.loadMultimodalProjectorSource(
+          firstSource,
+        );
+        await firstStarted.future;
+
+        final secondLoad = nativeEngine.loadMultimodalProjectorSource(
+          secondSource,
+        );
+        await pumpEventQueue();
+
+        expect(secondStarted.isCompleted, isFalse);
+        expect(downloadManager.sources.map((source) => source.cacheKey), [
+          firstSource.cacheKey,
+        ]);
+        expect(nativeBackend.multimodalProjectorPaths, isEmpty);
+
+        firstGate.complete();
+        await Future.wait<void>([firstLoad, secondLoad]);
+
+        expect(secondStarted.isCompleted, isTrue);
+        expect(downloadManager.sources.map((source) => source.cacheKey), [
+          firstSource.cacheKey,
+          secondSource.cacheKey,
+        ]);
+        expect(nativeBackend.multimodalProjectorPaths, [
+          '/cache/first-mmproj.gguf',
+          '/cache/second-mmproj.gguf',
+        ]);
+      },
+    );
+
+    test(
+      'loadMultimodalProjectorSource loads remote URL directly on URL backends',
+      () async {
+        final webBackend = MockLlamaBackend(urlLoadingSupported: true);
+        final webEngine = LlamaEngine(webBackend);
+
+        await webEngine.loadModelSource(
+          ModelSource.url(Uri.parse('https://example.com/model.gguf')),
+        );
+        await webEngine.loadMultimodalProjectorSource(
+          ModelSource.url(Uri.parse('https://example.com/mmproj.gguf')),
+        );
+
+        expect(webBackend.lastModelUrl, 'https://example.com/model.gguf');
+        expect(
+          webBackend.lastMultimodalProjectorPath,
+          'https://example.com/mmproj.gguf',
+        );
+        expect(webBackend.multimodalContextCreateCalls, 1);
+      },
+    );
+
+    test(
+      'loadMultimodalProjectorSource rejects local paths on URL backends',
+      () async {
+        final webBackend = MockLlamaBackend(urlLoadingSupported: true);
+        final webEngine = LlamaEngine(webBackend);
+
+        await webEngine.loadModelSource(
+          ModelSource.url(Uri.parse('https://example.com/model.gguf')),
+        );
+
+        await expectLater(
+          () => webEngine.loadMultimodalProjectorSource(
+            ModelSource.path('/models/mmproj.gguf'),
+          ),
+          throwsA(isA<LlamaUnsupportedException>()),
+        );
+        expect(webBackend.multimodalContextCreateCalls, 0);
+      },
+    );
+
+    test(
+      'loadMultimodalProjectorSource rejects URL-backend cache IO options',
+      () async {
+        final webBackend = MockLlamaBackend(urlLoadingSupported: true);
+        final webEngine = LlamaEngine(webBackend);
+
+        await webEngine.loadModelSource(
+          ModelSource.url(Uri.parse('https://example.com/model.gguf')),
+        );
+
+        Object? thrown;
+        try {
+          await webEngine.loadMultimodalProjectorSource(
+            ModelSource.url(Uri.parse('https://example.com/mmproj.gguf')),
+            options: ModelLoadOptions(bearerToken: 'secret-token'),
+          );
+        } catch (error) {
+          thrown = error;
+        }
+
+        expect(thrown, isA<LlamaUnsupportedException>());
+        expect(
+          thrown.toString(),
+          contains('Authenticated multimodal projector URL loading'),
+        );
+        expect(webBackend.multimodalContextCreateCalls, 0);
+      },
+    );
+
     test(
       'loadModelSource rejects unsupported cancellation on URL backends',
       () async {
diff --git a/website/docs/changelog/recent-releases.md b/website/docs/changelog/recent-releases.md
index 90e47544..da9d1c93 100644
--- a/website/docs/changelog/recent-releases.md
+++ b/website/docs/changelog/recent-releases.md
@@ -13,6 +13,10 @@ For canonical full release notes, use:
   helpers for strict JSON-object / JSON-schema generation with final-output
   validation and typed decoding.
 
+- Added `LlamaEngine.loadMultimodalProjectorSource(...)` so GGUF multimodal
+  projector files can use the same `ModelSource` resolver and native
+  download/cache options as `loadModelSource(...)`.
+
 ## 0.8.12
 
 - Updated the default LiteRT-LM native runtime pin to
diff --git a/website/docs/guides/model-lifecycle.md b/website/docs/guides/model-lifecycle.md
index eaf88e9a..df5a0a76 100644
--- a/website/docs/guides/model-lifecycle.md
+++ b/website/docs/guides/model-lifecycle.md
@@ -493,6 +493,26 @@ final canHear = await engine.supportsAudio;
 print('vision=$canSee audio=$canHear');
 ```
 
+Use `loadMultimodalProjectorSource(...)` when the projector should use the same
+`ModelSource` resolver, native download/cache manager, checksums, auth headers,
+and progress callback shape as `loadModelSource(...)`:
+
+```dart
+await engine.loadModelSource(
+  ModelSource.parse('hf://owner/repo/model-Q4_K_M.gguf'),
+);
+await engine.loadMultimodalProjectorSource(
+  ModelSource.parse('hf://owner/repo/mmproj.gguf'),
+  options: ModelLoadOptions(cachePolicy: ModelCachePolicy.preferCached),
+);
+```
+
+Native/file-backed backends resolve remote projector sources to cached local
+files before loading. URL-loading web backends accept remote unauthenticated
+projector URLs and reject local filesystem sources, auth headers, checksums,
+cancellation tokens, explicit cache policy changes, custom cache directories,
+disabled resume, and custom retry counts because those require native cache IO.
+
 Projector resources are released by `unloadModel()` or `dispose()`.
 
 ## LoRA adapters at runtime
diff --git a/website/docs/guides/multimodal.md b/website/docs/guides/multimodal.md
index bb5e0f00..50bd8ef7 100644
--- a/website/docs/guides/multimodal.md
+++ b/website/docs/guides/multimodal.md
@@ -14,6 +14,25 @@ await engine.loadModel('/path/to/model.gguf');
 await engine.loadMultimodalProjector('/path/to/mmproj.gguf');
 ```
 
+Use source-based loading when the projector should be resolved, downloaded, and
+cached like a remote model source:
+
+```dart
+await engine.loadModelSource(
+  ModelSource.parse('hf://owner/repo/model-Q4_K_M.gguf'),
+);
+await engine.loadMultimodalProjectorSource(
+  ModelSource.parse('hf://owner/repo/mmproj.gguf'),
+);
+```
+
+Native/file-backed backends download remote projectors through the configured
+`ModelDownloadManager` before loading the cached local path. URL-loading web
+backends support remote unauthenticated projector URLs directly and reject local
+filesystem paths or options that require native cache IO such as auth headers,
+checksum verification, cancellation tokens, explicit cache policy changes,
+custom cache directories, disabled resume, and custom retry counts.
+
 Projector offload follows effective model-load configuration. If model loading
 is CPU-only (`preferredBackend: GpuBackend.cpu` or `gpuLayers: 0`), projector
 initialization also runs CPU-only.
@@ -76,14 +95,17 @@ the current Gemma 4 E2B/E4B GGUF projector path in `llama.cpp` mtmd exposes
 vision, but not audio, in `llamadart`.
 
 For native LiteRT-LM `.litertlm` bundles, capability depends on the bundle's
-native template/model processors. `loadMultimodalProjector`, `supportsVision`,
-and `supportsAudio` are projector-oriented APIs and are not used by the
-LiteRT-LM bundle flow.
+native template/model processors. `loadMultimodalProjector`,
+`loadMultimodalProjectorSource`, `supportsVision`, and `supportsAudio` are
+projector-oriented APIs and are not used by the LiteRT-LM bundle flow.
 
 ## Web notes
 
 - Web uses bridge runtime paths.
 - Multimodal projector loading on web is URL-based.
+- `loadMultimodalProjectorSource(...)` accepts remote unauthenticated projector
+  URLs on URL-loading web backends; source options that require the native
+  download/cache manager are unsupported there.
 - Local file path media inputs are native-first; web flows use browser file
   bytes/URLs.
 - LiteRT-LM web through `@litert-lm/core` remains text-only in `llamadart`.
diff --git a/website/docs/platforms/support-matrix.md b/website/docs/platforms/support-matrix.md
index 4aba967a..b01d9a65 100644
--- a/website/docs/platforms/support-matrix.md
+++ b/website/docs/platforms/support-matrix.md
@@ -67,6 +67,14 @@ currently forwards only single-turn text prompts through `@litert-lm/core`, so
 it does not preserve `ChatSession` history, system prompts, or tool
 declarations with native LiteRT-LM semantics yet.
 
+For GGUF multimodal projectors, `loadMultimodalProjectorSource(...)` uses the
+same `ModelSource` and native download/cache APIs as `loadModelSource(...)`.
+Native/file-backed targets load the cached local projector path. URL-loading web
+targets accept remote unauthenticated projector URLs and reject local filesystem
+sources plus options that require native cache IO, including auth headers,
+checksum verification, cancellation tokens, explicit cache policy changes,
+custom cache directories, disabled resume, and custom retry counts.
+
 Select LiteRT-LM CPU/GPU/NPU with `ModelParams.liteRtLmBackend`.
 `LiteRtLmBackendPreference.auto` currently maps to GPU on Android, macOS, and
 web, and CPU on other LiteRT-LM targets. NPU selection is Android native only;

From 89dac8625b41f25d4cc25bf697ffc4dbd4415740 Mon Sep 17 00:00:00 2001
From: Jhin Lee <leehack@gmail.com>
Date: Sat, 4 Jul 2026 14:34:55 -0400
Subject: [PATCH 2/2] Clarify projector URL option errors

---
 lib/src/core/engine/engine.dart        |  4 +++-
 test/unit/core/engine/engine_test.dart | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/lib/src/core/engine/engine.dart b/lib/src/core/engine/engine.dart
index 5151661d..8628ea57 100644
--- a/lib/src/core/engine/engine.dart
+++ b/lib/src/core/engine/engine.dart
@@ -1244,7 +1244,9 @@ class LlamaEngine {
     }
     if (options.cancelToken != null) {
       throw LlamaUnsupportedException(
-        'Cancellation tokens require the native download/cache manager.',
+        isModel
+            ? 'Cancellation tokens require the native download/cache manager.'
+            : 'Cancellation tokens for $assetType loading require the native download/cache manager.',
       );
     }
     if (options.sha256 != null) {
diff --git a/test/unit/core/engine/engine_test.dart b/test/unit/core/engine/engine_test.dart
index 77d2273a..72693b6e 100644
--- a/test/unit/core/engine/engine_test.dart
+++ b/test/unit/core/engine/engine_test.dart
@@ -1049,6 +1049,23 @@ void main() {
           contains('Authenticated multimodal projector URL loading'),
         );
         expect(webBackend.multimodalContextCreateCalls, 0);
+
+        Object? cancellationError;
+        try {
+          await webEngine.loadMultimodalProjectorSource(
+            ModelSource.url(Uri.parse('https://example.com/mmproj.gguf')),
+            options: ModelLoadOptions(cancelToken: ModelDownloadCancelToken()),
+          );
+        } catch (error) {
+          cancellationError = error;
+        }
+
+        expect(cancellationError, isA<LlamaUnsupportedException>());
+        expect(
+          cancellationError.toString(),
+          contains('Cancellation tokens for multimodal projector loading'),
+        );
+        expect(webBackend.multimodalContextCreateCalls, 0);
       },
     );