From 5ffd48d3a4cf8e8a4fc5cbb6d229c6657211d701 Mon Sep 17 00:00:00 2001 From: Jhin Lee Date: Sat, 4 Jul 2026 12:24:24 -0400 Subject: [PATCH 1/2] Add source-based multimodal projector loading --- CHANGELOG.md | 6 + README.md | 33 +- lib/src/core/engine/engine.dart | 105 ++++++- test/unit/core/engine/engine_test.dart | 347 +++++++++++++++++++++- website/docs/changelog/recent-releases.md | 4 + website/docs/guides/model-lifecycle.md | 20 ++ website/docs/guides/multimodal.md | 28 +- website/docs/platforms/support-matrix.md | 8 + 8 files changed, 533 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 220124ca..a7776a4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ helpers for strict JSON-object / JSON-schema generation with final-output validation and typed decoding. +* Added `LlamaEngine.loadMultimodalProjectorSource(...)` so GGUF + multimodal projector files can use the same `ModelSource` resolver, + native download/cache manager, authentication, checksum, and progress + options as `loadModelSource(...)`, while preserving the existing + `loadMultimodalProjector(...)` path/string API. + ## 0.8.12 * Updated the default LiteRT-LM native runtime pin to diff --git a/README.md b/README.md index 166e4f3c..b9861686 100644 --- a/README.md +++ b/README.md @@ -422,7 +422,25 @@ For LiteRT-LM bundles, use the same `loadModelSource(...)` path with a CPU, GPU, or Android NPU execution after the file is cached. `llamadart` does not list Hugging Face files or expand sharded GGUF manifests; pick the exact `.gguf` file path from the repository, and use separate model and -`mmproj` sources for multimodal assets. +`mmproj` sources for multimodal assets. After the GGUF model is loaded, call +`loadMultimodalProjectorSource(...)` to resolve, download, cache, and load the +projector through the same source/cache layer: + +```dart +await engine.loadModelSource( + ModelSource.parse('hf://owner/repo/model-Q4_K_M.gguf'), +); +await engine.loadMultimodalProjectorSource( + ModelSource.parse('hf://owner/repo/mmproj.gguf'), + options: ModelLoadOptions(cachePolicy: ModelCachePolicy.preferCached), +); +``` + +Native/file-backed backends load the cached local projector path. URL-loading +web backends pass unauthenticated remote projector URLs directly to the bridge; +authenticated headers, checksum verification, explicit cache policy changes, +custom cache directories, and local filesystem path sources require a +file-backed native cache manager. ### 7. Generate embeddings @@ -1139,6 +1157,10 @@ void main() async { try { await engine.loadModel('vision-model.gguf'); await engine.loadMultimodalProjector('mmproj.gguf'); + // Or use ModelSource when the projector should be downloaded/cached: + // await engine.loadMultimodalProjectorSource( + // ModelSource.parse('hf://owner/repo/mmproj.gguf'), + // ); final session = ChatSession(engine); @@ -1166,7 +1188,9 @@ void main() async { Web-specific note: -- Load model/mmproj with URL-based assets (`loadModelFromUrl` + URL projector). +- Load model/mmproj with URL-based assets (`loadModelSource` / + `loadModelFromUrl` + URL projector). `loadMultimodalProjectorSource` supports + remote unauthenticated projector URLs on URL-loading web backends. - For user-picked browser files, send media as bytes (`LlamaImageContent(bytes: ...)`, `LlamaAudioContent(bytes: ...)`) rather than local file paths. @@ -1178,8 +1202,9 @@ LiteRT-LM native note: - Native LiteRT-LM supports local paths and encoded media bytes (`blob`) for media parts. Remote image URLs and raw PCM `Float32List` samples fail before native generation with clear errors. -- `loadMultimodalProjector`, `supportsVision`, and `supportsAudio` remain - projector-oriented APIs for llama.cpp/WebGPU multimodal paths. +- `loadMultimodalProjector`, `loadMultimodalProjectorSource`, + `supportsVision`, and `supportsAudio` remain projector-oriented APIs for + llama.cpp/WebGPU multimodal paths. ### 💡 Model-Specific Notes diff --git a/lib/src/core/engine/engine.dart b/lib/src/core/engine/engine.dart index 6a1bb755..5151661d 100644 --- a/lib/src/core/engine/engine.dart +++ b/lib/src/core/engine/engine.dart @@ -293,6 +293,85 @@ class LlamaEngine { return _withMmLifecycle(() => _loadMultimodalProjectorLocked(mmProjPath)); } + /// Loads a multimodal projector from a structured [source]. + /// + /// A model must already be loaded with [loadModel], [loadModelSource], or + /// [loadModelFromUrl]. Calling this before the model is ready throws a + /// [LlamaContextException]. + /// + /// This method is lifecycle-compatible with [loadMultimodalProjector]: + /// source resolution, package-managed download/cache work, and the final + /// backend projector load are serialized with direct path projector loads and + /// unloads. Concurrent projector lifecycle calls are applied in call order, + /// and loading a new projector replaces any active projector. + /// + /// Local path sources are validated by the configured + /// [modelDownloadManager], then loaded from their local file path. Remote + /// sources use the native download/cache manager on file-backed backends. On + /// URL-loading backends, remote unauthenticated sources are passed directly to + /// the backend; package-managed auth, headers, checksum verification, cache + /// policy changes, cache directories, cancellation, retry/resume settings, + /// and progress reporting are not available because the backend/browser owns + /// the network and cache behavior. + /// + /// Throws [LlamaUnsupportedException] when the active backend cannot load + /// multimodal projectors, when a local path is used with a URL-loading + /// backend, when the resolver returns a remote target that disallows + /// browser/backend caching, or when URL-backend loading is requested with + /// options that require the package-managed download/cache manager. + Future loadMultimodalProjectorSource( + ModelSource source, { + ModelLoadOptions options = ModelLoadOptions.defaults, + ModelDownloadProgressCallback? onProgress, + }) { + return _withMmLifecycle(() async { + _ensureReady(requireContext: false); + + final target = await modelResolver.resolve( + source, + ModelResolveRequest(options: options, onProgress: onProgress), + ); + + switch (target) { + case LocalModelFile(:final path): + if (backend.supportsUrlLoading) { + throw LlamaUnsupportedException( + 'Explicit local multimodal projector paths are not supported by URL-loading backends.', + ); + } + final localSource = ModelSource.path(path); + final entry = await modelDownloadManager.ensureModel( + localSource, + options: options, + onProgress: onProgress, + ); + return _loadMultimodalProjectorLocked(entry.filePath); + case RemoteModelUrl(:final url, :final useBrowserCache): + if (!useBrowserCache) { + throw LlamaUnsupportedException( + 'Remote multimodal projector loading without browser/backend cache is not supported yet.', + ); + } + if (!backend.supportsUrlLoading) { + final downloadSource = source.isRemote + ? source.withResolvedUri(url) + : ModelSource.url(url, fileName: source.fileName); + final entry = await modelDownloadManager.ensureModel( + downloadSource, + options: options, + onProgress: onProgress, + ); + return _loadMultimodalProjectorLocked(entry.filePath); + } + _rejectUnsupportedUrlBackendOptions( + options, + assetType: 'multimodal projector', + ); + return _loadMultimodalProjectorLocked(url.toString()); + } + }); + } + Future _loadMultimodalProjectorLocked(String mmProjPath) async { final mmProjName = _displayNameForSource(mmProjPath); LlamaLogger.instance.info('Loading multimodal projector: $mmProjName'); @@ -1148,15 +1227,19 @@ class LlamaEngine { _isReady = false; } - void _rejectUnsupportedUrlBackendOptions(ModelLoadOptions options) { + void _rejectUnsupportedUrlBackendOptions( + ModelLoadOptions options, { + String assetType = 'model', + }) { + final isModel = assetType == 'model'; if (options.cachePolicy != ModelCachePolicy.preferCached) { throw LlamaUnsupportedException( - '${options.cachePolicy.name} model loading requires the native download/cache manager.', + '${options.cachePolicy.name} $assetType loading requires the native download/cache manager.', ); } if (options.bearerToken != null || options.headers.isNotEmpty) { throw LlamaUnsupportedException( - 'Authenticated model URL loading requires the native download/cache manager.', + 'Authenticated $assetType URL loading requires the native download/cache manager.', ); } if (options.cancelToken != null) { @@ -1166,22 +1249,30 @@ class LlamaEngine { } if (options.sha256 != null) { throw LlamaUnsupportedException( - 'Checksum verification requires the native download/cache manager.', + isModel + ? 'Checksum verification requires the native download/cache manager.' + : 'Checksum verification for $assetType loading requires the native download/cache manager.', ); } if (options.cacheDirectory != null) { throw LlamaUnsupportedException( - 'cacheDirectory is not supported by URL-loading backends.', + isModel + ? 'cacheDirectory is not supported by URL-loading backends.' + : 'cacheDirectory is not supported for $assetType loading by URL-loading backends.', ); } if (!options.resume) { throw LlamaUnsupportedException( - 'Disabling resume is not supported by URL-loading backends.', + isModel + ? 'Disabling resume is not supported by URL-loading backends.' + : 'Disabling resume is not supported for $assetType loading by URL-loading backends.', ); } if (options.maxRetries != ModelLoadOptions.defaults.maxRetries) { throw LlamaUnsupportedException( - 'Custom maxRetries is not supported by URL-loading backends.', + isModel + ? 'Custom maxRetries is not supported by URL-loading backends.' + : 'Custom maxRetries is not supported for $assetType loading by URL-loading backends.', ); } } diff --git a/test/unit/core/engine/engine_test.dart b/test/unit/core/engine/engine_test.dart index 21de2619..77d2273a 100644 --- a/test/unit/core/engine/engine_test.dart +++ b/test/unit/core/engine/engine_test.dart @@ -19,12 +19,15 @@ class MockLlamaBackend String? lastModelPath; String? lastLoraPath; String? lastModelUrl; + String? lastMultimodalProjectorPath; double? lastLoraScale; int resolvedGpuLayers = 0; int modelLoadCalls = 0; int modelLoadFromUrlCalls = 0; int modelFreeCalls = 0; int contextFreeCalls = 0; + int multimodalContextCreateCalls = 0; + final List multimodalProjectorPaths = []; int tokenizeCalls = 0; int modelMetadataCalls = 0; String generationText = 'response'; @@ -185,7 +188,12 @@ class MockLlamaBackend Future multimodalContextCreate( int modelHandle, String mmProjPath, - ) async => 2; + ) async { + multimodalContextCreateCalls += 1; + lastMultimodalProjectorPath = mmProjPath; + multimodalProjectorPaths.add(mmProjPath); + return 2; + } @override Future multimodalContextFree(int mmContextHandle) async {} @@ -327,13 +335,23 @@ class MockModelResolver implements ModelResolver { } class MockModelDownloadManager implements ModelDownloadManager { - MockModelDownloadManager(this.entry); + MockModelDownloadManager(ModelCacheEntry entry) + : entriesByCacheKey = {entry.cacheKey: entry}; + + MockModelDownloadManager.forEntries(Iterable entries) + : entriesByCacheKey = { + for (final entry in entries) entry.cacheKey: entry, + }; - final ModelCacheEntry entry; + final Map entriesByCacheKey; ModelSource? lastSource; ModelLoadOptions? lastOptions; + final List sources = []; + final List options = []; int ensureModelCalls = 0; + ModelCacheEntry get entry => entriesByCacheKey.values.first; + @override Future ensureModel( ModelSource source, { @@ -343,10 +361,12 @@ class MockModelDownloadManager implements ModelDownloadManager { ensureModelCalls += 1; lastSource = source; lastOptions = options; + sources.add(source); + this.options.add(options); onProgress?.call( const ModelDownloadProgress(receivedBytes: 1, totalBytes: 2), ); - return entry; + return entriesByCacheKey[source.cacheKey] ?? entry; } @override @@ -373,6 +393,42 @@ class MockModelDownloadManager implements ModelDownloadManager { Future remove(String cacheKey, {String? cacheDirectory}) async {} } +class ControlledModelDownloadManager extends MockModelDownloadManager { + ControlledModelDownloadManager({ + required Iterable entries, + this.gatesByCacheKey = const >{}, + this.startedByCacheKey = const >{}, + }) : super.forEntries(entries); + + final Map> gatesByCacheKey; + final Map> startedByCacheKey; + + @override + Future ensureModel( + ModelSource source, { + ModelLoadOptions options = ModelLoadOptions.defaults, + ModelDownloadProgressCallback? onProgress, + }) async { + ensureModelCalls += 1; + lastSource = source; + lastOptions = options; + sources.add(source); + this.options.add(options); + final started = startedByCacheKey[source.cacheKey]; + if (started != null && !started.isCompleted) { + started.complete(); + } + final gate = gatesByCacheKey[source.cacheKey]; + if (gate != null) { + await gate.future; + } + onProgress?.call( + const ModelDownloadProgress(receivedBytes: 1, totalBytes: 2), + ); + return entriesByCacheKey[source.cacheKey] ?? entry; + } +} + class MockEmbeddingBackend extends MockLlamaBackend implements BackendEmbeddings { int embedCalls = 0; @@ -713,6 +769,289 @@ void main() { }, ); + test( + 'native loadMultimodalProjectorSource supports local and remote model/projector combinations', + () async { + ModelCacheEntry entryFor(ModelSource source, String filePath) { + return ModelCacheEntry( + sourceCanonicalKey: source.metadataSourceKey, + cacheKey: source.cacheKey, + fileName: source.fileName, + filePath: filePath, + createdAt: DateTime.utc(2026), + updatedAt: DateTime.utc(2026), + ); + } + + final cases = + < + ({ + String label, + ModelSource modelSource, + String modelPath, + ModelSource projectorSource, + String projectorPath, + }) + >[ + ( + label: 'local model + local projector', + modelSource: ModelSource.path('/models/local-model.gguf'), + modelPath: '/models/local-model.gguf', + projectorSource: ModelSource.path('/models/local-mmproj.gguf'), + projectorPath: '/models/local-mmproj.gguf', + ), + ( + label: 'local model + remote projector', + modelSource: ModelSource.path('/models/local-model.gguf'), + modelPath: '/models/local-model.gguf', + projectorSource: ModelSource.url( + Uri.parse('https://example.com/remote-mmproj.gguf'), + ), + projectorPath: '/cache/remote-mmproj.gguf', + ), + ( + label: 'remote model + local projector', + modelSource: ModelSource.url( + Uri.parse('https://example.com/remote-model.gguf'), + ), + modelPath: '/cache/remote-model.gguf', + projectorSource: ModelSource.path('/models/local-mmproj.gguf'), + projectorPath: '/models/local-mmproj.gguf', + ), + ( + label: 'remote model + remote projector', + modelSource: ModelSource.url( + Uri.parse('https://example.com/remote-model.gguf'), + ), + modelPath: '/cache/remote-model.gguf', + projectorSource: ModelSource.url( + Uri.parse('https://example.com/remote-mmproj.gguf'), + ), + projectorPath: '/cache/remote-mmproj.gguf', + ), + ]; + + for (final testCase in cases) { + final nativeBackend = MockLlamaBackend(); + final downloadManager = MockModelDownloadManager.forEntries([ + entryFor(testCase.modelSource, testCase.modelPath), + entryFor(testCase.projectorSource, testCase.projectorPath), + ]); + final nativeEngine = LlamaEngine( + nativeBackend, + modelDownloadManager: downloadManager, + ); + + await nativeEngine.loadModelSource(testCase.modelSource); + await nativeEngine.loadMultimodalProjectorSource( + testCase.projectorSource, + ); + + expect( + nativeBackend.lastModelPath, + testCase.modelPath, + reason: testCase.label, + ); + expect( + nativeBackend.lastMultimodalProjectorPath, + testCase.projectorPath, + reason: testCase.label, + ); + expect(downloadManager.ensureModelCalls, 2, reason: testCase.label); + expect( + downloadManager.sources.map((source) => source.cacheKey), + [testCase.modelSource.cacheKey, testCase.projectorSource.cacheKey], + reason: testCase.label, + ); + } + }, + ); + + test( + 'native loadMultimodalProjectorSource forwards options and progress', + () async { + final source = ModelSource.url( + Uri.parse('https://example.com/mmproj.gguf'), + ); + final entry = ModelCacheEntry( + sourceCanonicalKey: source.metadataSourceKey, + cacheKey: source.cacheKey, + fileName: source.fileName, + filePath: '/cache/mmproj.gguf', + createdAt: DateTime.utc(2026), + updatedAt: DateTime.utc(2026), + ); + final downloadManager = MockModelDownloadManager(entry); + final nativeBackend = MockLlamaBackend(); + final nativeEngine = LlamaEngine( + nativeBackend, + modelDownloadManager: downloadManager, + ); + final options = ModelLoadOptions( + cachePolicy: ModelCachePolicy.refresh, + bearerToken: 'secret-token', + ); + final progressEvents = []; + + await nativeEngine.loadModel('model.gguf'); + await nativeEngine.loadMultimodalProjectorSource( + source, + options: options, + onProgress: progressEvents.add, + ); + + expect(downloadManager.ensureModelCalls, 1); + expect(downloadManager.lastSource?.resolvedUri, source.resolvedUri); + expect(downloadManager.lastOptions, same(options)); + expect(nativeBackend.lastMultimodalProjectorPath, '/cache/mmproj.gguf'); + expect(progressEvents.single.fraction, 0.5); + }, + ); + + test( + 'loadMultimodalProjectorSource serializes source work before backend load', + () async { + ModelCacheEntry entryFor(ModelSource source, String filePath) { + return ModelCacheEntry( + sourceCanonicalKey: source.metadataSourceKey, + cacheKey: source.cacheKey, + fileName: source.fileName, + filePath: filePath, + createdAt: DateTime.utc(2026), + updatedAt: DateTime.utc(2026), + ); + } + + final firstSource = ModelSource.url( + Uri.parse('https://example.com/first-mmproj.gguf'), + ); + final secondSource = ModelSource.url( + Uri.parse('https://example.com/second-mmproj.gguf'), + ); + final firstGate = Completer(); + final firstStarted = Completer(); + final secondStarted = Completer(); + final downloadManager = ControlledModelDownloadManager( + entries: [ + entryFor(firstSource, '/cache/first-mmproj.gguf'), + entryFor(secondSource, '/cache/second-mmproj.gguf'), + ], + gatesByCacheKey: {firstSource.cacheKey: firstGate}, + startedByCacheKey: { + firstSource.cacheKey: firstStarted, + secondSource.cacheKey: secondStarted, + }, + ); + final nativeBackend = MockLlamaBackend(); + final nativeEngine = LlamaEngine( + nativeBackend, + modelDownloadManager: downloadManager, + ); + + await nativeEngine.loadModel('model.gguf'); + + final firstLoad = nativeEngine.loadMultimodalProjectorSource( + firstSource, + ); + await firstStarted.future; + + final secondLoad = nativeEngine.loadMultimodalProjectorSource( + secondSource, + ); + await pumpEventQueue(); + + expect(secondStarted.isCompleted, isFalse); + expect(downloadManager.sources.map((source) => source.cacheKey), [ + firstSource.cacheKey, + ]); + expect(nativeBackend.multimodalProjectorPaths, isEmpty); + + firstGate.complete(); + await Future.wait([firstLoad, secondLoad]); + + expect(secondStarted.isCompleted, isTrue); + expect(downloadManager.sources.map((source) => source.cacheKey), [ + firstSource.cacheKey, + secondSource.cacheKey, + ]); + expect(nativeBackend.multimodalProjectorPaths, [ + '/cache/first-mmproj.gguf', + '/cache/second-mmproj.gguf', + ]); + }, + ); + + test( + 'loadMultimodalProjectorSource loads remote URL directly on URL backends', + () async { + final webBackend = MockLlamaBackend(urlLoadingSupported: true); + final webEngine = LlamaEngine(webBackend); + + await webEngine.loadModelSource( + ModelSource.url(Uri.parse('https://example.com/model.gguf')), + ); + await webEngine.loadMultimodalProjectorSource( + ModelSource.url(Uri.parse('https://example.com/mmproj.gguf')), + ); + + expect(webBackend.lastModelUrl, 'https://example.com/model.gguf'); + expect( + webBackend.lastMultimodalProjectorPath, + 'https://example.com/mmproj.gguf', + ); + expect(webBackend.multimodalContextCreateCalls, 1); + }, + ); + + test( + 'loadMultimodalProjectorSource rejects local paths on URL backends', + () async { + final webBackend = MockLlamaBackend(urlLoadingSupported: true); + final webEngine = LlamaEngine(webBackend); + + await webEngine.loadModelSource( + ModelSource.url(Uri.parse('https://example.com/model.gguf')), + ); + + await expectLater( + () => webEngine.loadMultimodalProjectorSource( + ModelSource.path('/models/mmproj.gguf'), + ), + throwsA(isA()), + ); + expect(webBackend.multimodalContextCreateCalls, 0); + }, + ); + + test( + 'loadMultimodalProjectorSource rejects URL-backend cache IO options', + () async { + final webBackend = MockLlamaBackend(urlLoadingSupported: true); + final webEngine = LlamaEngine(webBackend); + + await webEngine.loadModelSource( + ModelSource.url(Uri.parse('https://example.com/model.gguf')), + ); + + Object? thrown; + try { + await webEngine.loadMultimodalProjectorSource( + ModelSource.url(Uri.parse('https://example.com/mmproj.gguf')), + options: ModelLoadOptions(bearerToken: 'secret-token'), + ); + } catch (error) { + thrown = error; + } + + expect(thrown, isA()); + expect( + thrown.toString(), + contains('Authenticated multimodal projector URL loading'), + ); + expect(webBackend.multimodalContextCreateCalls, 0); + }, + ); + test( 'loadModelSource rejects unsupported cancellation on URL backends', () async { diff --git a/website/docs/changelog/recent-releases.md b/website/docs/changelog/recent-releases.md index 90e47544..da9d1c93 100644 --- a/website/docs/changelog/recent-releases.md +++ b/website/docs/changelog/recent-releases.md @@ -13,6 +13,10 @@ For canonical full release notes, use: helpers for strict JSON-object / JSON-schema generation with final-output validation and typed decoding. +- Added `LlamaEngine.loadMultimodalProjectorSource(...)` so GGUF multimodal + projector files can use the same `ModelSource` resolver and native + download/cache options as `loadModelSource(...)`. + ## 0.8.12 - Updated the default LiteRT-LM native runtime pin to diff --git a/website/docs/guides/model-lifecycle.md b/website/docs/guides/model-lifecycle.md index eaf88e9a..df5a0a76 100644 --- a/website/docs/guides/model-lifecycle.md +++ b/website/docs/guides/model-lifecycle.md @@ -493,6 +493,26 @@ final canHear = await engine.supportsAudio; print('vision=$canSee audio=$canHear'); ``` +Use `loadMultimodalProjectorSource(...)` when the projector should use the same +`ModelSource` resolver, native download/cache manager, checksums, auth headers, +and progress callback shape as `loadModelSource(...)`: + +```dart +await engine.loadModelSource( + ModelSource.parse('hf://owner/repo/model-Q4_K_M.gguf'), +); +await engine.loadMultimodalProjectorSource( + ModelSource.parse('hf://owner/repo/mmproj.gguf'), + options: ModelLoadOptions(cachePolicy: ModelCachePolicy.preferCached), +); +``` + +Native/file-backed backends resolve remote projector sources to cached local +files before loading. URL-loading web backends accept remote unauthenticated +projector URLs and reject local filesystem sources, auth headers, checksum +verification, explicit cache policy changes, custom cache directories, disabled +resume, and custom retry counts because those require native cache IO. + Projector resources are released by `unloadModel()` or `dispose()`. ## LoRA adapters at runtime diff --git a/website/docs/guides/multimodal.md b/website/docs/guides/multimodal.md index bb5e0f00..50bd8ef7 100644 --- a/website/docs/guides/multimodal.md +++ b/website/docs/guides/multimodal.md @@ -14,6 +14,25 @@ await engine.loadModel('/path/to/model.gguf'); await engine.loadMultimodalProjector('/path/to/mmproj.gguf'); ``` +Use source-based loading when the projector should be resolved, downloaded, and +cached like a remote model source: + +```dart +await engine.loadModelSource( + ModelSource.parse('hf://owner/repo/model-Q4_K_M.gguf'), +); +await engine.loadMultimodalProjectorSource( + ModelSource.parse('hf://owner/repo/mmproj.gguf'), +); +``` + +Native/file-backed backends download remote projectors through the configured +`ModelDownloadManager` before loading the cached local path. URL-loading web +backends support remote unauthenticated projector URLs directly and reject local +filesystem paths or options that require native cache IO such as auth headers, +checksum verification, explicit cache policy changes, custom cache directories, +disabled resume, and custom retry counts. + Projector offload follows effective model-load configuration. If model loading is CPU-only (`preferredBackend: GpuBackend.cpu` or `gpuLayers: 0`), projector initialization also runs CPU-only. @@ -76,14 +95,17 @@ the current Gemma 4 E2B/E4B GGUF projector path in `llama.cpp` mtmd exposes vision, but not audio, in `llamadart`. For native LiteRT-LM `.litertlm` bundles, capability depends on the bundle's -native template/model processors. `loadMultimodalProjector`, `supportsVision`, -and `supportsAudio` are projector-oriented APIs and are not used by the -LiteRT-LM bundle flow. +native template/model processors. `loadMultimodalProjector`, +`loadMultimodalProjectorSource`, `supportsVision`, and `supportsAudio` are +projector-oriented APIs and are not used by the LiteRT-LM bundle flow. ## Web notes - Web uses bridge runtime paths. - Multimodal projector loading on web is URL-based. +- `loadMultimodalProjectorSource(...)` accepts remote unauthenticated projector + URLs on URL-loading web backends; source options that require the native + download/cache manager are unsupported there. - Local file path media inputs are native-first; web flows use browser file bytes/URLs. - LiteRT-LM web through `@litert-lm/core` remains text-only in `llamadart`. diff --git a/website/docs/platforms/support-matrix.md b/website/docs/platforms/support-matrix.md index 4aba967a..b01d9a65 100644 --- a/website/docs/platforms/support-matrix.md +++ b/website/docs/platforms/support-matrix.md @@ -67,6 +67,14 @@ currently forwards only single-turn text prompts through `@litert-lm/core`, so it does not preserve `ChatSession` history, system prompts, or tool declarations with native LiteRT-LM semantics yet. +For GGUF multimodal projectors, `loadMultimodalProjectorSource(...)` uses the +same `ModelSource` and native download/cache APIs as `loadModelSource(...)`. +Native/file-backed targets load the cached local projector path. URL-loading web +targets accept remote unauthenticated projector URLs and reject local filesystem +sources plus options that require native cache IO, including auth headers, +checksum verification, explicit cache policy changes, custom cache directories, +disabled resume, and custom retry counts. + Select LiteRT-LM CPU/GPU/NPU with `ModelParams.liteRtLmBackend`. `LiteRtLmBackendPreference.auto` currently maps to GPU on Android, macOS, and web, and CPU on other LiteRT-LM targets. NPU selection is Android native only; From 89dac8625b41f25d4cc25bf697ffc4dbd4415740 Mon Sep 17 00:00:00 2001 From: Jhin Lee Date: Sat, 4 Jul 2026 14:34:55 -0400 Subject: [PATCH 2/2] Clarify projector URL option errors --- lib/src/core/engine/engine.dart | 4 +++- test/unit/core/engine/engine_test.dart | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/src/core/engine/engine.dart b/lib/src/core/engine/engine.dart index 5151661d..8628ea57 100644 --- a/lib/src/core/engine/engine.dart +++ b/lib/src/core/engine/engine.dart @@ -1244,7 +1244,9 @@ class LlamaEngine { } if (options.cancelToken != null) { throw LlamaUnsupportedException( - 'Cancellation tokens require the native download/cache manager.', + isModel + ? 'Cancellation tokens require the native download/cache manager.' + : 'Cancellation tokens for $assetType loading require the native download/cache manager.', ); } if (options.sha256 != null) { diff --git a/test/unit/core/engine/engine_test.dart b/test/unit/core/engine/engine_test.dart index 77d2273a..72693b6e 100644 --- a/test/unit/core/engine/engine_test.dart +++ b/test/unit/core/engine/engine_test.dart @@ -1049,6 +1049,23 @@ void main() { contains('Authenticated multimodal projector URL loading'), ); expect(webBackend.multimodalContextCreateCalls, 0); + + Object? cancellationError; + try { + await webEngine.loadMultimodalProjectorSource( + ModelSource.url(Uri.parse('https://example.com/mmproj.gguf')), + options: ModelLoadOptions(cancelToken: ModelDownloadCancelToken()), + ); + } catch (error) { + cancellationError = error; + } + + expect(cancellationError, isA()); + expect( + cancellationError.toString(), + contains('Cancellation tokens for multimodal projector loading'), + ); + expect(webBackend.multimodalContextCreateCalls, 0); }, );