Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions src/config/models/flare-models.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"smollm2-135m-flare": {
"engine": "flare",
"modelName": "SmolLM2-135M-Instruct",
"modelType": "text-generation",
"repo": "HuggingFaceTB/smollm2-135M-instruct-GGUF",
"url": "https://huggingface.co/HuggingFaceTB/smollm2-135M-instruct-GGUF/resolve/main/smollm2-135m-instruct-q8_0.gguf",
"pipeline": "text-generation",
"defaultQuantization": "Q8_0",
"quantizations": ["Q8_0"],
"architecture": "llama",
"downloadSizeMB": 138,
"contextLength": 2048,
"defaultParams": {
"temperature": 0.7,
"maxTokens": 512
},
"metadata": {
"description": "Smallest Flare model — instant load, great for demos",
"tier": 1
}
},
"smollm2-135m-flare-q4": {
"engine": "flare",
"modelName": "SmolLM2-135M-Instruct-Q4",
"modelType": "text-generation",
"repo": "HuggingFaceTB/smollm2-135M-instruct-GGUF",
"url": "https://huggingface.co/HuggingFaceTB/smollm2-135M-instruct-GGUF/resolve/main/smollm2-135m-instruct-q4_k_m.gguf",
"pipeline": "text-generation",
"defaultQuantization": "Q4_K_M",
"quantizations": ["Q4_K_M"],
"architecture": "llama",
"downloadSizeMB": 75,
"contextLength": 2048,
"defaultParams": {
"temperature": 0.7,
"maxTokens": 512
},
"metadata": {
"description": "Smallest download (~75 MB), great for bandwidth-constrained environments",
"tier": 1
}
},
"smollm2-360m-flare": {
"engine": "flare",
"modelName": "SmolLM2-360M-Instruct",
"modelType": "text-generation",
"repo": "HuggingFaceTB/smollm2-360M-instruct-GGUF",
"url": "https://huggingface.co/HuggingFaceTB/smollm2-360M-instruct-GGUF/resolve/main/smollm2-360m-instruct-q8_0.gguf",
"pipeline": "text-generation",
"defaultQuantization": "Q8_0",
"quantizations": ["Q8_0"],
"architecture": "llama",
"downloadSizeMB": 350,
"contextLength": 2048,
"defaultParams": {
"temperature": 0.7,
"maxTokens": 512
},
"metadata": {
"description": "Better quality than 135M while still loading quickly",
"tier": 2
}
},
"qwen2.5-0.5b-flare": {
"engine": "flare",
"modelName": "Qwen2.5-0.5B-Instruct",
"modelType": "text-generation",
"repo": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
"url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf",
"pipeline": "text-generation",
"defaultQuantization": "Q4_K_M",
"quantizations": ["Q4_K_M"],
"architecture": "qwen2",
"downloadSizeMB": 350,
"contextLength": 4096,
"defaultParams": {
"temperature": 0.7,
"maxTokens": 512
},
"metadata": {
"description": "Multilingual model with strong reasoning — Alibaba Qwen2.5 family",
"tier": 2
}
},
"llama-3.2-1b-flare": {
"engine": "flare",
"modelName": "Llama-3.2-1B-Instruct",
"modelType": "text-generation",
"repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"url": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf",
"pipeline": "text-generation",
"defaultQuantization": "Q8_0",
"quantizations": ["Q8_0"],
"architecture": "llama",
"downloadSizeMB": 1200,
"contextLength": 4096,
"defaultParams": {
"temperature": 0.7,
"maxTokens": 512
},
"metadata": {
"description": "Best quality in the Flare tier — Meta Llama 3.2 1B full precision Q8",
"tier": 3
}
},
"llama-3.2-1b-flare-q4": {
"engine": "flare",
"modelName": "Llama-3.2-1B-Instruct-Q4",
"modelType": "text-generation",
"repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"url": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
"pipeline": "text-generation",
"defaultQuantization": "Q4_K_M",
"quantizations": ["Q4_K_M"],
"architecture": "llama",
"downloadSizeMB": 600,
"contextLength": 4096,
"defaultParams": {
"temperature": 0.7,
"maxTokens": 512
},
"metadata": {
"description": "Balanced quality/size — Llama 3.2 1B at Q4_K_M quantization",
"tier": 3
}
}
}
14 changes: 13 additions & 1 deletion src/config/models/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,16 @@ export interface DemucsConfig extends BaseModelConfig {
executionProviders?: ('webgpu' | 'wasm')[];
}

export type ModelConfig = MLCConfig | TransformersConfig | DemucsConfig;
/**
 * Configuration for a model served by the Flare (GGUF / llama-style) engine.
 *
 * Entries live in `flare-models.json`; the optional fields below mirror the
 * keys used there so registry entries can be read through this type without
 * casts.
 */
export interface FlareConfig extends BaseModelConfig {
  engine: 'flare';
  /** Direct URL to the GGUF model file (overrides registry URL) */
  url?: string;
  /** Model architecture hint (e.g. 'llama', 'mistral', 'qwen2') */
  architecture?: string;
  /** Quantization string (e.g. 'Q8_0', 'Q4_K_M') */
  quantization?: string;
  /** Quantization used when none is requested explicitly (e.g. 'Q8_0') — matches the `defaultQuantization` registry key */
  defaultQuantization?: string;
  /** All quantizations published for this model — matches the `quantizations` registry key */
  quantizations?: string[];
  /** Approximate download size in MB */
  downloadSizeMB?: number;
}

/** Union of every engine-specific model configuration. */
export type ModelConfig = MLCConfig | TransformersConfig | DemucsConfig | FlareConfig;
62 changes: 56 additions & 6 deletions src/core/llm/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@ import { MLCEngineWrapper } from '../../engines/mlc-engine-wrapper';
import { TransformersEngineWrapper } from '../../engines/transformer-engine-wrapper';
import { DemucsEngine } from '../../engines/demucs-engine';
import type { SeparateOptions, SeparationResult } from '../../engines/demucs-engine';
import type { ModelConfig, MLCConfig, TransformersConfig, DemucsConfig } from '../../config/models/types';
import { FlareEngineWrapper, FlareAdapterOptions } from '../../engines/flare-engine-wrapper';
import type { ModelConfig, MLCConfig, TransformersConfig, DemucsConfig, FlareConfig } from '../../config/models/types';
import mlcModels from '../../config/models/mlc-models.json';
import transformersModels from '../../config/models/transformers-models.json';
import demucsModels from '../../config/models/demucs-models.json';
import flareModels from '../../config/models/flare-models.json';

// Combine model configurations from every engine registry into one lookup
// table keyed by model identifier. Note: object-spread order matters — on a
// duplicate key, a later registry entry silently overrides an earlier one
// (flare > demucs > transformers > mlc here).
const MODEL_CONFIG: Record<string, ModelConfig> = {
...(mlcModels as Record<string, MLCConfig>),
...(transformersModels as Record<string, TransformersConfig>),
...(demucsModels as Record<string, DemucsConfig>),
...(flareModels as Record<string, FlareConfig>),
};

export class BrowserAI {
private engine: MLCEngineWrapper | TransformersEngineWrapper | DemucsEngine | null;
private engine: MLCEngineWrapper | TransformersEngineWrapper | DemucsEngine | FlareEngineWrapper | null;
public currentModel: ModelConfig | null;
private mediaRecorder: MediaRecorder | null = null;
private mediaStream: MediaStream | null = null;
Expand Down Expand Up @@ -46,13 +49,12 @@ export class BrowserAI {
throw new Error(`Model identifier "${this.modelIdentifier}" not recognized.`);
}

// Check if model exists in both MLC and Transformers configs
// Check if model exists in MLC config (preferred for text-generation)
const mlcVersion = (mlcModels as Record<string, MLCConfig>)[this.modelIdentifier];
// const transformersVersion = (transformersModels as Record<string, TransformersConfig>)[modelIdentifier];

// For text-generation models, prefer MLC if available
// For text-generation models, prefer MLC if available (unless explicitly requesting flare)
let engineToUse = modelConfig.engine;
if (modelConfig.modelType === 'text-generation' && mlcVersion) {
if (modelConfig.modelType === 'text-generation' && mlcVersion && engineToUse !== 'flare') {
engineToUse = 'mlc';
}

Expand All @@ -69,6 +71,12 @@ export class BrowserAI {
this.engine = new DemucsEngine();
await this.engine.loadModel(modelConfig, options);
break;
case 'flare': {
const flareEngine = new FlareEngineWrapper();
await flareEngine.loadModel(modelConfig as FlareConfig, options);
this.engine = flareEngine;
break;
}
default:
throw new Error(`Engine "${engineToUse}" not supported.`);
}
Expand Down Expand Up @@ -100,6 +108,11 @@ export class BrowserAI {
if (this.engine instanceof DemucsEngine) {
throw new Error('Current engine does not support embeddings.');
}
if (this.engine instanceof FlareEngineWrapper) {
throw new Error(
'Flare engine does not support embeddings. Use a Transformers.js feature-extraction model instead.',
);
}
return await this.engine.embed(input, options);
}

Expand Down Expand Up @@ -270,6 +283,43 @@ export class BrowserAI {
throw new Error('Current engine does not support multimodal generation');
}

/**
* Load a LoRA adapter into the current Flare engine.
*
* Only supported when using the Flare engine. The adapter must be in
* SafeTensors format and compatible with the loaded base model.
*
* @example
* ```ts
* await ai.loadModel('llama-3.2-1b-flare');
* await ai.loadAdapter({ url: 'https://hf.co/.../adapter.safetensors' });
* ```
*/
async loadAdapter(options: FlareAdapterOptions): Promise<void> {
if (!(this.engine instanceof FlareEngineWrapper)) {
throw new Error('loadAdapter is only supported with the Flare engine.');
}
return this.engine.loadAdapter(options);
}

/**
* Check whether the current Flare model is cached in OPFS for instant reload.
*/
async isFlareModelCached(): Promise<boolean> {
if (!(this.engine instanceof FlareEngineWrapper)) return false;
return this.engine.isCached();
}

/**
* Delete the OPFS cache entry for the current Flare model.
*/
async clearFlareModelCache(): Promise<void> {
if (!(this.engine instanceof FlareEngineWrapper)) {
throw new Error('clearFlareModelCache is only supported with the Flare engine.');
}
return this.engine.clearCache();
}

async clearModelCache(): Promise<void> {
try {
const cacheNames = ['webllm/config', 'webllm/wasm', 'webllm/model'];
Expand Down
Loading
Loading