diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e3c60d..8132765 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ All notable changes to `@stackbilt/llm-providers` are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/). Versions use [Semantic Versioning](https://semver.org/). +## [1.6.1] — 2026-05-06 + +### Added +- **Cerebras `openai/gpt-oss-120b`** — added to `CerebrasProvider.models`, `TOOL_CAPABLE_MODELS`, and the model catalog with `HIGH_PERFORMANCE | TOOL_CALLING | BALANCED` tiers, 128k context, and tool support + ## [1.6.0] — 2026-04-27 ### Added diff --git a/package.json b/package.json index ccf2e3d..9cc98a1 100755 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@stackbilt/llm-providers", - "version": "1.6.0", + "version": "1.6.1", "description": "Multi-LLM failover with circuit breakers, cost tracking, and intelligent retry. Cloudflare Workers native.", "author": "Stackbilt ", "license": "Apache-2.0", diff --git a/src/__tests__/cerebras.test.ts b/src/__tests__/cerebras.test.ts index e71e4c5..dd926fd 100644 --- a/src/__tests__/cerebras.test.ts +++ b/src/__tests__/cerebras.test.ts @@ -71,7 +71,7 @@ describe('CerebrasProvider', () => { describe('getModels', () => { it('should return available models', () => { const models = provider.getModels(); - expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507']); + expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', 'openai/gpt-oss-120b']); }); it('should return a copy of the models array', () => { diff --git a/src/model-catalog.ts b/src/model-catalog.ts index 19cdfca..ee993ad 100644 --- a/src/model-catalog.ts +++ b/src/model-catalog.ts @@ -400,6 +400,15 @@ export const MODEL_CATALOG: readonly ModelCatalogEntry[] = [ outputTokenCost: 0.0012, description: 'Cerebras Qwen 3 235B MoE' }, { speed: 4, quality: 4, cost: 3 }), + entry('cerebras', 'openai/gpt-oss-120b', 'active', ['TOOL_CALLING', 'BALANCED', 'HIGH_PERFORMANCE'], { + maxContextLength: 128000, + supportsStreaming: true, + supportsTools: true, + supportsBatching: false, + inputTokenCost: 0.00015, + outputTokenCost: 0.0006, + description: 'Cerebras GPT-OSS 120B' + }, { speed: 5, quality: 4, cost: 4 }), entry('groq', 'llama-3.3-70b-versatile', 'active', ['HIGH_PERFORMANCE', 'TOOL_CALLING', 'BALANCED'], { maxContextLength: 128000, diff --git a/src/providers/cerebras.ts b/src/providers/cerebras.ts index 4b87aef..1064b71 100644 --- a/src/providers/cerebras.ts +++ b/src/providers/cerebras.ts @@ -113,6 +113,7 @@ interface CerebrasResponse { const TOOL_CAPABLE_MODELS = new Set([ 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', + 'openai/gpt-oss-120b', ]); export class CerebrasProvider extends BaseProvider { @@ -122,6 +123,7 @@ export class CerebrasProvider extends BaseProvider { 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', + 'openai/gpt-oss-120b', ]; supportsStreaming = true; supportsTools = true; @@ -240,6 +242,15 @@ export class CerebrasProvider extends BaseProvider { inputTokenCost: 0.0006, // $0.60 per 1M tokens outputTokenCost: 0.0012, // $1.20 per 1M tokens description: 'Qwen 3 235B MoE (22B active) - Tool calling, structured outputs (Preview)' + }, + 'openai/gpt-oss-120b': { + maxContextLength: 128000, + supportsStreaming: true, + supportsTools: true, + supportsBatching: false, + inputTokenCost: 0.00015, // $0.15 per 1M tokens (placeholder — update once Cerebras publishes official pricing) + outputTokenCost: 0.0006, // $0.60 per 1M tokens (placeholder — mirrored from Groq) + description: 'GPT-OSS 120B on Cerebras' } }; }