From 41d6dac6ba9d170d09eee1685a446b8b08cd0769 Mon Sep 17 00:00:00 2001 From: Stackbilt Date: Wed, 6 May 2026 16:16:00 -0500 Subject: [PATCH 1/2] feat(cerebras): add openai/gpt-oss-120b model (v1.6.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Registers Cerebras GPT-OSS 120B as a supported model with tool-calling capability — equivalent perf/cost profile to Groq's GPT-OSS 120B entry. Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 5 +++++ package.json | 2 +- src/model-catalog.ts | 9 +++++++++ src/providers/cerebras.ts | 2 ++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e3c60d..8132765 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ All notable changes to `@stackbilt/llm-providers` are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/). Versions use [Semantic Versioning](https://semver.org/). +## [1.6.1] — 2026-05-06 + +### Added +- **Cerebras `openai/gpt-oss-120b`** — added to `CerebrasProvider.models`, `TOOL_CAPABLE_MODELS`, and the model catalog with `HIGH_PERFORMANCE | TOOL_CALLING | BALANCED` tiers, 128k context, and tool support + ## [1.6.0] — 2026-04-27 ### Added diff --git a/package.json b/package.json index ccf2e3d..9cc98a1 100755 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@stackbilt/llm-providers", - "version": "1.6.0", + "version": "1.6.1", "description": "Multi-LLM failover with circuit breakers, cost tracking, and intelligent retry. Cloudflare Workers native.", "author": "Stackbilt ", "license": "Apache-2.0", diff --git a/src/model-catalog.ts b/src/model-catalog.ts index 19cdfca..ee993ad 100644 --- a/src/model-catalog.ts +++ b/src/model-catalog.ts @@ -400,6 +400,15 @@ export const MODEL_CATALOG: readonly ModelCatalogEntry[] = [ outputTokenCost: 0.0012, description: 'Cerebras Qwen 3 235B MoE' }, { speed: 4, quality: 4, cost: 3 }), + entry('cerebras', 'openai/gpt-oss-120b', 'active', ['TOOL_CALLING', 'BALANCED', 'HIGH_PERFORMANCE'], { + maxContextLength: 128000, + supportsStreaming: true, + supportsTools: true, + supportsBatching: false, + inputTokenCost: 0.00015, + outputTokenCost: 0.0006, + description: 'Cerebras GPT-OSS 120B' + }, { speed: 5, quality: 4, cost: 4 }), entry('groq', 'llama-3.3-70b-versatile', 'active', ['HIGH_PERFORMANCE', 'TOOL_CALLING', 'BALANCED'], { maxContextLength: 128000, diff --git a/src/providers/cerebras.ts b/src/providers/cerebras.ts index 4b87aef..ab15ced 100644 --- a/src/providers/cerebras.ts +++ b/src/providers/cerebras.ts @@ -113,6 +113,7 @@ interface CerebrasResponse { const TOOL_CAPABLE_MODELS = new Set([ 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', + 'openai/gpt-oss-120b', ]); export class CerebrasProvider extends BaseProvider { @@ -122,6 +123,7 @@ export class CerebrasProvider extends BaseProvider { 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', + 'openai/gpt-oss-120b', ]; supportsStreaming = true; supportsTools = true; From 45621d09a1c550327cb7bedb9557e15976c80699 Mon Sep 17 00:00:00 2001 From: Stackbilt Date: Thu, 7 May 2026 12:05:17 -0500 Subject: [PATCH 2/2] fix(cerebras): add openai/gpt-oss-120b to getModelCapabilities() The model was registered in models[] and TOOL_CAPABLE_MODELS but missing from getModelCapabilities(). estimateCost() returns 0 when the model is absent, silently disabling quota pre-checks and cost tracking for all requests using this model. Also updates the hardcoded model list assertion in cerebras.test.ts. Co-Authored-By: Claude Sonnet 4.6 --- src/__tests__/cerebras.test.ts | 2 +- src/providers/cerebras.ts | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/__tests__/cerebras.test.ts b/src/__tests__/cerebras.test.ts index e71e4c5..dd926fd 100644 --- a/src/__tests__/cerebras.test.ts +++ b/src/__tests__/cerebras.test.ts @@ -71,7 +71,7 @@ describe('CerebrasProvider', () => { describe('getModels', () => { it('should return available models', () => { const models = provider.getModels(); - expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507']); + expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', 'openai/gpt-oss-120b']); }); it('should return a copy of the models array', () => { diff --git a/src/providers/cerebras.ts b/src/providers/cerebras.ts index ab15ced..1064b71 100644 --- a/src/providers/cerebras.ts +++ b/src/providers/cerebras.ts @@ -242,6 +242,15 @@ export class CerebrasProvider extends BaseProvider { inputTokenCost: 0.0006, // $0.60 per 1M tokens outputTokenCost: 0.0012, // $1.20 per 1M tokens description: 'Qwen 3 235B MoE (22B active) - Tool calling, structured outputs (Preview)' + }, + 'openai/gpt-oss-120b': { + maxContextLength: 128000, + supportsStreaming: true, + supportsTools: true, + supportsBatching: false, + inputTokenCost: 0.00015, // $0.15 per 1M tokens (placeholder — update once Cerebras publishes official pricing) + outputTokenCost: 0.0006, // $0.60 per 1M tokens (placeholder — mirrored from Groq) + description: 'GPT-OSS 120B on Cerebras' } }; }