Stackbilt-dev · stackbilt-admin · May 7, 2026 · May 6, 2026 · May 7, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,11 @@
 All notable changes to `@stackbilt/llm-providers` are documented here.
 Format follows [Keep a Changelog](https://keepachangelog.com/). Versions use [Semantic Versioning](https://semver.org/).
 
+## [1.6.1] — 2026-05-06
+
+### Added
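+- **Cerebras `openai/gpt-oss-120b`** — added to `CerebrasProvider.models`, `TOOL_CAPABLE_MODELS`, and the model catalog with `HIGH_PERFORMANCE | TOOL_CALLING | BALANCED` tiers, 128k context, and tool support; token pricing ($0.15 input / $0.60 output per 1M tokens) is a placeholder mirrored from Groq until Cerebras publishes official rates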
+
 ## [1.6.0] — 2026-04-27
 
 ### Added

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@stackbilt/llm-providers",
-  "version": "1.6.0",
+  "version": "1.6.1",
   "description": "Multi-LLM failover with circuit breakers, cost tracking, and intelligent retry. Cloudflare Workers native.",
   "author": "Stackbilt <admin@stackbilt.dev>",
   "license": "Apache-2.0",

diff --git a/src/__tests__/cerebras.test.ts b/src/__tests__/cerebras.test.ts
@@ -71,7 +71,7 @@ describe('CerebrasProvider', () => {
   describe('getModels', () => {
     it('should return available models', () => {
       const models = provider.getModels();
-      expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507']);
+      expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', 'openai/gpt-oss-120b']);
     });
 
     it('should return a copy of the models array', () => {

diff --git a/src/model-catalog.ts b/src/model-catalog.ts
@@ -400,6 +400,15 @@ export const MODEL_CATALOG: readonly ModelCatalogEntry[] = [
     outputTokenCost: 0.0012,
     description: 'Cerebras Qwen 3 235B MoE'
   }, { speed: 4, quality: 4, cost: 3 }),
+  entry('cerebras', 'openai/gpt-oss-120b', 'active', ['TOOL_CALLING', 'BALANCED', 'HIGH_PERFORMANCE'], {
+    maxContextLength: 128000,
+    supportsStreaming: true,
+    supportsTools: true,
+    supportsBatching: false,
+    inputTokenCost: 0.00015, // placeholder: same value as the CerebrasProvider capabilities entry ($0.15 per 1M tokens); update both once official pricing exists
+    outputTokenCost: 0.0006, // placeholder: same value as the CerebrasProvider capabilities entry ($0.60 per 1M tokens, mirrored from Groq)
+    description: 'Cerebras GPT-OSS 120B'
+  }, { speed: 5, quality: 4, cost: 4 }),
 
   entry('groq', 'llama-3.3-70b-versatile', 'active', ['HIGH_PERFORMANCE', 'TOOL_CALLING', 'BALANCED'], {
     maxContextLength: 128000,

diff --git a/src/providers/cerebras.ts b/src/providers/cerebras.ts
@@ -113,6 +113,7 @@ interface CerebrasResponse {
 const TOOL_CAPABLE_MODELS = new Set([
   'zai-glm-4.7',
   'qwen-3-235b-a22b-instruct-2507',
+  'openai/gpt-oss-120b', // also declared with supportsTools: true in the capabilities map and the model catalog
 ]);
 
 export class CerebrasProvider extends BaseProvider {
@@ -122,6 +123,7 @@ export class CerebrasProvider extends BaseProvider {
     'llama-3.3-70b',
     'zai-glm-4.7',
     'qwen-3-235b-a22b-instruct-2507',
+    'openai/gpt-oss-120b',
   ];
   supportsStreaming = true;
   supportsTools = true;
@@ -240,6 +242,15 @@ export class CerebrasProvider extends BaseProvider {
         inputTokenCost: 0.0006, // $0.60 per 1M tokens
         outputTokenCost: 0.0012, // $1.20 per 1M tokens
         description: 'Qwen 3 235B MoE (22B active) - Tool calling, structured outputs (Preview)'
+      },
+      'openai/gpt-oss-120b': {
+        maxContextLength: 128000,
+        supportsStreaming: true,
+        supportsTools: true,
+        supportsBatching: false,
+        inputTokenCost: 0.00015, // $0.15 per 1M tokens (placeholder — update once Cerebras publishes official pricing)
+        outputTokenCost: 0.0006, // $0.60 per 1M tokens (placeholder — mirrored from Groq)
+        description: 'GPT-OSS 120B on Cerebras'
       }
     };
   }