Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
All notable changes to `@stackbilt/llm-providers` are documented here.
Format follows [Keep a Changelog](https://keepachangelog.com/). Versions use [Semantic Versioning](https://semver.org/).

## [1.6.1] — 2026-05-06

### Added
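- **Cerebras `openai/gpt-oss-120b`** — added to `CerebrasProvider.models`, `TOOL_CAPABLE_MODELS`, and the model catalog with `HIGH_PERFORMANCE | TOOL_CALLING | BALANCED` tiers, 128k context, and tool support. Token costs ($0.15 input / $0.60 output per 1M tokens) are placeholders mirrored from Groq until Cerebras publishes official pricing.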

## [1.6.0] — 2026-04-27

### Added
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@stackbilt/llm-providers",
"version": "1.6.0",
"version": "1.6.1",
"description": "Multi-LLM failover with circuit breakers, cost tracking, and intelligent retry. Cloudflare Workers native.",
"author": "Stackbilt <admin@stackbilt.dev>",
"license": "Apache-2.0",
Expand Down
2 changes: 1 addition & 1 deletion src/__tests__/cerebras.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ describe('CerebrasProvider', () => {
describe('getModels', () => {
it('should return available models', () => {
const models = provider.getModels();
expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507']);
expect(models).toEqual(['llama-3.1-8b', 'llama-3.3-70b', 'zai-glm-4.7', 'qwen-3-235b-a22b-instruct-2507', 'openai/gpt-oss-120b']);
});

it('should return a copy of the models array', () => {
Expand Down
9 changes: 9 additions & 0 deletions src/model-catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,15 @@ export const MODEL_CATALOG: readonly ModelCatalogEntry[] = [
outputTokenCost: 0.0012,
description: 'Cerebras Qwen 3 235B MoE'
}, { speed: 4, quality: 4, cost: 3 }),
entry('cerebras', 'openai/gpt-oss-120b', 'active', ['TOOL_CALLING', 'BALANCED', 'HIGH_PERFORMANCE'], {
maxContextLength: 128000,
supportsStreaming: true,
supportsTools: true,
supportsBatching: false,
inputTokenCost: 0.00015,
outputTokenCost: 0.0006,
description: 'Cerebras GPT-OSS 120B'
}, { speed: 5, quality: 4, cost: 4 }),

entry('groq', 'llama-3.3-70b-versatile', 'active', ['HIGH_PERFORMANCE', 'TOOL_CALLING', 'BALANCED'], {
maxContextLength: 128000,
Expand Down
11 changes: 11 additions & 0 deletions src/providers/cerebras.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ interface CerebrasResponse {
/**
 * Cerebras model IDs flagged as tool-capable.
 *
 * A model ID added here is expected to also appear in
 * `CerebrasProvider.models` and in the model catalog. Not every model in
 * `CerebrasProvider.models` is listed here (e.g. `llama-3.3-70b` is absent).
 *
 * NOTE(review): the code that consults this set is outside the visible
 * hunk — confirm how membership is enforced before relying on it.
 */
const TOOL_CAPABLE_MODELS = new Set<string>([
  'zai-glm-4.7',
  'qwen-3-235b-a22b-instruct-2507',
  'openai/gpt-oss-120b',
]);

export class CerebrasProvider extends BaseProvider {
Expand All @@ -122,6 +123,7 @@ export class CerebrasProvider extends BaseProvider {
'llama-3.3-70b',
'zai-glm-4.7',
'qwen-3-235b-a22b-instruct-2507',
'openai/gpt-oss-120b',
];
supportsStreaming = true;
supportsTools = true;
Expand Down Expand Up @@ -240,6 +242,15 @@ export class CerebrasProvider extends BaseProvider {
inputTokenCost: 0.0006, // $0.60 per 1M tokens
outputTokenCost: 0.0012, // $1.20 per 1M tokens
description: 'Qwen 3 235B MoE (22B active) - Tool calling, structured outputs (Preview)'
},
'openai/gpt-oss-120b': {
maxContextLength: 128000,
supportsStreaming: true,
supportsTools: true,
supportsBatching: false,
inputTokenCost: 0.00015, // $0.15 per 1M tokens (placeholder — update once Cerebras publishes official pricing)
outputTokenCost: 0.0006, // $0.60 per 1M tokens (placeholder — mirrored from Groq)
description: 'GPT-OSS 120B on Cerebras'
}
};
}
Expand Down