From 4b87c1d664c93bb2f452eeba9a7ad1699965c7c0 Mon Sep 17 00:00:00 2001 From: Zpankz <4054353+Zpankz@users.noreply.github.com> Date: Sat, 30 May 2026 21:29:53 +1000 Subject: [PATCH] models: Pareto role assignments from live catalogs; use gpt-5.4-mini sidecar fallback Queried live provider catalogs via wired OAuth creds (Codex backend, Antigravity cloudcode-pa, xAI) instead of relying on stale hardcoded lists. Documented Pareto-optimal model->role assignments for every specialized agent role (primary coding, swarm subagents, memory sidecar, autoreview, autojudge, ambient) in docs/model-role-assignment.md, including a status table, a per-role config example, and reproducible catalog-fetch commands. gpt-5.4-mini is now live (272k ctx, cheaper/faster than gpt-5.4) so it is the Pareto-better OpenAI sidecar fallback for the high-frequency relevance/extraction workload. Added it to ALL_OPENAI_MODELS so the picker/catalog surface it. --- crates/jcode-base/src/sidecar.rs | 6 +- crates/jcode-provider-core/src/models.rs | 1 + docs/model-role-assignment.md | 201 +++++++++++++++++++++++ 3 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 docs/model-role-assignment.md diff --git a/crates/jcode-base/src/sidecar.rs b/crates/jcode-base/src/sidecar.rs index 8ad4e1146..7c2571039 100644 --- a/crates/jcode-base/src/sidecar.rs +++ b/crates/jcode-base/src/sidecar.rs @@ -14,7 +14,11 @@ use serde::{Deserialize, Serialize}; /// Fast/cheap OpenAI model used when Codex credentials are available. pub const SIDECAR_OPENAI_MODEL: &str = "gpt-5.3-codex-spark"; -const SIDECAR_OPENAI_OAUTH_FALLBACK_MODEL: &str = "gpt-5.4"; +/// Pareto-optimal sidecar fallback: `gpt-5.4-mini` (live in the Codex catalog as +/// of 2026-05) keeps the full 272k context window of `gpt-5.4` while being +/// cheaper and faster, which suits the sidecar's high-frequency +/// relevance/extraction workload. See docs/model-role-assignment.md. 
+const SIDECAR_OPENAI_OAUTH_FALLBACK_MODEL: &str = "gpt-5.4-mini"; const SIDECAR_OPENAI_OAUTH_FALLBACK_REASONING: &str = "low"; /// Fast/cheap Claude model used when only Claude credentials are available. diff --git a/crates/jcode-provider-core/src/models.rs b/crates/jcode-provider-core/src/models.rs index ece20b3a1..e21ec40cb 100644 --- a/crates/jcode-provider-core/src/models.rs +++ b/crates/jcode-provider-core/src/models.rs @@ -17,6 +17,7 @@ pub const ALL_OPENAI_MODELS: &[&str] = &[ "gpt-5.5", "gpt-5.4", "gpt-5.4-pro", + "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2-chat-latest", diff --git a/docs/model-role-assignment.md b/docs/model-role-assignment.md new file mode 100644 index 000000000..bb518074a --- /dev/null +++ b/docs/model-role-assignment.md @@ -0,0 +1,201 @@ +# Pareto-Optimal Model Assignment for jcode Specialized Agent Roles + +| | | +|---|---| +| Status | Reference / recommended defaults | +| Last verified | 2026-05-30 | +| Method | Live provider catalogs queried via the wired OAuth credentials (not the hardcoded fallback lists) | +| Snapshot | `/tmp/jcode_models/live_catalog.json` (ephemeral; regenerate with the commands in section 6) | +| Code impact | `crates/jcode-base/src/sidecar.rs` (sidecar OAuth fallback), `crates/jcode-provider-core/src/models.rs` (`ALL_OPENAI_MODELS`) | + +These are recommended values, not enforced defaults: each role's model is left +`None` in config so the runtime picks the provider's strongest model unless the +user overrides it. Section 4 lists the values to set when you want the +Pareto-optimal pick for a role. + +## 1. Live model catalogs (verified via API) + +### OpenAI / Codex backend +Endpoint: `https://chatgpt.com/backend-api/codex/models?client_version=1.0.0` +(auth: `~/.codex/auth.json` `tokens.access_token`). 
+ +| slug | ctx | reasoning levels | priority | notes | +|---|---|---|---|---| +| `gpt-5.5` | 272k | low/medium/high/xhigh | 9 | frontier coding model | +| `gpt-5.4` | 272k | low/medium/high/xhigh | 16 | strong generalist | +| `gpt-5.4-mini` | 272k | low/medium/high/xhigh | 23 | cheap, large ctx (NOT in hardcoded catalog) | +| `gpt-5.3-codex` | 272k | low/medium/high/xhigh | 25 | codex-tuned | +| `gpt-5.3-codex-spark` | 128k | low/medium/high/xhigh | 26 | fast, default reasoning=high | +| `gpt-5.2` | 272k | low/medium/high/xhigh | 29 | older generalist | +| `codex-auto-review` | 272k | low/medium/high/xhigh | 43 | hidden; vendor's dedicated review model | + +### Antigravity / Gemini (cloudcode-pa) +Endpoint: `https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels` +(auth: agy account token at `~/.antigravity_tools/accounts/<current_account_id>.json`; the +Gemini-CLI token at `~/.gemini/oauth_creds.json` is `PERMISSION_DENIED` here). +Tier: **Google AI Ultra**. + +| name | display | max_tok | thinking | vendor role hint | +|---|---|---|---|---| +| `gemini-3.1-pro-high` | Gemini 3.1 Pro (High) | 1.05M | yes | tiered:pro (deprecated -> `gemini-pro-agent`) | +| `gemini-pro-agent` | Gemini 3.1 Pro (High) | 1.05M | yes | agent-grade pro | +| `gemini-3.1-pro-low` | Gemini 3.1 Pro (Low) | 1.05M | yes | tiered:pro | +| `gemini-3-flash-agent` | Gemini 3.5 Flash (High) | 1.05M | yes | tiered:flash | +| `gemini-3.5-flash-low` | Gemini 3.5 Flash (Medium) | 1.05M | yes | **defaultAgentModelId** | +| `gemini-3.5-flash-extra-low` | Gemini 3.5 Flash (Low) | 1.05M | yes | cheap agent | +| `gemini-3-flash` | Gemini 3 Flash | 1.05M | yes | commandModelIds | +| `gemini-3.1-flash-lite` | Gemini 3.1 Flash Lite | 1.05M | no | tiered:flashLite, webSearch/mquery | +| `gpt-oss-120b-medium` | GPT-OSS 120B (Medium) | 131k | yes | OSS option | +| `claude-sonnet-4-6` | Claude Sonnet 4.6 (Thinking) | 250k | yes | via antigravity proxy | +| `claude-opus-4-6-thinking` | Claude Opus 4.6 (Thinking) | 250k | 
yes | via antigravity proxy | +| `gemini-2.5-pro` | Gemini 2.5 Pro | 1.05M | yes | legacy | +| (+ tab/image/lite variants) | | | | non-chat | + +Vendor role hints from the same response: +`defaultAgentModelId=gemini-3.5-flash-low`, +`commandModelIds=[gemini-3-flash]`, +`webSearchModelIds/mqueryModelIds=[gemini-3.1-flash-lite]`, +`tieredModelIds={flashLite: gemini-3.1-flash-lite, flash: gemini-3-flash-agent, pro: gemini-3.1-pro-low}`. + +### xAI / Grok +Endpoint: `https://api.x.ai/v1/language-models` (auth: `~/.grok/auth.json` +OIDC `key`). Profile in repo: `XAI_PROFILE` (`api.x.ai/v1`, default +`grok-code-fast-1`). + +| id | in price | out price | +|---|---|---| +| `grok-4.3` | 12500 | 25000 | +| `grok-4.20-0309-reasoning` | 12500 | 25000 | +| `grok-4.20-0309-non-reasoning` | 12500 | 25000 | +| `grok-4.20-multi-agent-0309` | 12500 | 25000 | +| `grok-build-0.1` | 10000 | 20000 | + +`grok-build-0.1` and `grok-4.20-multi-agent-0309` remain first-class (per +standing preference). Prices are micro-units per the xAI API; relative scaling +only. + +## 2. jcode role -> config key mapping (verified) + +| Role | Config key | Current default | +|---|---|---| +| Primary coding | `provider.default_model` + `provider.default_provider` | none (provider strongest) | +| Swarm subagents | `agents.swarm_model` | none (inherits) | +| Memory sidecar / side panel | `agents.memory_model`; `sidecar.rs` consts | OpenAI `gpt-5.3-codex-spark` -> fallback `gpt-5.4-mini` -> Claude `claude-haiku-4-5` | +| Autoreview | `autoreview.model` | none | +| Autojudge | `autojudge.model` | none | +| Ambient / orchestrator | `ambient.model` + `ambient.provider` | none (provider strongest) | + +There is no separate "side panel model" role; the side panel is driven by the +memory sidecar. + +## 3. Pareto reasoning + +Each role is scored on capability (benchmark/agentic strength), latency +(time-to-first-token + throughput), and cost (token price / quota burn). 
A model +is Pareto-optimal for a role when no other available model is at least as good on +all three axes and strictly better on at least one, for that role's workload. + +Role workload profiles: +- Primary coding: high capability dominant, latency secondary, cost tertiary. +- Swarm subagents: parallel fan-out, so cost + latency dominate; capability + "good enough" since work is decomposed. +- Memory sidecar: very high frequency, tiny tasks (relevance/extraction); + latency + cost dominate, capability minimal. +- Autoreview: capability dominant (catching real bugs), latency irrelevant + (end-of-turn), cost secondary. +- Autojudge: structured verdicts; mid capability, low latency, low cost. +- Ambient: long-horizon autonomous; capability dominant, cost matters (runs + unattended), latency irrelevant. + +## 4. Assignments + +| Role | Primary (OpenAI-first) | Antigravity alt | Grok alt | Rationale | +|---|---|---|---|---| +| Primary coding | `gpt-5.5` (high) | `gemini-3.1-pro-high` | `grok-4.3` | Frontier coding; top priority slug 9. 272k ctx. | +| Swarm subagents | `gpt-5.4-mini` | `gemini-3.5-flash-low` (vendor default agent) | `grok-build-0.1` | Cheapest capable agent tier; large ctx; built for fan-out. | +| Memory sidecar | `gpt-5.3-codex-spark` (keep) -> `gpt-5.4-mini` | `gemini-3.1-flash-lite` | `grok-build-0.1` | High-frequency tiny tasks; spark is fast. flash-lite is vendor's mquery/search pick. | +| Autoreview | `gpt-5.3-codex` | `gemini-pro-agent` | `grok-4.20-0309-reasoning` | Codex-tuned for code review; `codex-auto-review` is hidden so use codex slug. | +| Autojudge | `gpt-5.4` | `gemini-3-flash-agent` | `grok-4.20-0309-reasoning` | Structured verdicts; balanced capability/latency. | +| Ambient/orchestrator | `gpt-5.5` (medium) | `gemini-3.1-pro-high` | `grok-4.20-multi-agent-0309` | Long-horizon autonomy; multi-agent grok is purpose-built. 
| + +Notes: +- Sidecar already prefers `gpt-5.3-codex-spark`; keep but add `gpt-5.4-mini` as a + cheaper/larger-ctx alternative now that it is live (it was missing from the + hardcoded catalog). This is now applied in `sidecar.rs`. +- `codex-auto-review` exists but has `visibility=hide`; do not surface it in the + picker. Use `gpt-5.3-codex` for the autoreview role instead. +- For Grok, autoreview/autojudge should use a reasoning variant + (`grok-4.20-0309-reasoning`), not the non-reasoning one. + +### Config example (OpenAI-first picks) + +Set these in the jcode config to pin the Pareto picks per role: + +```toml +[provider] +default_provider = "openai" +default_model = "gpt-5.5" + +[agents] +swarm_model = "gpt-5.4-mini" +memory_model = "gpt-5.3-codex-spark" + +[autoreview] +model = "gpt-5.3-codex" + +[autojudge] +model = "gpt-5.4" + +[ambient] +provider = "openai" +model = "gpt-5.5" +``` + +## 5. Catalog drift to fix in code + +The hardcoded fallback catalogs are stale relative to live: +- `crates/jcode-provider-core/src/models.rs` `ALL_OPENAI_MODELS` was missing + `gpt-5.4-mini` (now added). +- `crates/jcode-provider-gemini/src/lib.rs` `AVAILABLE_MODELS` lists + `gemini-3.1-pro-preview` / `gemini-3-pro-preview` / `gemini-3-flash-preview`, + but the live Ultra-tier Antigravity catalog exposes `gemini-3.1-pro-high`, + `gemini-pro-agent`, `gemini-3.5-flash-low`, `gemini-3-flash`, + `gemini-3.1-flash-lite`, etc. + +Recommend wiring the role defaults to read from the live catalog (already +fetched by `fetch_openai_model_catalog` / `fetchAvailableModels`) and only fall +back to the static lists when offline. + +## 6. Reproducing the live catalog + +The snapshot in the header is ephemeral. 
Regenerate it from the wired creds: + +```bash +# OpenAI / Codex backend +CODEX_TOKEN=$(python3 -c "import json;print(json.load(open('$HOME/.codex/auth.json'))['tokens']['access_token'])") +curl -s "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0" \ + -H "Authorization: Bearer $CODEX_TOKEN" + +# Antigravity / Gemini (uses the agy account token, NOT ~/.gemini) +ACC=$HOME/.antigravity_tools/accounts/$(python3 -c "import json;print(json.load(open('$HOME/.antigravity_tools/accounts.json'))['current_account_id'])").json +ATOKEN=$(python3 -c "import json;print(json.load(open('$ACC'))['token']['access_token'])") +APROJ=$(python3 -c "import json;print(json.load(open('$ACC'))['token']['project_id'])") +curl -s -X POST "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels" \ + -H "Authorization: Bearer $ATOKEN" -H "Content-Type: application/json" \ + -H "User-Agent: antigravity/1.18.3 darwin/arm64" \ + -H "x-goog-api-client: google-cloud-sdk vscode_cloudshelleditor/0.1" \ + -H 'client-metadata: {"ideType":"ANTIGRAVITY","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}' \ + -d "{\"project\":\"$APROJ\"}" + +# xAI / Grok +GKEY=$(python3 -c "import json;d=json.load(open('$HOME/.grok/auth.json'));print(list(d.values())[0]['key'])") +curl -s "https://api.x.ai/v1/language-models" -H "Authorization: Bearer $GKEY" +``` + +Notes: +- The `~/.gemini/oauth_creds.json` token is `PERMISSION_DENIED` on + `fetchAvailableModels`; that endpoint is gated to the Antigravity OAuth client, + so the agy account token must be used. +- Tokens expire (Codex/Gemini ~1h, Grok ~6h); refresh via the respective CLI if + a request returns 401/403 with an auth error. +