diff --git a/docs/docs.json b/docs/docs.json index bc0d63bf..3ac99f1c 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -38,7 +38,8 @@ "pages": [ "features/aliases", "features/user-path", - "features/cache" + "features/cache", + "features/failover" ] }, { diff --git a/docs/features/failover.mdx b/docs/features/failover.mdx new file mode 100644 index 00000000..8e41705b --- /dev/null +++ b/docs/features/failover.mdx @@ -0,0 +1,71 @@ +--- +title: "Failover" +description: "Configure GoModel failover with manual rules, understand experimental auto mode, and know when fallback attempts run." +icon: "shuffle" +keywords: ["failover", "fallback"] +--- + +## Overview + +GoModel exposes failover through the `fallback` config block. + +When a request fails, GoModel can retry it against alternate models. For +predictable behavior, use manual mode. + +## Manual Mode + +Manual mode is the recommended mode today. + +```yaml +fallback: + default_mode: "manual" + manual_rules_path: "config/fallback.json" +``` + +`config/fallback.json` is a JSON object where each model entry contains an +ordered candidate list (array); top-level keys are not ordered: + +```json +{ + "gpt-4o": [ + "azure/gpt-4o", + "gemini/gemini-2.5-pro" + ] +} +``` + +The order-sensitive part is the array under each model entry. + +GoModel tries the listed candidates in order and stops on the first success. +Use bare model names like `gpt-4o` or provider-qualified selectors like +`azure/gpt-4o`. + +If needed, you can override the mode per model with `fallback.overrides`. + +## Auto Mode + +<Warning> + `auto` mode is experimental right now. +</Warning> + +```yaml +fallback: + default_mode: "auto" + manual_rules_path: "config/fallback.json" +``` + +Auto mode keeps any manual candidates first, then appends up to five extra +candidates from the current model registry. It prefers models with the same +request category, similar rankings, overlapping capabilities, and the same +family when possible.
+ +## When It Runs + +Failover is attempted only after the primary request returns one of: + +- a `5xx` status +- a `429` status +- an error indicating that the model is unavailable, unsupported, or not found + +It currently applies to translated `/v1/chat/completions` and `/v1/responses` +requests, not `/v1/embeddings`.