From 7f09987b97c312ebf18824c7ae75568872920f1e Mon Sep 17 00:00:00 2001
From: erikhoward <erikhoward@pm.me>
Date: Tue, 12 May 2026 23:08:16 -0700
Subject: [PATCH] docs: update model references to newer versions

Update documentation and examples to reference newer AI models that
were added in the provider model definitions update:

- README.md: Update model tables for xAI, Z.ai, and Gemini; update code
  examples, CLI examples, and the sample config to use claude-sonnet-4-6
  and gpt-5.4
- docs/PROVIDERS.md: Update the OpenAI, Anthropic, Gemini, xAI, and Z.ai
  model tables with new models
- examples/chat/responses-api/main.go: Update to use GPT-5.4

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 README.md                           | 45 ++++++++++++-------
 docs/PROVIDERS.md                   | 69 ++++++++++++++++++++++++-----
 examples/chat/responses-api/main.go | 16 +++----
 3 files changed, 94 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 50d3b42..36dceac 100644
--- a/README.md
+++ b/README.md
@@ -118,8 +118,8 @@ func main() {
     // Create a client
     client := core.NewClient(provider)
 
-    // Send a chat request
-    resp, err := client.Chat("claude-sonnet-4-5").
+    // Send a chat request (using latest Claude Sonnet 4.6)
+    resp, err := client.Chat("claude-sonnet-4-6").
         System("You are a helpful assistant.").
         User("What is the capital of France?").
         GetResponse(context.Background())
@@ -646,13 +646,13 @@ resp, _ := provider.EditImage(ctx, &core.ImageEditRequest{
 | `dall-e-3` | High quality (deprecated May 2026) |
 | `dall-e-2` | Lower cost, inpainting (deprecated May 2026) |
 
-### Using the Responses API (GPT-5)
+### Using the Responses API (GPT-5.4)
 
-GPT-5 models automatically use OpenAI's Responses API, which provides advanced features like reasoning, built-in tools, and response chaining.
+GPT-5 and newer models automatically use OpenAI's Responses API, which provides advanced features like reasoning, built-in tools, and response chaining.
 
 ```go
-// GPT-5 uses the Responses API automatically
-resp, err := client.Chat("gpt-5").
+// GPT-5.4 uses the Responses API automatically
+resp, err := client.Chat("gpt-5.4").
     Instructions("You are a helpful research assistant.").
     User("What are the latest developments in quantum computing?").
     ReasoningEffort(core.ReasoningEffortHigh).
@@ -673,7 +673,7 @@ if resp.Reasoning != nil {
 }
 
 // Response chaining - continue from a previous response
-followUp, err := client.Chat("gpt-5").
+followUp, err := client.Chat("gpt-5.4").
     ContinueFrom(resp.ID).
     User("Can you elaborate on the most promising approach?").
     GetResponse(ctx)
@@ -694,7 +694,7 @@ iris keys set ollama  # Only needed for Ollama Cloud
 iris chat --provider openai --model gpt-4o --prompt "Hello, world!"
 
 # Chat with Anthropic Claude
-iris chat --provider anthropic --model claude-sonnet-4-5 --prompt "Hello, world!"
+iris chat --provider anthropic --model claude-sonnet-4-6 --prompt "Hello, world!"
 
 # Chat with Google Gemini
 iris chat --provider gemini --model gemini-2.5-flash --prompt "Hello, world!"
@@ -713,7 +713,7 @@ iris chat --provider openai --model gpt-5 --prompt "Explain quantum entanglement
 
 # Stream responses
 iris chat --provider openai --model gpt-4o --prompt "Tell me a story" --stream
-iris chat --provider anthropic --model claude-sonnet-4-5 --prompt "Tell me a story" --stream
+iris chat --provider anthropic --model claude-sonnet-4-6 --prompt "Tell me a story" --stream
 
 # Get JSON output
 iris chat --provider openai --model gpt-4o --prompt "Hello" --json
@@ -753,7 +753,7 @@ Iris looks for configuration at `~/.iris/config.yaml`:
 
 ```yaml
 default_provider: openai
-default_model: gpt-5  # or gpt-4o for older models
+default_model: gpt-5.4  # or gpt-4o for older models
 
 providers:
   openai:
@@ -821,20 +821,28 @@ See [docs/SECURITY.md](docs/SECURITY.md) for comprehensive security documentatio
 
 | Model ID | Features |
 |----------|----------|
-| `grok-3` | Chat, Streaming, Tools, Reasoning |
-| `grok-3-mini` | Chat, Streaming, Tools, Reasoning (exposes reasoning_content) |
-| `grok-4` | Chat, Streaming, Tools, Reasoning (latest) |
+| `grok-4.20-multi-agent-beta-0309` | Chat, Streaming, Tools, Reasoning (multi-agent beta) |
+| `grok-4.20-beta-0309-reasoning` | Chat, Streaming, Tools, Reasoning (beta) |
+| `grok-4.20-beta-0309-non-reasoning` | Chat, Streaming, Tools (beta) |
+| `grok-4.1` | Chat, Streaming, Tools |
+| `grok-4-1-fast-non-reasoning` | Chat, Streaming, Tools (default for CLI) |
+| `grok-4-1-fast-reasoning` | Chat, Streaming, Tools, Reasoning |
+| `grok-4` | Chat, Streaming, Tools, Reasoning |
 | `grok-4-fast-non-reasoning` | Chat, Streaming, Tools |
 | `grok-4-fast-reasoning` | Chat, Streaming, Tools, Reasoning |
+| `grok-3` | Chat, Streaming, Tools, Reasoning |
+| `grok-3-mini` | Chat, Streaming, Tools, Reasoning (exposes reasoning_content) |
 | `grok-code-fast` | Chat, Streaming, Tools (code-optimized) |
-| `grok-4-1-fast-non-reasoning` | Chat, Streaming, Tools (default for CLI) |
-| `grok-4-1-fast-reasoning` | Chat, Streaming, Tools, Reasoning |
 
 ### Z.ai GLM Models
 
 | Model ID | Features |
 |----------|----------|
-| `glm-4.7` | Chat, Streaming, Tools, Thinking (latest flagship) |
+| `glm-5.1` | Chat, Streaming, Tools, Thinking (latest) |
+| `glm-5` | Chat, Streaming, Tools, Thinking |
+| `glm-5-turbo` | Chat, Streaming, Tools, Thinking |
+| `glm-5v-turbo` | Chat, Streaming, Tools, Thinking, Vision |
+| `glm-4.7` | Chat, Streaming, Tools, Thinking |
 | `glm-4.7-flash` | Chat, Streaming, Tools (default for CLI) |
 | `glm-4.7-flashx` | Chat, Streaming, Tools |
 | `glm-4.6` | Chat, Streaming, Tools, Thinking |
@@ -847,6 +855,7 @@ See [docs/SECURITY.md](docs/SECURITY.md) for comprehensive security documentatio
 | `glm-4.5-air` | Chat, Streaming, Tools |
 | `glm-4.5-airx` | Chat, Streaming, Tools |
 | `glm-4.5-flash` | Chat, Streaming, Tools |
+| `glm-for-coding` | Chat, Streaming, Tools, Thinking (code-optimized) |
 | `glm-4-32b-0414-128k` | Chat, Streaming, Tools (128K context) |
 
 ### Perplexity Models
@@ -862,11 +871,15 @@ See [docs/SECURITY.md](docs/SECURITY.md) for comprehensive security documentatio
 
 | Model ID | Features |
 |----------|----------|
+| `gemini-3.1-flash-image-preview` | Chat, Streaming, Image Generation |
 | `gemini-3-pro-preview` | Chat, Streaming, Tools, Reasoning (thinkingLevel) |
 | `gemini-3-flash-preview` | Chat, Streaming, Tools, Reasoning (thinkingLevel) |
+| `gemini-3-pro-image-preview` | Image Generation (Nano Banana Pro) |
 | `gemini-2.5-pro` | Chat, Streaming, Tools, Reasoning (thinkingBudget) |
 | `gemini-2.5-flash` | Chat, Streaming, Tools, Reasoning (thinkingBudget) |
 | `gemini-2.5-flash-lite` | Chat, Streaming, Tools, Reasoning (thinkingBudget) |
+| `gemini-2.5-flash-image` | Image Generation (Nano Banana) |
+| `gemini-2.0-flash-lite` | Chat, Streaming |
 
 ### Ollama Models
 
diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md
index 9701fd8..988568d 100644
--- a/docs/PROVIDERS.md
+++ b/docs/PROVIDERS.md
@@ -34,22 +34,35 @@ This document provides a comprehensive comparison of the AI providers supported
 
 | Model | Display Name | Reasoning | Built-in Tools | Notes |
 |-------|--------------|-----------|----------------|-------|
-| gpt-5.2 | GPT-5.2 | Yes | Yes | Latest flagship |
+| gpt-5.4 | GPT-5.4 | Yes | Yes | Latest flagship |
+| gpt-5.4-pro | GPT-5.4 Pro | Yes | Yes | Enhanced capabilities |
+| gpt-5.4-mini | GPT-5.4 Mini | Yes | Yes | Smaller, faster |
+| gpt-5.4-nano | GPT-5.4 Nano | Yes | Yes | Lightweight |
+| gpt-5.2 | GPT-5.2 | Yes | Yes | |
 | gpt-5.2-pro | GPT-5.2 Pro | Yes | Yes | Enhanced capabilities |
 | gpt-5.2-codex | GPT-5.2 Codex | Yes | Yes | Code specialized |
 | gpt-5.1 | GPT-5.1 | Yes | Yes | |
 | gpt-5.1-codex | GPT-5.1 Codex | Yes | Yes | Code specialized |
+| gpt-5.1-codex-mini | GPT-5.1 Codex Mini | Yes | Yes | Smaller codex |
+| gpt-5.1-codex-max | GPT-5.1 Codex Max | Yes | Yes | Largest codex |
 | gpt-5 | GPT-5 | Yes | Yes | |
 | gpt-5-mini | GPT-5 Mini | Yes | Yes | Smaller, faster |
 | gpt-5-nano | GPT-5 Nano | No | Yes | Lightweight |
+| gpt-5-codex | GPT-5 Codex | Yes | Yes | Code specialized |
+| gpt-5-thinking | GPT-5 Thinking | Yes | Yes | Extended reasoning |
 | gpt-4.1 | GPT-4.1 | No | Yes | |
+| gpt-4.1-mini | GPT-4.1 Mini | No | Yes | |
+| gpt-4.1-nano | GPT-4.1 Nano | No | Yes | |
 | gpt-4o | GPT-4o | No | No | Multimodal |
 | gpt-4o-mini | GPT-4o Mini | No | No | Cost-effective |
 | o4-mini | o4-mini | Yes | Yes | Reasoning focused |
+| o4-mini-deep-research | o4-mini Deep Research | Yes | Yes | Research focused |
 | o3 | o3 | Yes | Yes | Reasoning focused |
+| o3-mini | o3-mini | Yes | Yes | Smaller reasoning |
 | o1 | o1 | Yes | No | Reasoning focused |
+| o1-pro | o1 Pro | Yes | No | Enhanced reasoning |
 
-**Image Generation Models**: gpt-image-1.5, gpt-image-1, dall-e-3, dall-e-2
+**Image Generation Models**: gpt-image-1.5, gpt-image-1, gpt-image-1-mini, dall-e-3, dall-e-2, chatgpt-image-latest
 
 **Usage Example**:
 ```go
@@ -71,23 +84,32 @@ resp, err := client.Chat(openai.ModelGPT4o).
 
 **Models**:
 
-| Model | Display Name | Notes |
-|-------|--------------|-------|
-| claude-sonnet-4-5 | Claude Sonnet 4.5 | Balanced performance |
-| claude-haiku-4-5 | Claude Haiku 4.5 | Fast, cost-effective |
-| claude-opus-4-5 | Claude Opus 4.5 | Most capable |
+| Model | Display Name | Reasoning | Notes |
+|-------|--------------|-----------|-------|
+| claude-opus-4-7 | Claude Opus 4.7 | Yes | Latest flagship |
+| claude-sonnet-4-6 | Claude Sonnet 4.6 | Yes | Balanced performance |
+| claude-sonnet-4-6-thinking | Claude Sonnet 4.6 (Thinking) | Yes | Extended reasoning |
+| claude-opus-4-6 | Claude Opus 4.6 | Yes | High capability |
+| claude-opus-4-6-thinking | Claude Opus 4.6 (Thinking) | Yes | Extended reasoning |
+| claude-sonnet-4-5 | Claude Sonnet 4.5 | Yes | Balanced performance |
+| claude-sonnet-4-5-thinking | Claude Sonnet 4.5 (Thinking) | Yes | Extended reasoning |
+| claude-haiku-4-5 | Claude Haiku 4.5 | Yes | Fast, cost-effective |
+| claude-opus-4-5 | Claude Opus 4.5 | Yes | High capability |
+| claude-opus-4-5-thinking | Claude Opus 4.5 (Thinking) | Yes | Extended reasoning |
+| claude-3-5-haiku-latest | Claude 3.5 Haiku | No | Legacy fast model |
 
 **Special Features**:
 - Extended context windows
 - Strong instruction following
 - Built-in safety guardrails
+- Thinking/reasoning modes
 
 **Usage Example**:
 ```go
 provider := anthropic.New(os.Getenv("ANTHROPIC_API_KEY"))
 client := core.NewClient(provider)
 
-resp, err := client.Chat(anthropic.ModelClaudeSonnet45).
+resp, err := client.Chat(anthropic.ModelClaudeSonnet46).
     System("You are a helpful assistant.").
     User("Explain quantum computing.").
     GetResponse(ctx)
@@ -105,13 +127,17 @@ resp, err := client.Chat(anthropic.ModelClaudeSonnet45).
 
 | Model | Display Name | Reasoning | Notes |
 |-------|--------------|-----------|-------|
+| gemini-3.1-flash-image-preview | Gemini 3.1 Flash Image Preview | No | Image generation |
 | gemini-3-pro-preview | Gemini 3 Pro Preview | Yes | Latest preview |
 | gemini-3-flash-preview | Gemini 3 Flash Preview | Yes | Fast preview |
+| gemini-3-pro-image-preview | Gemini 3 Pro Image Preview | No | Image generation (Nano Banana Pro) |
 | gemini-2.5-pro | Gemini 2.5 Pro | Yes | Production ready |
 | gemini-2.5-flash | Gemini 2.5 Flash | Yes | Fast, efficient |
 | gemini-2.5-flash-lite | Gemini 2.5 Flash Lite | Yes | Lightweight |
+| gemini-2.5-flash-image | Gemini 2.5 Flash Image | No | Image generation (Nano Banana) |
+| gemini-2.0-flash-lite | Gemini 2.0 Flash Lite | No | Legacy lightweight |
 
-**Image Generation Models**: gemini-2.5-flash-image, gemini-3-pro-image-preview (Nano Banana)
+**Image Generation Models**: gemini-3.1-flash-image-preview, gemini-3-pro-image-preview (Nano Banana Pro), gemini-2.5-flash-image (Nano Banana)
 
 **Special Features**:
 - Native multimodal support
@@ -140,10 +166,15 @@ resp, err := client.Chat(gemini.ModelGemini25Flash).
 
 | Model | Display Name | Reasoning | Notes |
 |-------|--------------|-----------|-------|
-| grok-4 | Grok 4 | Yes | Latest flagship |
+| grok-4.20-multi-agent-beta-0309 | Grok 4.20 Multi-Agent Beta | Yes | Multi-agent beta |
+| grok-4.20-beta-0309-reasoning | Grok 4.20 Beta (Reasoning) | Yes | Beta with reasoning |
+| grok-4.20-beta-0309-non-reasoning | Grok 4.20 Beta (Non-Reasoning) | No | Beta without reasoning |
+| grok-4.1 | Grok 4.1 | No | Latest stable |
+| grok-4-1-fast-non-reasoning | Grok 4.1 Fast (Non-Reasoning) | No | Fast without reasoning |
+| grok-4-1-fast-reasoning | Grok 4.1 Fast (Reasoning) | Yes | Fast with reasoning |
+| grok-4 | Grok 4 | Yes | Flagship |
 | grok-4-fast-reasoning | Grok 4 Fast (Reasoning) | Yes | Fast with reasoning |
 | grok-4-fast-non-reasoning | Grok 4 Fast (Non-Reasoning) | No | Fast without reasoning |
-| grok-4-1-fast-reasoning | Grok 4.1 Fast (Reasoning) | Yes | Newest fast model |
 | grok-3 | Grok 3 | Yes | Previous generation |
 | grok-3-mini | Grok 3 Mini | Yes | Smaller model |
 | grok-code-fast | Grok Code Fast | No | Code specialized |
@@ -151,6 +182,7 @@ resp, err := client.Chat(gemini.ModelGemini25Flash).
 **Special Features**:
 - Real-time information access
 - Distinct reasoning modes
+- Multi-agent capabilities (beta)
 
 **Usage Example**:
 ```go
@@ -206,18 +238,31 @@ resp, err := client.Chat(perplexity.ModelSonarPro).
 
 | Model | Display Name | Reasoning | Vision | Notes |
 |-------|--------------|-----------|--------|-------|
-| glm-4.7 | GLM-4.7 | Yes | No | Latest flagship |
+| glm-5.1 | GLM-5.1 | Yes | No | Latest flagship |
+| glm-5 | GLM-5 | Yes | No | |
+| glm-5-turbo | GLM-5 Turbo | Yes | No | Fast |
+| glm-5v-turbo | GLM-5V Turbo | Yes | Yes | Vision capable |
+| glm-4.7 | GLM-4.7 | Yes | No | |
 | glm-4.7-flash | GLM-4.7 Flash | No | No | Fast |
+| glm-4.7-flashx | GLM-4.7 FlashX | No | No | Extra fast |
 | glm-4.6 | GLM-4.6 | Yes | No | |
 | glm-4.6v | GLM-4.6V | Yes | Yes | Vision capable |
+| glm-4.6v-flash | GLM-4.6V Flash | No | Yes | Fast vision |
+| glm-4.6v-flashx | GLM-4.6V FlashX | No | Yes | Extra fast vision |
 | glm-4.5 | GLM-4.5 | Yes | No | |
 | glm-4.5v | GLM-4.5V | Yes | Yes | Vision capable |
+| glm-4.5-x | GLM-4.5-X | No | No | Extended |
+| glm-4.5-air | GLM-4.5 Air | No | No | Lightweight |
+| glm-4.5-airx | GLM-4.5 AirX | No | No | Extra lightweight |
+| glm-4.5-flash | GLM-4.5 Flash | No | No | Fast |
+| glm-for-coding | GLM for Coding | Yes | No | Code specialized |
 | glm-4-32b-0414-128k | GLM-4 32B | No | No | Large context |
 
 **Special Features**:
 - Vision models for image understanding
 - Chinese language optimization
 - Large context windows
+- Code-specialized model
 
 **Usage Example**:
 ```go
diff --git a/examples/chat/responses-api/main.go b/examples/chat/responses-api/main.go
index d21c89e..a193fbb 100644
--- a/examples/chat/responses-api/main.go
+++ b/examples/chat/responses-api/main.go
@@ -1,7 +1,7 @@
 // Example: OpenAI Responses API
 //
 // This example demonstrates the new Responses API features available
-// with newer OpenAI models like GPT-5.2, including:
+// with newer OpenAI models like GPT-5.4, including:
 // - Reasoning with configurable effort levels
 // - Built-in tools (web search, code interpreter)
 // - Response chaining (continuing from previous responses)
@@ -41,9 +41,9 @@ func main() {
 	defer cancel()
 
 	// Example 1: Basic Responses API usage
-	// GPT-5.2 automatically uses the Responses API
+	// GPT-5.4 automatically uses the Responses API
 	fmt.Println("=== Example 1: Basic Responses API ===")
-	resp, err := client.Chat(openai.ModelGPT52).
+	resp, err := client.Chat(openai.ModelGPT54).
 		Instructions("You are a helpful assistant.").
 		User("What is the capital of France?").
 		GetResponse(ctx)
@@ -59,7 +59,7 @@ func main() {
 
 	// Example 2: Using reasoning with high effort
 	fmt.Println("=== Example 2: Reasoning with High Effort ===")
-	resp, err = client.Chat(openai.ModelGPT52).
+	resp, err = client.Chat(openai.ModelGPT54).
 		Instructions("You are a math tutor. Show your reasoning.").
 		User("What is 15% of 240?").
 		ReasoningEffort(core.ReasoningEffortHigh).
@@ -81,7 +81,7 @@ func main() {
 
 	// Example 3: Using built-in web search
 	fmt.Println("=== Example 3: Built-in Web Search ===")
-	resp, err = client.Chat(openai.ModelGPT52).
+	resp, err = client.Chat(openai.ModelGPT54).
 		Instructions("Use web search to find current information.").
 		User("What are the top news stories today?").
 		WebSearch().
@@ -97,7 +97,7 @@ func main() {
 
 	// Example 4: Response chaining
 	fmt.Println("=== Example 4: Response Chaining ===")
-	firstResp, err := client.Chat(openai.ModelGPT52).
+	firstResp, err := client.Chat(openai.ModelGPT54).
 		Instructions("You are a storyteller.").
 		User("Start a short story about a robot.").
 		GetResponse(ctx)
@@ -111,7 +111,7 @@ func main() {
 	fmt.Println()
 
 	// Continue from the previous response
-	secondResp, err := client.Chat(openai.ModelGPT52).
+	secondResp, err := client.Chat(openai.ModelGPT54).
 		ContinueFrom(firstResp.ID).
 		User("Continue the story with an unexpected twist.").
 		GetResponse(ctx)
@@ -126,7 +126,7 @@ func main() {
 
 	// Example 5: Streaming with Responses API
 	fmt.Println("=== Example 5: Streaming with Responses API ===")
-	stream, err := client.Chat(openai.ModelGPT52).
+	stream, err := client.Chat(openai.ModelGPT54).
 		Instructions("You are a poet.").
 		User("Write a haiku about programming.").
 		Stream(ctx)