diff --git a/pkg/llm/provider/ollama/ollama.go b/pkg/llm/provider/ollama/ollama.go
index 14448d2..b4b0df1 100644
--- a/pkg/llm/provider/ollama/ollama.go
+++ b/pkg/llm/provider/ollama/ollama.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 
 	"github.com/papercomputeco/tapes/pkg/llm"
+	"github.com/papercomputeco/tapes/pkg/llm/provider/openai"
 )
 
 // Provider implements the Provider interface for Ollama's API.
@@ -23,7 +24,15 @@ func (o *Provider) DefaultStreaming() bool {
 func (o *Provider) ParseRequest(payload []byte) (*llm.ChatRequest, error) {
 	var req ollamaRequest
 	if err := json.Unmarshal(payload, &req); err != nil {
-		return nil, err
+		return openai.ParseRequestPayload(payload)
+	}
+
+	// Detect OpenAI-format payloads that unmarshal successfully but lose content.
+	// When content is a JSON array (e.g. OpenCode sending OpenAI-format requests
+	// to Ollama), Go's decoder silently zero-values the string field, producing
+	// messages with a role but no content, images, or tool calls.
+	if hasLostContent(req.Messages) {
+		return openai.ParseRequestPayload(payload)
 	}
 
 	messages := make([]llm.Message, 0, len(req.Messages))
@@ -108,7 +117,14 @@ func (o *Provider) ParseRequest(payload []byte) (*llm.ChatRequest, error) {
 func (o *Provider) ParseResponse(payload []byte) (*llm.ChatResponse, error) {
 	var resp ollamaResponse
 	if err := json.Unmarshal(payload, &resp); err != nil {
-		return nil, err
+		return openai.ParseResponsePayload(payload)
+	}
+
+	// Detect OpenAI-format responses: they use a "choices" array instead of a
+	// top-level "message" field, so resp.Message will be zero-valued while the
+	// model field is still populated from the JSON.
+	if resp.Model != "" && resp.Message.Role == "" && !resp.Done {
+		return openai.ParseResponsePayload(payload)
 	}
 
 	// Convert message content
@@ -183,3 +199,16 @@ func (o *Provider) ParseResponse(payload []byte) (*llm.ChatResponse, error) {
 func (o *Provider) ParseStreamChunk(_ []byte) (*llm.StreamChunk, error) {
 	panic("Not yet implemented")
 }
+
+// hasLostContent detects when an OpenAI-format payload was unmarshaled into
+// Ollama types. Because ollamaMessage.Content is a string, array-valued content
+// (e.g. [{type: "text", text: "..."}]) gets silently zero-valued by Go's JSON
+// decoder, producing messages with a role but no content, images, or tool calls.
+func hasLostContent(msgs []ollamaMessage) bool {
+	for _, m := range msgs {
+		if m.Role != "" && m.Content == "" && len(m.Images) == 0 && len(m.ToolCalls) == 0 {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/llm/provider/ollama/ollama_test.go b/pkg/llm/provider/ollama/ollama_test.go
index 46b6f04..7eb744b 100644
--- a/pkg/llm/provider/ollama/ollama_test.go
+++ b/pkg/llm/provider/ollama/ollama_test.go
@@ -471,6 +471,95 @@ var _ = Describe("Ollama Provider", func() {
 		})
 	})
 
+	Describe("ParseRequest with OpenAI-format content (OpenCode compatibility)", func() {
+		It("parses array content from OpenCode/Ollama requests", func() {
+			// This is the exact format OpenCode sends when using Ollama,
+			// where content is an array of objects instead of a plain string.
+			// See: https://github.com/papercomputeco/tapes/issues/137
+			payload := []byte(`{
+				"model": "qwen3-coder:30b",
+				"max_tokens": 32000,
+				"top_p": 1,
+				"messages": [
+					{
+						"role": "system",
+						"content": "You are a helpful assistant."
+					},
+					{
+						"role": "user",
+						"content": [
+							{"type": "text", "text": "I want to plan a unit test"},
+							{"type": "text", "text": "Additional context here"}
+						]
+					}
+				]
+			}`)
+
+			req, err := p.ParseRequest(payload)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(req.Model).To(Equal("qwen3-coder:30b"))
+			Expect(req.Messages).To(HaveLen(2))
+			Expect(req.Messages[0].Role).To(Equal("system"))
+			Expect(req.Messages[0].GetText()).To(Equal("You are a helpful assistant."))
+			Expect(req.Messages[1].Role).To(Equal("user"))
+			Expect(req.Messages[1].Content).To(HaveLen(2))
+			Expect(req.Messages[1].Content[0].Text).To(Equal("I want to plan a unit test"))
+			Expect(req.Messages[1].Content[1].Text).To(Equal("Additional context here"))
+		})
+
+		It("handles all-string OpenAI-format messages without a false positive", func() {
+			// When all messages have string content, native Ollama parsing should
+			// be used (no fallback needed).
+			payload := []byte(`{
+				"model": "llama2",
+				"messages": [
+					{"role": "system", "content": "You are helpful."},
+					{"role": "user", "content": "Hello!"}
+				]
+			}`)
+
+			req, err := p.ParseRequest(payload)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(req.Messages).To(HaveLen(2))
+			Expect(req.Messages[1].GetText()).To(Equal("Hello!"))
+		})
+	})
+
+	Describe("ParseResponse with OpenAI-format (OpenCode compatibility)", func() {
+		It("parses OpenAI-format response with choices array", func() {
+			payload := []byte(`{
+				"id": "chatcmpl-123",
+				"object": "chat.completion",
+				"created": 1677858242,
+				"model": "qwen3-coder:30b",
+				"choices": [
+					{
+						"index": 0,
+						"message": {
+							"role": "assistant",
+							"content": "Here is the test plan."
+						},
+						"finish_reason": "stop"
+					}
+				],
+				"usage": {
+					"prompt_tokens": 100,
+					"completion_tokens": 50,
+					"total_tokens": 150
+				}
+			}`)
+
+			resp, err := p.ParseResponse(payload)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(resp.Model).To(Equal("qwen3-coder:30b"))
+			Expect(resp.Message.Role).To(Equal("assistant"))
+			Expect(resp.Message.GetText()).To(Equal("Here is the test plan."))
+			Expect(resp.Usage).NotTo(BeNil())
+			Expect(resp.Usage.PromptTokens).To(Equal(100))
+			Expect(resp.Usage.CompletionTokens).To(Equal(50))
+		})
+	})
+
 	Describe("ParseRequest with tool calls", func() {
 		It("parses tool calls in assistant messages", func() {
 			payload := []byte(`{
diff --git a/pkg/llm/provider/openai/openai.go b/pkg/llm/provider/openai/openai.go
index 6243247..4d476de 100644
--- a/pkg/llm/provider/openai/openai.go
+++ b/pkg/llm/provider/openai/openai.go
@@ -2,9 +2,6 @@
 package openai
 
 import (
-	"encoding/json"
-	"time"
-
 	"github.com/papercomputeco/tapes/pkg/llm"
 )
 
@@ -23,197 +20,11 @@ func (o *Provider) DefaultStreaming() bool {
 }
 
 func (o *Provider) ParseRequest(payload []byte) (*llm.ChatRequest, error) {
-	var req openaiRequest
-	if err := json.Unmarshal(payload, &req); err != nil {
-		return nil, err
-	}
-
-	messages := make([]llm.Message, 0, len(req.Messages))
-	for _, msg := range req.Messages {
-		converted := llm.Message{Role: msg.Role}
-
-		switch content := msg.Content.(type) {
-		case string:
-			converted.Content = []llm.ContentBlock{{Type: "text", Text: content}}
-		case []any:
-			// Multimodal content (e.g., vision)
-			for _, item := range content {
-				if part, ok := item.(map[string]any); ok {
-					cb := llm.ContentBlock{}
-					if t, ok := part["type"].(string); ok {
-						cb.Type = t
-					}
-					if text, ok := part["text"].(string); ok {
-						cb.Text = text
-					}
-					if imageURL, ok := part["image_url"].(map[string]any); ok {
-						cb.Type = "image"
-						if url, ok := imageURL["url"].(string); ok {
-							cb.ImageURL = url
-						}
-					}
-					converted.Content = append(converted.Content, cb)
-				}
-			}
-		case nil:
-			// Empty content (can happen with tool calls)
-			converted.Content = []llm.ContentBlock{}
-		}
-
-		// Handle tool calls in assistant messages
-		for _, tc := range msg.ToolCalls {
-			var input map[string]any
-			if err := json.Unmarshal([]byte(tc.Function.Arguments), &input); err == nil {
-				converted.Content = append(converted.Content, llm.ContentBlock{
-					Type:      "tool_use",
-					ToolUseID: tc.ID,
-					ToolName:  tc.Function.Name,
-					ToolInput: input,
-				})
-			}
-		}
-
-		// Handle tool results
-		if msg.Role == "tool" && msg.ToolCallID != "" {
-			text := ""
-			if s, ok := msg.Content.(string); ok {
-				text = s
-			}
-			converted.Content = []llm.ContentBlock{{
-				Type:         "tool_result",
-				ToolResultID: msg.ToolCallID,
-				ToolOutput:   text,
-			}}
-		}
-
-		messages = append(messages, converted)
-	}
-
-	// Parse stop sequences
-	var stop []string
-	switch s := req.Stop.(type) {
-	case string:
-		stop = []string{s}
-	case []any:
-		for _, item := range s {
-			if str, ok := item.(string); ok {
-				stop = append(stop, str)
-			}
-		}
-	}
-
-	result := &llm.ChatRequest{
-		Model:       req.Model,
-		Messages:    messages,
-		MaxTokens:   req.MaxTokens,
-		Temperature: req.Temperature,
-		TopP:        req.TopP,
-		Stop:        stop,
-		Seed:        req.Seed,
-		Stream:      req.Stream,
-		RawRequest:  payload,
-	}
-
-	// Preserve OpenAI-specific fields
-	if req.FrequencyPenalty != nil || req.PresencePenalty != nil || req.ResponseFormat != nil {
-		result.Extra = make(map[string]any)
-		if req.FrequencyPenalty != nil {
-			result.Extra["frequency_penalty"] = *req.FrequencyPenalty
-		}
-		if req.PresencePenalty != nil {
-			result.Extra["presence_penalty"] = *req.PresencePenalty
-		}
-		if req.ResponseFormat != nil {
-			result.Extra["response_format"] = req.ResponseFormat
-		}
-	}
-
-	return result, nil
+	return ParseRequestPayload(payload)
 }
 
 func (o *Provider) ParseResponse(payload []byte) (*llm.ChatResponse, error) {
-	var resp openaiResponse
-	if err := json.Unmarshal(payload, &resp); err != nil {
-		return nil, err
-	}
-
-	if len(resp.Choices) == 0 {
-		// Return empty response if no choices
-		return &llm.ChatResponse{
-			Model:       resp.Model,
-			Done:        true,
-			RawResponse: payload,
-		}, nil
-	}
-
-	choice := resp.Choices[0]
-	msg := choice.Message
-
-	// Convert message content
-	var content []llm.ContentBlock
-	switch c := msg.Content.(type) {
-	case string:
-		content = []llm.ContentBlock{{Type: "text", Text: c}}
-	case []any:
-		for _, item := range c {
-			if part, ok := item.(map[string]any); ok {
-				cb := llm.ContentBlock{}
-				if t, ok := part["type"].(string); ok {
-					cb.Type = t
-				}
-				if text, ok := part["text"].(string); ok {
-					cb.Text = text
-				}
-				content = append(content, cb)
-			}
-		}
-	case nil:
-		content = []llm.ContentBlock{}
-	}
-
-	// Handle tool calls
-	for _, tc := range msg.ToolCalls {
-		var input map[string]any
-		if err := json.Unmarshal([]byte(tc.Function.Arguments), &input); err == nil {
-			content = append(content, llm.ContentBlock{
-				Type:      "tool_use",
-				ToolUseID: tc.ID,
-				ToolName:  tc.Function.Name,
-				ToolInput: input,
-			})
-		}
-	}
-
-	var usage *llm.Usage
-	if resp.Usage != nil {
-		usage = &llm.Usage{
-			PromptTokens:     resp.Usage.PromptTokens,
-			CompletionTokens: resp.Usage.CompletionTokens,
-			TotalTokens:      resp.Usage.TotalTokens,
-		}
-		if resp.Usage.PromptTokensDetails != nil {
-			usage.CacheReadInputTokens = resp.Usage.PromptTokensDetails.CachedTokens
-		}
-	}
-
-	result := &llm.ChatResponse{
-		Model: resp.Model,
-		Message: llm.Message{
-			Role:    msg.Role,
-			Content: content,
-		},
-		Done:        true,
-		StopReason:  choice.FinishReason,
-		Usage:       usage,
-		CreatedAt:   time.Unix(resp.Created, 0),
-		RawResponse: payload,
-		Extra: map[string]any{
-			"id":     resp.ID,
-			"object": resp.Object,
-		},
-	}
-
-	return result, nil
+	return ParseResponsePayload(payload)
 }
 
 func (o *Provider) ParseStreamChunk(_ []byte) (*llm.StreamChunk, error) {
diff --git a/pkg/llm/provider/openai/parser.go b/pkg/llm/provider/openai/parser.go
new file mode 100644
index 0000000..f91d2aa
--- /dev/null
+++ b/pkg/llm/provider/openai/parser.go
@@ -0,0 +1,211 @@
+// Package openai implements parsing for OpenAI-format chat completion payloads.
+package openai
+
+import (
+	"encoding/json"
+	"time"
+
+	"github.com/papercomputeco/tapes/pkg/llm"
+)
+
+// ParseRequestPayload parses an OpenAI-format chat completion request payload
+// into the common ChatRequest type. This is exported so other providers (e.g.
+// Ollama) can fall back to OpenAI parsing when they receive OpenAI-compatible
+// payloads.
+func ParseRequestPayload(payload []byte) (*llm.ChatRequest, error) {
+	var req openaiRequest
+	if err := json.Unmarshal(payload, &req); err != nil {
+		return nil, err
+	}
+
+	messages := make([]llm.Message, 0, len(req.Messages))
+	for _, msg := range req.Messages {
+		converted := llm.Message{Role: msg.Role}
+
+		switch content := msg.Content.(type) {
+		case string:
+			converted.Content = []llm.ContentBlock{{Type: "text", Text: content}}
+		case []any:
+			// Multimodal content (e.g., vision)
+			for _, item := range content {
+				if part, ok := item.(map[string]any); ok {
+					cb := llm.ContentBlock{}
+					if t, ok := part["type"].(string); ok {
+						cb.Type = t
+					}
+					if text, ok := part["text"].(string); ok {
+						cb.Text = text
+					}
+					if imageURL, ok := part["image_url"].(map[string]any); ok {
+						cb.Type = "image"
+						if url, ok := imageURL["url"].(string); ok {
+							cb.ImageURL = url
+						}
+					}
+					converted.Content = append(converted.Content, cb)
+				}
+			}
+		case nil:
+			// Empty content (can happen with tool calls)
+			converted.Content = []llm.ContentBlock{}
+		}
+
+		// Handle tool calls in assistant messages
+		for _, tc := range msg.ToolCalls {
+			var input map[string]any
+			if err := json.Unmarshal([]byte(tc.Function.Arguments), &input); err == nil {
+				converted.Content = append(converted.Content, llm.ContentBlock{
+					Type:      "tool_use",
+					ToolUseID: tc.ID,
+					ToolName:  tc.Function.Name,
+					ToolInput: input,
+				})
+			}
+		}
+
+		// Handle tool results
+		if msg.Role == "tool" && msg.ToolCallID != "" {
+			text := ""
+			if s, ok := msg.Content.(string); ok {
+				text = s
+			}
+			converted.Content = []llm.ContentBlock{{
+				Type:         "tool_result",
+				ToolResultID: msg.ToolCallID,
+				ToolOutput:   text,
+			}}
+		}
+
+		messages = append(messages, converted)
+	}
+
+	// Parse stop sequences
+	var stop []string
+	switch s := req.Stop.(type) {
+	case string:
+		stop = []string{s}
+	case []any:
+		for _, item := range s {
+			if str, ok := item.(string); ok {
+				stop = append(stop, str)
+			}
+		}
+	}
+
+	result := &llm.ChatRequest{
+		Model:       req.Model,
+		Messages:    messages,
+		MaxTokens:   req.MaxTokens,
+		Temperature: req.Temperature,
+		TopP:        req.TopP,
+		Stop:        stop,
+		Seed:        req.Seed,
+		Stream:      req.Stream,
+		RawRequest:  payload,
+	}
+
+	// Preserve OpenAI-specific fields
+	if req.FrequencyPenalty != nil || req.PresencePenalty != nil || req.ResponseFormat != nil {
+		result.Extra = make(map[string]any)
+		if req.FrequencyPenalty != nil {
+			result.Extra["frequency_penalty"] = *req.FrequencyPenalty
+		}
+		if req.PresencePenalty != nil {
+			result.Extra["presence_penalty"] = *req.PresencePenalty
+		}
+		if req.ResponseFormat != nil {
+			result.Extra["response_format"] = req.ResponseFormat
+		}
+	}
+
+	return result, nil
+}
+
+// ParseResponsePayload parses an OpenAI-format chat completion response payload
+// into the common ChatResponse type. This is exported so other providers (e.g.
+// Ollama) can fall back to OpenAI parsing when they receive OpenAI-compatible
+// payloads.
+func ParseResponsePayload(payload []byte) (*llm.ChatResponse, error) {
+	var resp openaiResponse
+	if err := json.Unmarshal(payload, &resp); err != nil {
+		return nil, err
+	}
+
+	if len(resp.Choices) == 0 {
+		// Return empty response if no choices
+		return &llm.ChatResponse{
+			Model:       resp.Model,
+			Done:        true,
+			RawResponse: payload,
+		}, nil
+	}
+
+	choice := resp.Choices[0]
+	msg := choice.Message
+
+	// Convert message content
+	var content []llm.ContentBlock
+	switch c := msg.Content.(type) {
+	case string:
+		content = []llm.ContentBlock{{Type: "text", Text: c}}
+	case []any:
+		for _, item := range c {
+			if part, ok := item.(map[string]any); ok {
+				cb := llm.ContentBlock{}
+				if t, ok := part["type"].(string); ok {
+					cb.Type = t
+				}
+				if text, ok := part["text"].(string); ok {
+					cb.Text = text
+				}
+				content = append(content, cb)
+			}
+		}
+	case nil:
+		content = []llm.ContentBlock{}
+	}
+
+	// Handle tool calls
+	for _, tc := range msg.ToolCalls {
+		var input map[string]any
+		if err := json.Unmarshal([]byte(tc.Function.Arguments), &input); err == nil {
+			content = append(content, llm.ContentBlock{
+				Type:      "tool_use",
+				ToolUseID: tc.ID,
+				ToolName:  tc.Function.Name,
+				ToolInput: input,
+			})
+		}
+	}
+
+	var usage *llm.Usage
+	if resp.Usage != nil {
+		usage = &llm.Usage{
+			PromptTokens:     resp.Usage.PromptTokens,
+			CompletionTokens: resp.Usage.CompletionTokens,
+			TotalTokens:      resp.Usage.TotalTokens,
+		}
+		if resp.Usage.PromptTokensDetails != nil {
+			usage.CacheReadInputTokens = resp.Usage.PromptTokensDetails.CachedTokens
+		}
+	}
+
+	result := &llm.ChatResponse{
+		Model: resp.Model,
+		Message: llm.Message{
+			Role:    msg.Role,
+			Content: content,
+		},
+		Done:        true,
+		StopReason:  choice.FinishReason,
+		Usage:       usage,
+		CreatedAt:   time.Unix(resp.Created, 0),
+		RawResponse: payload,
+		Extra: map[string]any{
+			"id":     resp.ID,
+			"object": resp.Object,
+		},
+	}
+
+	return result, nil
+}
diff --git a/proxy/proxy.go b/proxy/proxy.go
index bc407cd..161906d 100644
--- a/proxy/proxy.go
+++ b/proxy/proxy.go
@@ -536,6 +536,11 @@ func (p *Proxy) extractUsageFromSSE(data []byte, providerName string, usage *llm
 			usage.PromptTokens = jsonInt(chunkData, "prompt_eval_count")
 			usage.CompletionTokens = jsonInt(chunkData, "eval_count")
 		}
+		// In some cases, Ollama responses use the OpenAI format (e.g. with OpenCode)
+		if u, ok := chunkData["usage"].(map[string]any); ok {
+			usage.PromptTokens = jsonInt(u, "prompt_tokens")
+			usage.CompletionTokens = jsonInt(u, "completion_tokens")
+		}
 	}
 }