Compare commits

...

1 Commit

Author SHA1 Message Date
Bruce MacDonald
f82d00af74 openai: do not separate images into their own messages
- Changed the `case []any:` branch to accumulate text parts and images into slices first, then create a single `api.Message` at the end
- Multiple text parts are joined with a blank line (`\n\n`)
- All images are collected into a single `Images` slice
2025-12-18 14:07:31 -08:00
2 changed files with 42 additions and 23 deletions

View File

@@ -463,6 +463,8 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
} }
messages = append(messages, api.Message{Role: msg.Role, Content: content, Thinking: msg.Reasoning, ToolCalls: toolCalls, ToolName: toolName, ToolCallID: msg.ToolCallID}) messages = append(messages, api.Message{Role: msg.Role, Content: content, Thinking: msg.Reasoning, ToolCalls: toolCalls, ToolName: toolName, ToolCallID: msg.ToolCallID})
case []any: case []any:
var texts []string
var images []api.ImageData
for _, c := range content { for _, c := range content {
data, ok := c.(map[string]any) data, ok := c.(map[string]any)
if !ok { if !ok {
@@ -474,7 +476,7 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
if !ok { if !ok {
return nil, errors.New("invalid message format") return nil, errors.New("invalid message format")
} }
messages = append(messages, api.Message{Role: msg.Role, Content: text}) texts = append(texts, text)
case "image_url": case "image_url":
var url string var url string
if urlMap, ok := data["image_url"].(map[string]any); ok { if urlMap, ok := data["image_url"].(map[string]any); ok {
@@ -492,23 +494,24 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
return nil, err return nil, err
} }
messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}}) images = append(images, img)
default: default:
return nil, errors.New("invalid message format") return nil, errors.New("invalid message format")
} }
} }
// since we might have added multiple messages above, if we have tools toolCalls, err := FromCompletionToolCall(msg.ToolCalls)
// calls we'll add them to the last message if err != nil {
if len(messages) > 0 && len(msg.ToolCalls) > 0 { return nil, err
toolCalls, err := FromCompletionToolCall(msg.ToolCalls)
if err != nil {
return nil, err
}
messages[len(messages)-1].ToolCalls = toolCalls
messages[len(messages)-1].ToolName = toolName
messages[len(messages)-1].ToolCallID = msg.ToolCallID
messages[len(messages)-1].Thinking = msg.Reasoning
} }
messages = append(messages, api.Message{
Role: msg.Role,
Content: strings.Join(texts, "\n\n"),
Images: images,
Thinking: msg.Reasoning,
ToolCalls: toolCalls,
ToolName: toolName,
ToolCallID: msg.ToolCallID,
})
default: default:
// content is only optional if tool calls are present // content is only optional if tool calls are present
if msg.ToolCalls == nil { if msg.ToolCalls == nil {

View File

@@ -41,7 +41,7 @@ func TestFromChatRequest_Basic(t *testing.T) {
} }
} }
func TestFromChatRequest_WithImage(t *testing.T) { func TestFromChatRequest_MultiPartContent(t *testing.T) {
imgData, _ := base64.StdEncoding.DecodeString(image) imgData, _ := base64.StdEncoding.DecodeString(image)
req := ChatCompletionRequest{ req := ChatCompletionRequest{
@@ -50,7 +50,12 @@ func TestFromChatRequest_WithImage(t *testing.T) {
{ {
Role: "user", Role: "user",
Content: []any{ Content: []any{
map[string]any{"type": "text", "text": "Hello"}, map[string]any{"type": "text", "text": "First part."},
map[string]any{"type": "text", "text": "Second part."},
map[string]any{
"type": "image_url",
"image_url": map[string]any{"url": prefix + image},
},
map[string]any{ map[string]any{
"type": "image_url", "type": "image_url",
"image_url": map[string]any{"url": prefix + image}, "image_url": map[string]any{"url": prefix + image},
@@ -65,20 +70,31 @@ func TestFromChatRequest_WithImage(t *testing.T) {
t.Fatalf("unexpected error: %v", err) t.Fatalf("unexpected error: %v", err)
} }
if len(result.Messages) != 2 { // Multi-part content array should produce a single message per OpenAI spec
t.Fatalf("expected 2 messages, got %d", len(result.Messages)) if len(result.Messages) != 1 {
t.Fatalf("expected 1 message, got %d", len(result.Messages))
} }
if result.Messages[0].Content != "Hello" { msg := result.Messages[0]
t.Errorf("expected first message content 'Hello', got %q", result.Messages[0].Content) if msg.Role != "user" {
t.Errorf("expected role 'user', got %q", msg.Role)
} }
if len(result.Messages[1].Images) != 1 { // Multiple text parts should be joined
t.Fatalf("expected 1 image, got %d", len(result.Messages[1].Images)) expectedContent := "First part.\n\nSecond part."
if msg.Content != expectedContent {
t.Errorf("expected content %q, got %q", expectedContent, msg.Content)
} }
if string(result.Messages[1].Images[0]) != string(imgData) { // Multiple images should be in the same message
t.Error("image data mismatch") if len(msg.Images) != 2 {
t.Fatalf("expected 2 images, got %d", len(msg.Images))
}
for i, img := range msg.Images {
if string(img) != string(imgData) {
t.Errorf("image %d data mismatch", i)
}
} }
} }