From 34bc1bda1e3b8a1d6df72fc77c9a65a34e1d17b0 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sun, 9 Nov 2025 22:00:27 +0100 Subject: [PATCH] fix(api): SSE streaming format to comply with specification (#7182) * Initial plan * Fix SSE streaming format to comply with specification - Replace json.Encoder with json.Marshal for explicit formatting - Use explicit \n\n for all SSE messages (instead of relying on implicit newlines) - Change %v to %s format specifier for proper string formatting - Fix error message streaming to include proper SSE format - Ensure consistency between chat.go and completion.go endpoints Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Add proper error handling for JSON marshal failures in streaming - Handle json.Marshal errors explicitly in error response paths - Add fallback simple error message if marshal fails - Prevents sending 'data: ' on marshal failures - Addresses code review feedback Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Fix SSE streaming format to comply with specification Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Fix finish_reason field to use pointer for proper null handling - Change FinishReason from string to *string in Choice schema - Streaming chunks now omit finish_reason (null) instead of empty string - Final chunks properly set finish_reason to "stop", "tool_calls", etc. - Remove empty content from initial streaming chunks (only send role) - Final streaming chunk sends empty delta with finish_reason - Addresses OpenAI API compliance issues causing client failures Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Improve code consistency for string pointer creation - Use consistent pattern: declare variable then take address - Remove inline anonymous function for better readability - Addresses code review feedback Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Move common finish reasons to constants - Create constants.go with FinishReasonStop, FinishReasonToolCalls, FinishReasonFunctionCall - Replace all string literals with constants in chat.go, completion.go, realtime.go - Improves code maintainability and prevents typos Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Make it build Signed-off-by: Ettore Di Giacinto * Fix finish_reason to always be present with null or string value - Remove omitempty from FinishReason field in Choice struct - Explicitly set FinishReason to nil for all streaming chunks - Ensures finish_reason appears as null in JSON for streaming chunks - Final chunks still properly set finish_reason to "stop", "tool_calls", etc. - Complies with OpenAI API specification example Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .github/gallery-agent/go.mod | 1 - core/http/endpoints/openai/chat.go | 68 +++++++++++++++--------- core/http/endpoints/openai/completion.go | 48 +++++++++++++---- core/http/endpoints/openai/constants.go | 8 +++ core/http/endpoints/openai/realtime.go | 9 ++-- core/schema/openai.go | 2 +- docs/go.mod | 2 - docs/go.sum | 4 -- 8 files changed, 95 insertions(+), 47 deletions(-) create mode 100644 core/http/endpoints/openai/constants.go diff --git a/.github/gallery-agent/go.mod b/.github/gallery-agent/go.mod index 250fc7967..7129f5507 100644 --- a/.github/gallery-agent/go.mod +++ b/.github/gallery-agent/go.mod @@ -8,7 +8,6 @@ require ( github.com/onsi/gomega v1.38.2 github.com/sashabaranov/go-openai v1.41.2 github.com/tmc/langchaingo v0.1.13 - gopkg.in/yaml.v3 v3.0.1 ) require ( diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 3691e058a..975aac3e8 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -2,7 +2,6 @@ package openai import ( "bufio" - "bytes" "context" "encoding/json" "fmt" @@ -91,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}}, + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -111,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, + Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}}, Object: "chat.completion.chunk", Usage: usage, } @@ -145,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}}, + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -169,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, + Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}}, Object: "chat.completion.chunk", Usage: usage, } @@ -197,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator }, }, }, - }}}, + }, + Index: 0, + FinishReason: nil, + }}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -220,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator }, }, }, - }}}, + }, + Index: 0, + FinishReason: nil, + }}, Object: "chat.completion.chunk", } } @@ -427,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator if len(ev.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true } - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - enc.Encode(ev) - log.Debug().Msgf("Sending chunk: %s", buf.String()) - _, err := fmt.Fprintf(w, "data: %v\n", buf.String()) + respData, err := json.Marshal(ev) + if err != nil { + log.Debug().Msgf("Failed to marshal response: %v", err) + input.Cancel() + continue + } + log.Debug().Msgf("Sending chunk: %s", string(respData)) + _, err = fmt.Fprintf(w, "data: %s\n\n", string(respData)) if err != nil { log.Debug().Msgf("Sending chunk failed: %v", err) input.Cancel() @@ -443,22 +451,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } log.Error().Msgf("Stream ended with error: %v", err) + stopReason := FinishReasonStop resp := &schema.OpenAIResponse{ ID: id, Created: created, Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { - FinishReason: "stop", + FinishReason: &stopReason, Index: 0, Delta: &schema.Message{Content: "Internal error: " + err.Error()}, }}, Object: "chat.completion.chunk", Usage: *usage, } - respData, _ := json.Marshal(resp) - - w.WriteString(fmt.Sprintf("data: %s\n\n", respData)) + respData, marshalErr := json.Marshal(resp) + if marshalErr != nil { + log.Error().Msgf("Failed to marshal error response: %v", marshalErr) + // Send a simple error message as fallback + w.WriteString("data: {\"error\":\"Internal error\"}\n\n") + } else { + w.WriteString(fmt.Sprintf("data: %s\n\n", respData)) + } w.WriteString("data: [DONE]\n\n") w.Flush() @@ -466,11 +480,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } } - finishReason := "stop" + finishReason := FinishReasonStop if toolsCalled && len(input.Tools) > 0 { - finishReason = "tool_calls" + finishReason = FinishReasonToolCalls } else if toolsCalled { - finishReason = "function_call" + finishReason = FinishReasonFunctionCall } resp := &schema.OpenAIResponse{ @@ -479,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { - FinishReason: finishReason, + FinishReason: &finishReason, Index: 0, - Delta: &schema.Message{Content: &textContentToReturn}, + Delta: &schema.Message{}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -502,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator tokenCallback := func(s string, c *[]schema.Choice) { if !shouldUseFn { // no function is called, just reply and use stop as finish reason - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + stopReason := FinishReasonStop + *c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) return } @@ -520,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator return } + stopReason := FinishReasonStop *c = append(*c, schema.Choice{ - FinishReason: "stop", + FinishReason: &stopReason, Message: &schema.Message{Role: "assistant", Content: &result}}) default: + toolCallsReason := FinishReasonToolCalls toolChoice := schema.Choice{ - FinishReason: "tool_calls", + FinishReason: &toolCallsReason, Message: &schema.Message{ Role: "assistant", }, @@ -549,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator ) } else { // otherwise we return more choices directly (deprecated) + functionCallReason := FinishReasonFunctionCall *c = append(*c, schema.Choice{ - FinishReason: "function_call", + FinishReason: &functionCallReason, Message: &schema.Message{ Role: "assistant", Content: &textContentToReturn, diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 03f5f95e2..fc9acb0c3 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -2,7 +2,6 @@ package openai import ( "bufio" - "bytes" "encoding/json" "errors" "fmt" @@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { - Index: 0, - Text: s, + Index: 0, + Text: s, + FinishReason: nil, }, }, Object: "text_completion", @@ -140,24 +140,49 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva log.Debug().Msgf("No choices in the response, skipping") continue } - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - enc.Encode(ev) + respData, err := json.Marshal(ev) + if err != nil { + log.Debug().Msgf("Failed to marshal response: %v", err) + continue + } - log.Debug().Msgf("Sending chunk: %s", buf.String()) - fmt.Fprintf(w, "data: %v\n", buf.String()) + log.Debug().Msgf("Sending chunk: %s", string(respData)) + fmt.Fprintf(w, "data: %s\n\n", string(respData)) w.Flush() case err := <-ended: if err == nil { break LOOP } log.Error().Msgf("Stream ended with error: %v", err) - fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error()) + + stopReason := FinishReasonStop + errorResp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, + Choices: []schema.Choice{ + { + Index: 0, + FinishReason: &stopReason, + Text: "Internal error: " + err.Error(), + }, + }, + Object: "text_completion", + } + errorData, marshalErr := json.Marshal(errorResp) + if marshalErr != nil { + log.Error().Msgf("Failed to marshal error response: %v", marshalErr) + // Send a simple error message as fallback + fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n") + } else { + fmt.Fprintf(w, "data: %s\n\n", string(errorData)) + } w.Flush() break LOOP } } + stopReason := FinishReasonStop resp := &schema.OpenAIResponse{ ID: id, Created: created, @@ -165,7 +190,7 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva Choices: []schema.Choice{ { Index: 0, - FinishReason: "stop", + FinishReason: &stopReason, }, }, Object: "text_completion", @@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva r, tokenUsage, err := ComputeChoices( input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) { - *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k}) + stopReason := FinishReasonStop + *c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k}) }, nil) if err != nil { return err diff --git a/core/http/endpoints/openai/constants.go b/core/http/endpoints/openai/constants.go new file mode 100644 index 000000000..bc7dae10b --- /dev/null +++ b/core/http/endpoints/openai/constants.go @@ -0,0 +1,8 @@ +package openai + +// Finish reason constants for OpenAI API responses +const ( + FinishReasonStop = "stop" + FinishReasonToolCalls = "tool_calls" + FinishReasonFunctionCall = "function_call" +) diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go index d6a9186b0..c56e10375 100644 --- a/core/http/endpoints/openai/realtime.go +++ b/core/http/endpoints/openai/realtime.go @@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) { if !shouldUseFn { // no function is called, just reply and use stop as finish reason - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + stopReason := FinishReasonStop + *c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) return } @@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st } if len(input.Tools) > 0 { - toolChoice.FinishReason = "tool_calls" + toolCallsReason := FinishReasonToolCalls + toolChoice.FinishReason = &toolCallsReason } for _, ss := range results { @@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st ) } else { // otherwise we return more choices directly + functionCallReason := FinishReasonFunctionCall *c = append(*c, schema.Choice{ - FinishReason: "function_call", + FinishReason: &functionCallReason, Message: &schema.Message{ Role: "assistant", Content: &textContentToReturn, diff --git a/core/schema/openai.go b/core/schema/openai.go index 49e18642f..fd24ec280 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -50,7 +50,7 @@ type OpenAIResponse struct { type Choice struct { Index int `json:"index"` - FinishReason string `json:"finish_reason"` + FinishReason *string `json:"finish_reason"` Message *Message `json:"message,omitempty"` Delta *Message `json:"delta,omitempty"` Text string `json:"text,omitempty"` diff --git a/docs/go.mod b/docs/go.mod index 0595d0523..35b89dd11 100644 --- a/docs/go.mod +++ b/docs/go.mod @@ -1,5 +1,3 @@ module github.com/McShelby/hugo-theme-relearn.git go 1.19 - -require github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 // indirect diff --git a/docs/go.sum b/docs/go.sum index 503662320..e69de29bb 100644 --- a/docs/go.sum +++ b/docs/go.sum @@ -1,4 +0,0 @@ -github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 h1:SmpwwN3DNzJWbV+IT8gaFu07ENUFpCvKou5BHYUKuVs= -github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200/go.mod h1:kx8MBj9T7SFR8ZClWvKZPmmUxBaltkoXvnWlZZcSnYA= -github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI= -github.com/twbs/bootstrap v5.3.2+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=