mirror of
https://github.com/mudler/LocalAI.git
synced 2026-03-31 21:25:59 -04:00
* fix(openresponses): do not omit required fields summary and id * fix(openresponses): ensure ORItemParam.Summary is never null Normalize Summary to an empty slice at serialization chokepoints (sendSSEEvent, bufferEvent, buildORResponse) so it always serializes as [] instead of null. Closes #9047
3054 lines
103 KiB
Go
3054 lines
103 KiB
Go
package openresponses
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/backend"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
|
openaiEndpoint "github.com/mudler/LocalAI/core/http/endpoints/openai"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
"github.com/mudler/LocalAI/core/templates"
|
|
"github.com/mudler/LocalAI/pkg/functions"
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
reason "github.com/mudler/LocalAI/pkg/reasoning"
|
|
"github.com/mudler/LocalAI/pkg/utils"
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
// ResponsesEndpoint is the Open Responses API endpoint
|
|
// https://www.openresponses.org/specification
|
|
// @Summary Create a response using the Open Responses API
|
|
// @Param request body schema.OpenResponsesRequest true "Request body"
|
|
// @Success 200 {object} schema.ORResponseResource "Response"
|
|
// @Router /v1/responses [post]
|
|
func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		// Capture creation time and mint a unique response ID up front so they
		// are consistent across streaming, non-streaming, and background paths.
		createdAt := time.Now().Unix()
		responseID := fmt.Sprintf("resp_%s", uuid.New().String())

		// The request and model config are placed in the echo context by the
		// request-extraction middleware; reject the request if either is missing.
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenResponsesRequest)
		if !ok || input.Model == "" {
			return sendOpenResponsesError(c, 400, "invalid_request", "model is required", "")
		}

		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return sendOpenResponsesError(c, 400, "invalid_request", "model configuration not found", "")
		}

		// Initialize store with TTL from appConfig
		store := GetGlobalStore()
		if appConfig.OpenResponsesStoreTTL > 0 {
			store.SetTTL(appConfig.OpenResponsesStoreTTL)
		}

		// Check if storage is disabled for this request (store defaults to true
		// when the field is absent).
		shouldStore := true
		if input.Store != nil && !*input.Store {
			shouldStore = false
		}

		// Handle previous_response_id if provided: replay the prior turn's
		// input and output as conversation history before the new input.
		var previousResponse *schema.ORResponseResource
		var messages []schema.Message
		if input.PreviousResponseID != "" {
			stored, err := store.Get(input.PreviousResponseID)
			if err != nil {
				return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("previous response not found: %s", input.PreviousResponseID), "previous_response_id")
			}
			previousResponse = stored.Response

			// Also convert previous response input to messages
			previousInputMessages, err := convertORInputToMessages(stored.Request.Input, cfg)
			if err != nil {
				return sendOpenResponsesError(c, 400, "invalid_request", fmt.Sprintf("failed to convert previous input: %v", err), "")
			}

			// Convert previous response output items to messages
			previousOutputMessages, err := convertOROutputItemsToMessages(previousResponse.Output)
			if err != nil {
				return sendOpenResponsesError(c, 400, "invalid_request", fmt.Sprintf("failed to convert previous response: %v", err), "")
			}

			// Concatenate: previous_input + previous_output + new_input
			// Start with previous input messages
			messages = previousInputMessages
			// Add previous output as assistant messages
			messages = append(messages, previousOutputMessages...)
		}

		// Convert Open Responses input to internal Messages
		newMessages, err := convertORInputToMessages(input.Input, cfg)
		if err != nil {
			return sendOpenResponsesError(c, 400, "invalid_request", fmt.Sprintf("failed to parse input: %v", err), "")
		}
		// Append new input messages
		messages = append(messages, newMessages...)

		// Add instructions as system message if provided (prepended so it
		// precedes all conversation history).
		if input.Instructions != "" {
			messages = append([]schema.Message{{Role: "system", StringContent: input.Instructions}}, messages...)
		}

		// Handle tools
		var funcs functions.Functions
		var shouldUseFn bool
		var mcpToolInfos []mcpTools.MCPToolInfo

		if len(input.Tools) > 0 {
			funcs, shouldUseFn = convertORToolsToFunctions(input, cfg)
		}

		// MCP injection: prompts, resources, and tools.
		// MCP-related hints come in via request metadata; MCP server wiring
		// comes from the model config. Failures below are logged and tolerated
		// (the request proceeds without the failed injection).
		mcpServers := mcpTools.MCPServersFromMetadata(input.Metadata)
		mcpPromptName, mcpPromptArgs := mcpTools.MCPPromptFromMetadata(input.Metadata)
		mcpResourceURIs := mcpTools.MCPResourcesFromMetadata(input.Metadata)

		hasMCPRequest := len(mcpServers) > 0 || mcpPromptName != "" || len(mcpResourceURIs) > 0
		hasMCPConfig := cfg.MCP.Servers != "" || cfg.MCP.Stdio != ""

		if hasMCPRequest && hasMCPConfig {
			remote, stdio, mcpErr := cfg.MCP.MCPConfigFromYAML()
			if mcpErr == nil {
				namedSessions, sessErr := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, mcpServers)
				if sessErr == nil && len(namedSessions) > 0 {
					// Prompt injection: prepend MCP prompt messages to the conversation.
					if mcpPromptName != "" {
						prompts, discErr := mcpTools.DiscoverMCPPrompts(c.Request().Context(), namedSessions)
						if discErr == nil {
							promptMsgs, getErr := mcpTools.GetMCPPrompt(c.Request().Context(), prompts, mcpPromptName, mcpPromptArgs)
							if getErr == nil {
								var injected []schema.Message
								for _, pm := range promptMsgs {
									injected = append(injected, schema.Message{
										Role:    string(pm.Role),
										Content: mcpTools.PromptMessageToText(pm),
									})
								}
								messages = append(injected, messages...)
								xlog.Debug("Open Responses MCP prompt injected", "prompt", mcpPromptName, "messages", len(injected))
							} else {
								xlog.Error("Failed to get MCP prompt", "error", getErr)
							}
						}
					}

					// Resource injection: append resource contents to the last message.
					if len(mcpResourceURIs) > 0 {
						resources, discErr := mcpTools.DiscoverMCPResources(c.Request().Context(), namedSessions)
						if discErr == nil {
							var resourceTexts []string
							for _, uri := range mcpResourceURIs {
								content, readErr := mcpTools.ReadMCPResource(c.Request().Context(), resources, uri)
								if readErr != nil {
									xlog.Error("Failed to read MCP resource", "error", readErr, "uri", uri)
									continue
								}
								// Prefer the discovered resource name over the raw URI for the header.
								name := uri
								for _, r := range resources {
									if r.URI == uri {
										name = r.Name
										break
									}
								}
								resourceTexts = append(resourceTexts, fmt.Sprintf("--- MCP Resource: %s ---\n%s", name, content))
							}
							if len(resourceTexts) > 0 && len(messages) > 0 {
								lastIdx := len(messages) - 1
								suffix := "\n\n" + strings.Join(resourceTexts, "\n\n")
								switch ct := messages[lastIdx].Content.(type) {
								case string:
									messages[lastIdx].Content = ct + suffix
								default:
									messages[lastIdx].Content = fmt.Sprintf("%v%s", ct, suffix)
								}
								xlog.Debug("Open Responses MCP resources injected", "count", len(resourceTexts))
							}
						}
					}

					// Tool injection: merge discovered MCP tools into funcs and input.Tools.
					if len(mcpServers) > 0 {
						discovered, discErr := mcpTools.DiscoverMCPTools(c.Request().Context(), namedSessions)
						if discErr == nil {
							mcpToolInfos = discovered
							for _, ti := range mcpToolInfos {
								funcs = append(funcs, ti.Function)
								input.Tools = append(input.Tools, schema.ORFunctionTool{
									Type:        "function",
									Name:        ti.Function.Name,
									Description: ti.Function.Description,
									Parameters:  ti.Function.Parameters,
								})
							}
							shouldUseFn = len(funcs) > 0 && cfg.ShouldUseFunctions()
							xlog.Debug("Open Responses MCP tools injected", "count", len(mcpToolInfos), "total_funcs", len(funcs))
						} else {
							xlog.Error("Failed to discover MCP tools", "error", discErr)
						}
					}
				}
			} else {
				xlog.Error("Failed to parse MCP config", "error", mcpErr)
			}
		} else if len(input.Tools) == 0 && hasMCPConfig {
			// Backward compat: model has MCP config, no user tools and no mcp_servers field
			remote, stdio, mcpErr := cfg.MCP.MCPConfigFromYAML()
			if mcpErr == nil {
				namedSessions, sessErr := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil)
				if sessErr == nil && len(namedSessions) > 0 {
					discovered, discErr := mcpTools.DiscoverMCPTools(c.Request().Context(), namedSessions)
					if discErr == nil {
						mcpToolInfos = discovered
						for _, ti := range mcpToolInfos {
							funcs = append(funcs, ti.Function)
							input.Tools = append(input.Tools, schema.ORFunctionTool{
								Type:        "function",
								Name:        ti.Function.Name,
								Description: ti.Function.Description,
								Parameters:  ti.Function.Parameters,
							})
						}
						shouldUseFn = len(funcs) > 0 && cfg.ShouldUseFunctions()
						xlog.Debug("Open Responses MCP tools auto-activated", "count", len(mcpToolInfos))
					}
				}
			}
		}

		// Create OpenAI-compatible request for internal processing
		openAIReq := &schema.OpenAIRequest{
			PredictionOptions: schema.PredictionOptions{
				BasicModelRequest: schema.BasicModelRequest{Model: input.Model},
				Temperature:       input.Temperature,
				TopP:              input.TopP,
				Maxtokens:         input.MaxOutputTokens,
			},
			Messages:  messages,
			Stream:    input.Stream,
			Context:   input.Context,
			Cancel:    input.Cancel,
			Functions: funcs,
		}

		// Handle text_format -> response_format conversion
		if input.TextFormat != nil {
			openAIReq.ResponseFormat = convertTextFormatToResponseFormat(input.TextFormat)
		}

		// Generate grammar for function calling (similar to OpenAI chat endpoint)
		if shouldUseFn && !cfg.FunctionsConfig.GrammarConfig.NoGrammar {
			// Add no-action function to allow model to respond without calling a tool
			noActionName := "answer"
			noActionDescription := "use this action to answer without performing any action"
			if cfg.FunctionsConfig.NoActionFunctionName != "" {
				noActionName = cfg.FunctionsConfig.NoActionFunctionName
			}
			if cfg.FunctionsConfig.NoActionDescriptionName != "" {
				noActionDescription = cfg.FunctionsConfig.NoActionDescriptionName
			}

			noActionGrammar := functions.Function{
				Name:        noActionName,
				Description: noActionDescription,
				Parameters: map[string]interface{}{
					"properties": map[string]interface{}{
						"message": map[string]interface{}{
							"type":        "string",
							"description": "The message to reply the user with",
						},
					},
				},
			}

			// Make a copy of funcs to avoid modifying the original
			funcsWithNoAction := make(functions.Functions, len(funcs))
			copy(funcsWithNoAction, funcs)

			// Append no-action function unless disabled
			if !cfg.FunctionsConfig.DisableNoAction {
				funcsWithNoAction = append(funcsWithNoAction, noActionGrammar)
			}

			// Force picking one of the functions by the request
			if cfg.FunctionToCall() != "" {
				funcsWithNoAction = funcsWithNoAction.Select(cfg.FunctionToCall())
			}

			// Generate grammar to constrain model output to valid function calls.
			// Grammar failures are non-fatal: inference proceeds unconstrained.
			jsStruct := funcsWithNoAction.ToJSONStructure(cfg.FunctionsConfig.FunctionNameKey, cfg.FunctionsConfig.FunctionNameKey)
			g, err := jsStruct.Grammar(cfg.FunctionsConfig.GrammarOptions()...)
			if err == nil {
				cfg.Grammar = g
				xlog.Debug("Open Responses - Generated grammar for function calling")
			} else {
				xlog.Error("Open Responses - Failed generating grammar for function calling", "error", err)
			}
		}

		// Template the prompt
		predInput := evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn)
		xlog.Debug("Open Responses - Prompt (after templating)", "prompt", predInput)

		// Handle background mode: return a "queued" response immediately and
		// run inference in a goroutine that updates the store as it progresses.
		isBackground := input.Background != nil && *input.Background
		if isBackground {
			// Background mode requires storage
			if !shouldStore {
				return sendOpenResponsesError(c, 400, "invalid_request_error", "background=true requires store=true", "background")
			}

			// Create initial response with "queued" status
			queuedResponse := buildORResponse(responseID, createdAt, nil, schema.ORStatusQueued, input, []schema.ORItemField{}, nil, true)

			// Create cancellable context for background execution
			bgCtx, bgCancel := context.WithCancel(context.Background())

			// Store the background response
			store.StoreBackground(responseID, input, queuedResponse, bgCancel, input.Stream)

			// Start background processing goroutine
			go func() {
				defer bgCancel()

				// Update status to in_progress
				store.UpdateStatus(responseID, schema.ORStatusInProgress, nil)

				var finalResponse *schema.ORResponseResource
				var bgErr error

				if input.Stream {
					// Background streaming processing (buffer events)
					finalResponse, bgErr = handleBackgroundStream(bgCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, mcpToolInfos, evaluator)
				} else {
					// Background non-streaming processing
					finalResponse, bgErr = handleBackgroundNonStream(bgCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, mcpToolInfos, evaluator)
				}

				if bgErr != nil {
					xlog.Error("Background response failed", "response_id", responseID, "error", bgErr)
					now := time.Now().Unix()
					store.UpdateStatus(responseID, schema.ORStatusFailed, &now)
					return
				}

				// Update final response in store
				if finalResponse != nil {
					store.UpdateResponse(responseID, finalResponse)
				}
			}()

			// Return immediately with queued response
			return c.JSON(200, queuedResponse)
		}

		if input.Stream {
			return handleOpenResponsesStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore, mcpToolInfos, evaluator)
		}

		return handleOpenResponsesNonStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore, mcpToolInfos, evaluator, 0)
	}
}
|
|
|
|
// convertORInputToMessages converts Open Responses input to internal Messages.
//
// input may be a plain string (treated as a single user message) or an array
// of item objects ("message", "reasoning", "function_call",
// "function_call_output", "item_reference"); any other type is an error.
// Unrecognized array entries and item types are silently skipped. The
// resulting contiguous assistant fragments are merged before returning.
func convertORInputToMessages(input interface{}, cfg *config.ModelConfig) ([]schema.Message, error) {
	var messages []schema.Message

	switch v := input.(type) {
	case string:
		// Simple string = user message
		return []schema.Message{{Role: "user", StringContent: v}}, nil
	case []interface{}:
		// Array of items
		for _, itemRaw := range v {
			itemMap, ok := itemRaw.(map[string]interface{})
			if !ok {
				// Skip entries that are not JSON objects.
				continue
			}

			itemType, _ := itemMap["type"].(string)
			switch itemType {
			case "message":
				msg, err := convertORMessageItem(itemMap, cfg)
				if err != nil {
					return nil, err
				}
				messages = append(messages, msg)
			case "reasoning":
				msg, err := convertORReasoningItemToMessage(itemMap)
				if err != nil {
					return nil, err
				}
				messages = append(messages, msg)
			case "function_call":
				msg, err := convertORFunctionCallItemToMessage(itemMap)
				if err != nil {
					return nil, err
				}
				messages = append(messages, msg)
			case "function_call_output":
				// Convert function call output to tool role message
				callID, _ := itemMap["call_id"].(string)
				output := itemMap["output"]
				var outputStr string
				if str, ok := output.(string); ok {
					outputStr = str
				} else {
					// Non-string outputs are serialized to a JSON string.
					outputBytes, _ := json.Marshal(output)
					outputStr = string(outputBytes)
				}
				// For tool messages, we use the Name field to store the call ID
				messages = append(messages, schema.Message{
					Role:          "tool",
					Name:          callID,
					Content:       outputStr,
					StringContent: outputStr,
				})
			case "item_reference":
				// Handle item references - look up item in stored responses.
				// According to spec, item_reference uses "id" field, not "item_id"
				itemID, ok := itemMap["id"].(string)
				if !ok || itemID == "" {
					return nil, fmt.Errorf("item_reference missing id")
				}

				store := GetGlobalStore()
				item, responseID, err := store.FindItem(itemID)
				if err != nil {
					return nil, fmt.Errorf("item not found: %s (from response %s): %w", itemID, responseID, err)
				}

				// Log item reference resolution for debugging
				xlog.Debug("Resolved item reference", "item_id", itemID, "response_id", responseID, "item_type", item.Type)

				// Convert referenced item to message based on its type
				msg, err := convertORItemToMessage(item, responseID)
				if err != nil {
					return nil, fmt.Errorf("failed to convert referenced item %s from response %s: %w", itemID, responseID, err)
				}
				messages = append(messages, msg)
			}
		}
		return mergeContiguousAssistantMessages(messages), nil
	default:
		return nil, fmt.Errorf("unsupported input type: %T", input)
	}
}
|
|
|
|
// convertORReasoningItemToMessage converts an Open Responses reasoning item to an assistant Message fragment (for merging).
|
|
func convertORReasoningItemToMessage(itemMap map[string]interface{}) (schema.Message, error) {
|
|
var reasoning string
|
|
if content := itemMap["content"]; content != nil {
|
|
if s, ok := content.(string); ok {
|
|
reasoning = s
|
|
} else if parts, ok := content.([]interface{}); ok {
|
|
for _, p := range parts {
|
|
if partMap, ok := p.(map[string]interface{}); ok {
|
|
if t, _ := partMap["type"].(string); (t == "output_text" || t == "input_text") && partMap["text"] != nil {
|
|
if tStr, ok := partMap["text"].(string); ok {
|
|
reasoning += tStr
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return schema.Message{Role: "assistant", Reasoning: stringPtr(reasoning)}, nil
|
|
}
|
|
|
|
// convertORFunctionCallItemToMessage converts an Open Responses function_call item to an assistant Message fragment (for merging).
|
|
func convertORFunctionCallItemToMessage(itemMap map[string]interface{}) (schema.Message, error) {
|
|
callID, _ := itemMap["call_id"].(string)
|
|
name, _ := itemMap["name"].(string)
|
|
arguments, _ := itemMap["arguments"].(string)
|
|
if callID == "" {
|
|
callID = fmt.Sprintf("call_%s", name)
|
|
}
|
|
return schema.Message{
|
|
Role: "assistant",
|
|
ToolCalls: []schema.ToolCall{{
|
|
Index: 0,
|
|
ID: callID,
|
|
Type: "function",
|
|
FunctionCall: schema.FunctionCall{Name: name, Arguments: arguments},
|
|
}},
|
|
}, nil
|
|
}
|
|
|
|
// stringPtr returns a pointer to a copy of s, or nil when s is empty.
// Returning nil for the empty string lets optional fields stay unset
// rather than pointing at "".
func stringPtr(s string) *string {
	if len(s) == 0 {
		return nil
	}
	p := s
	return &p
}
|
|
|
|
// convertORItemToMessage converts a single ORItemField to a Message.
// responseID is the ID of the response where this item was found (for logging/debugging).
//
// Supported item types are "message", "function_call_output", "reasoning",
// and "function_call"; any other type returns an error. For message items
// both []ORContentPart and plain string content are accepted; other content
// shapes yield empty text.
func convertORItemToMessage(item *schema.ORItemField, responseID string) (schema.Message, error) {
	switch item.Type {
	case "message":
		// Convert message item to message: concatenate all text parts.
		var textContent string
		if contentParts, ok := item.Content.([]schema.ORContentPart); ok {
			for _, part := range contentParts {
				if part.Type == "output_text" || part.Type == "input_text" {
					textContent += part.Text
				}
			}
		} else if str, ok := item.Content.(string); ok {
			textContent = str
		}
		return schema.Message{
			Role:          item.Role,
			StringContent: textContent,
			Content:       textContent,
		}, nil
	case "function_call_output":
		// Convert function call output to tool role message; the CallID is
		// carried in the Name field.
		var outputStr string
		if str, ok := item.Output.(string); ok {
			outputStr = str
		} else {
			// Non-string outputs are serialized to a JSON string.
			outputBytes, _ := json.Marshal(item.Output)
			outputStr = string(outputBytes)
		}
		return schema.Message{
			Role:          "tool",
			Name:          item.CallID,
			Content:       outputStr,
			StringContent: outputStr,
		}, nil
	case "reasoning":
		reasoning := extractReasoningContentFromORItem(item)
		return schema.Message{Role: "assistant", Reasoning: stringPtr(reasoning)}, nil
	case "function_call":
		// Synthesize a call ID from the function name when none was recorded.
		callID := item.CallID
		if callID == "" {
			callID = fmt.Sprintf("call_%s", item.Name)
		}
		return schema.Message{
			Role: "assistant",
			ToolCalls: []schema.ToolCall{{
				Index:        0,
				ID:           callID,
				Type:         "function",
				FunctionCall: schema.FunctionCall{Name: item.Name, Arguments: item.Arguments},
			}},
		}, nil
	default:
		return schema.Message{}, fmt.Errorf("unsupported item type for conversion: %s (from response %s)", item.Type, responseID)
	}
}
|
|
|
|
func extractReasoningContentFromORItem(item *schema.ORItemField) string {
|
|
if contentParts, ok := item.Content.([]schema.ORContentPart); ok {
|
|
var s string
|
|
for _, part := range contentParts {
|
|
if part.Type == "output_text" || part.Type == "input_text" {
|
|
s += part.Text
|
|
}
|
|
}
|
|
return s
|
|
}
|
|
if s, ok := item.Content.(string); ok {
|
|
return s
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// convertOROutputItemsToMessages converts Open Responses output items to internal Messages.
// Contiguous assistant items (message, reasoning, function_call) are merged into a single message.
//
// Supported item types are "message" (only "output_text" parts are kept),
// "reasoning", "function_call", and "function_call_output"; other types are
// silently skipped.
func convertOROutputItemsToMessages(outputItems []schema.ORItemField) ([]schema.Message, error) {
	var messages []schema.Message

	for _, item := range outputItems {
		switch item.Type {
		case "message":
			// Concatenate all output_text parts into a single string.
			var textContent string
			if contentParts, ok := item.Content.([]schema.ORContentPart); ok && len(contentParts) > 0 {
				for _, part := range contentParts {
					if part.Type == "output_text" {
						textContent += part.Text
					}
				}
			}
			messages = append(messages, schema.Message{
				Role:          item.Role,
				StringContent: textContent,
				Content:       textContent,
			})
		case "reasoning":
			reasoning := extractReasoningContentFromORItem(&item)
			messages = append(messages, schema.Message{Role: "assistant", Reasoning: stringPtr(reasoning)})
		case "function_call":
			msg := schema.Message{
				Role: "assistant",
				ToolCalls: []schema.ToolCall{{
					Index:        0,
					ID:           item.CallID,
					Type:         "function",
					FunctionCall: schema.FunctionCall{Name: item.Name, Arguments: item.Arguments},
				}},
			}
			// Synthesize a call ID from the function name when none was recorded.
			if msg.ToolCalls[0].ID == "" {
				msg.ToolCalls[0].ID = fmt.Sprintf("call_%s", item.Name)
			}
			messages = append(messages, msg)
		case "function_call_output":
			// Convert function call output to tool role message; CallID is
			// carried in the Name field.
			var outputStr string
			if str, ok := item.Output.(string); ok {
				outputStr = str
			} else {
				// Non-string outputs are serialized to a JSON string.
				outputBytes, _ := json.Marshal(item.Output)
				outputStr = string(outputBytes)
			}
			messages = append(messages, schema.Message{
				Role:          "tool",
				Name:          item.CallID,
				Content:       outputStr,
				StringContent: outputStr,
			})
		}
	}

	return mergeContiguousAssistantMessages(messages), nil
}
|
|
|
|
// mergeContiguousAssistantMessages merges contiguous assistant messages into one.
// Many chat templates expect content, reasoning, and tool calls in a single assistant message
// (see e.g. llama.cpp PR 19773). This avoids creating separate messages per input item.
//
// A non-assistant message flushes the current accumulator (via
// flushAssistantAccumulator) before being appended, so only runs of adjacent
// assistant fragments are merged. Text and reasoning fragments are joined
// with "\n"; tool calls are concatenated and re-indexed on flush.
func mergeContiguousAssistantMessages(messages []schema.Message) []schema.Message {
	if len(messages) == 0 {
		return messages
	}
	var out []schema.Message
	var acc *schema.Message
	for i := range messages {
		m := &messages[i]
		if m.Role != "assistant" {
			// A non-assistant message ends the current assistant run.
			flushAssistantAccumulator(&out, &acc)
			out = append(out, *m)
			continue
		}
		if acc == nil {
			acc = &schema.Message{Role: "assistant"}
		}
		if m.StringContent != "" {
			if acc.StringContent != "" {
				acc.StringContent += "\n" + m.StringContent
			} else {
				acc.StringContent = m.StringContent
			}
			// Keep Content in sync: adopt the first fragment's Content as-is;
			// once string fragments accumulate, mirror the merged text.
			if acc.Content == nil {
				acc.Content = m.Content
			} else if _, ok := m.Content.(string); ok {
				acc.Content = acc.StringContent
			}
		}
		if m.Reasoning != nil && *m.Reasoning != "" {
			if acc.Reasoning == nil {
				acc.Reasoning = m.Reasoning
			} else {
				combined := *acc.Reasoning + "\n" + *m.Reasoning
				acc.Reasoning = &combined
			}
		}
		if len(m.ToolCalls) > 0 {
			acc.ToolCalls = append(acc.ToolCalls, m.ToolCalls...)
		}
	}
	// Flush any trailing assistant run.
	flushAssistantAccumulator(&out, &acc)
	return out
}
|
|
|
|
func flushAssistantAccumulator(out *[]schema.Message, acc **schema.Message) {
|
|
if acc == nil || *acc == nil {
|
|
return
|
|
}
|
|
m := *acc
|
|
if m.StringContent == "" && (m.Reasoning == nil || *m.Reasoning == "") && len(m.ToolCalls) == 0 {
|
|
*acc = nil
|
|
return
|
|
}
|
|
if m.Content == nil {
|
|
m.Content = m.StringContent
|
|
}
|
|
// Re-index tool calls after merge (each may have been 0)
|
|
for i := range m.ToolCalls {
|
|
m.ToolCalls[i].Index = i
|
|
}
|
|
*out = append(*out, *m)
|
|
*acc = nil
|
|
}
|
|
|
|
// convertORMessageItem converts an Open Responses message item to internal Message.
//
// The item's "content" may be a plain string or an array of typed parts
// (input_text, input_image, input_file, input_video, input_audio). Media
// parts are fetched/encoded to base64; encoding failures are logged and the
// part is skipped. When any media is present, StringContent is re-rendered
// through the model's multimodal template.
func convertORMessageItem(itemMap map[string]interface{}, cfg *config.ModelConfig) (schema.Message, error) {
	role, _ := itemMap["role"].(string)
	msg := schema.Message{Role: role}

	content := itemMap["content"]
	switch contentVal := content.(type) {
	case string:
		msg.StringContent = contentVal
		msg.Content = contentVal
	case []interface{}:
		// Array of content parts
		var textContent string
		var stringImages []string
		var stringVideos []string
		var stringAudios []string

		for _, partRaw := range contentVal {
			partMap, ok := partRaw.(map[string]interface{})
			if !ok {
				// Skip parts that are not JSON objects.
				continue
			}

			partType, _ := partMap["type"].(string)
			switch partType {
			case "input_text":
				if text, ok := partMap["text"].(string); ok {
					textContent += text
				}
			case "input_image":
				if imageURL, ok := partMap["image_url"].(string); ok {
					// Convert to base64 data URI
					base64, err := utils.GetContentURIAsBase64(imageURL)
					if err != nil {
						xlog.Error("Failed encoding image", "error", err)
						continue
					}
					stringImages = append(stringImages, base64)
				}
			case "input_file":
				if fileURL, ok := partMap["file_url"].(string); ok {
					// Convert to base64
					base64, err := utils.GetContentURIAsBase64(fileURL)
					if err != nil {
						xlog.Error("Failed encoding file", "error", err)
						continue
					}
					// For now, treat files as text content
					textContent += base64
				} else if fileData, ok := partMap["file_data"].(string); ok {
					// Already base64
					textContent += fileData
				}
			case "input_video":
				if videoURL, ok := partMap["video_url"].(string); ok {
					// Convert to base64 data URI
					base64, err := utils.GetContentURIAsBase64(videoURL)
					if err != nil {
						xlog.Error("Failed encoding video", "error", err)
						continue
					}
					stringVideos = append(stringVideos, base64)
				}
			case "input_audio":
				if audioURL, ok := partMap["audio_url"].(string); ok {
					// Convert to base64 data URI
					base64, err := utils.GetContentURIAsBase64(audioURL)
					if err != nil {
						xlog.Error("Failed encoding audio", "error", err)
						continue
					}
					stringAudios = append(stringAudios, base64)
				}
			}
		}

		msg.StringContent = textContent
		msg.Content = textContent
		msg.StringImages = stringImages
		msg.StringVideos = stringVideos
		msg.StringAudios = stringAudios

		// Template multimodal content
		if len(stringImages) > 0 || len(stringVideos) > 0 || len(stringAudios) > 0 {
			// NOTE(review): the template error is discarded here; on failure
			// StringContent presumably stays empty — confirm intended.
			msg.StringContent, _ = templates.TemplateMultiModal(cfg.TemplateConfig.Multimodal, templates.MultiModalOptions{
				TotalImages:     len(stringImages),
				TotalVideos:     len(stringVideos),
				TotalAudios:     len(stringAudios),
				ImagesInMessage: len(stringImages),
				VideosInMessage: len(stringVideos),
				AudiosInMessage: len(stringAudios),
			}, textContent)
		}
	}

	return msg, nil
}
|
|
|
|
// convertORToolsToFunctions converts Open Responses tools to internal Functions.
//
// Only tools of type "function" are converted, filtered by the request's
// allowed_tools list when present. It also applies tool_choice as a side
// effect on cfg: "required" or a {"type":"function","name":...} object set
// the forced function-call string, while "none" disables tools entirely.
// Returns the functions plus whether function calling should be used.
func convertORToolsToFunctions(input *schema.OpenResponsesRequest, cfg *config.ModelConfig) (functions.Functions, bool) {
	if len(input.Tools) == 0 {
		return nil, false
	}

	// Build allowed tools set if specified
	allowedSet := make(map[string]bool)
	if len(input.AllowedTools) > 0 {
		for _, name := range input.AllowedTools {
			allowedSet[name] = true
		}
	}

	var funcs functions.Functions
	for _, tool := range input.Tools {
		if tool.Type == "function" {
			// Skip if not in allowed list (when allowed_tools is specified)
			if len(allowedSet) > 0 && !allowedSet[tool.Name] {
				continue
			}
			f := functions.Function{
				Name:        tool.Name,
				Description: tool.Description,
				Parameters:  tool.Parameters,
			}
			funcs = append(funcs, f)
		}
	}

	// Handle tool_choice
	if input.ToolChoice != nil {
		switch tc := input.ToolChoice.(type) {
		case string:
			switch tc {
			case "required":
				cfg.SetFunctionCallString("required")
			case "none":
				// Tools explicitly disabled for this request.
				return nil, false
			case "auto":
				// "auto" is the default - let model decide whether to use tools
				// Tools are available but not forced
			}
		case map[string]interface{}:
			// Object form forces a specific function by name.
			if tcType, ok := tc["type"].(string); ok && tcType == "function" {
				if name, ok := tc["name"].(string); ok {
					cfg.SetFunctionCallString(name)
				}
			}
		}
	}

	return funcs, len(funcs) > 0 && cfg.ShouldUseFunctions()
}
|
|
|
|
// convertTextFormatToResponseFormat maps the Open Responses text_format value
// onto an OpenAI-style response_format value. A string becomes
// {"type": <string>}; a map with type "json_schema" is wrapped with the full
// payload under "json_schema"; any other typed map becomes {"type": <type>}.
// Unrecognized shapes yield nil.
func convertTextFormatToResponseFormat(textFormat interface{}) interface{} {
	if name, isString := textFormat.(string); isString {
		return map[string]interface{}{"type": name}
	}
	tf, isMap := textFormat.(map[string]interface{})
	if !isMap {
		return nil
	}
	tfType, hasType := tf["type"].(string)
	if !hasType {
		return nil
	}
	if tfType == "json_schema" {
		// json_schema carries the whole original map as the schema payload.
		return map[string]interface{}{
			"type":        "json_schema",
			"json_schema": tf,
		}
	}
	return map[string]interface{}{"type": tfType}
}
|
|
|
|
// handleBackgroundNonStream runs a background (non-SSE) response to completion and
// returns the final Open Responses resource.
//
// It performs up to mcpMaxIterations+1 inference passes: whenever the model emits a
// tool call that matches an MCP tool, the tool is executed server-side, its result is
// appended to openAIReq.Messages as a "tool" message, and the loop re-templates and
// re-runs inference. The loop exits early (returning a completed response) on the
// first pass that produces no MCP tool calls. If every pass triggers MCP calls, the
// iteration cap is hit and an error is returned.
//
// Output items (function_call / function_call_output / message) are accumulated in
// allOutputItems across iterations so the final resource reflects the whole agent run.
//
// NOTE(review): the store parameter is not referenced in this function body — it
// appears to be kept for signature parity with handleBackgroundStream; confirm.
// NOTE(review): the reported usage reflects only the final inference pass, not the
// sum across MCP iterations — confirm whether that is intended.
func handleBackgroundNonStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) (*schema.ORResponseResource, error) {
	// Iteration cap for the MCP tool-execution loop; overridable per-model.
	mcpMaxIterations := 10
	if cfg.Agent.MaxIterations > 0 {
		mcpMaxIterations = cfg.Agent.MaxIterations
	}
	hasMCPTools := len(mcpToolInfos) > 0
	var allOutputItems []schema.ORItemField

	for mcpIteration := 0; mcpIteration <= mcpMaxIterations; mcpIteration++ {
		// After the first pass the conversation has grown (assistant + tool
		// messages), so the prompt must be re-templated from scratch.
		if mcpIteration > 0 {
			predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn)
			xlog.Debug("Background MCP re-templating", "iteration", mcpIteration)
		}

		// Populate openAIReq fields for ComputeChoices
		openAIReq.Tools = convertORToolsToOpenAIFormat(input.Tools)
		openAIReq.ToolsChoice = input.ToolChoice
		if input.TopLogprobs != nil && *input.TopLogprobs > 0 {
			openAIReq.TopLogprobs = input.TopLogprobs
			openAIReq.Logprobs = schema.LogprobsValue{Enabled: true}
		}
		openAIReq.LogitBias = input.LogitBias

		// Bail out before starting an inference pass if the caller cancelled.
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		var result string
		cb := func(s string, c *[]schema.Choice) {
			result = s
		}
		choices, tokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, cb, nil)
		if err != nil {
			return nil, fmt.Errorf("model inference failed: %w", err)
		}

		// Extract logprobs from choices if available
		var resultLogprobs *schema.Logprobs
		if len(choices) > 0 {
			resultLogprobs = choices[0].Logprobs
		}

		// Parse tool calls
		var funcCallResults []functions.FuncCallResults
		var textContent string

		if shouldUseFn {
			// Prefer tool calls already parsed by the backend (chat deltas);
			// otherwise parse the raw text result on the Go side.
			if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 {
				funcCallResults = deltaToolCalls
				textContent = functions.ContentFromChatDeltas(chatDeltas)
			} else {
				cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig)
				funcCallResults = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig)
				textContent = functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig)
			}

			// The "no action" pseudo-function means the model chose to answer
			// in plain text instead of calling a tool.
			noActionName := "answer"
			if cfg.FunctionsConfig.NoActionFunctionName != "" {
				noActionName = cfg.FunctionsConfig.NoActionFunctionName
			}

			var toolCalls []schema.ToolCall
			for i, fc := range funcCallResults {
				if fc.Name == noActionName {
					// Recover the plain-text answer from the no-action
					// arguments ({"message": "..."}), when present.
					if fc.Arguments != "" {
						var args map[string]interface{}
						if err := json.Unmarshal([]byte(fc.Arguments), &args); err == nil {
							if msg, ok := args["message"].(string); ok && msg != "" {
								textContent = msg
							}
						}
					}
					continue
				}
				toolCalls = append(toolCalls, schema.ToolCall{
					Index: i,
					ID:    fmt.Sprintf("fc_%s", uuid.New().String()),
					Type:  "function",
					FunctionCall: schema.FunctionCall{
						Name:      fc.Name,
						Arguments: fc.Arguments,
					},
				})
			}

			// MCP tool execution
			if hasMCPTools && len(toolCalls) > 0 {
				var hasMCPCalls bool
				for _, tc := range toolCalls {
					if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) {
						hasMCPCalls = true
						break
					}
				}
				if hasMCPCalls {
					// Record the assistant turn (with its tool calls) so the
					// next templating pass sees the full conversation.
					assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: toolCalls}
					openAIReq.Messages = append(openAIReq.Messages, assistantMsg)

					for _, tc := range toolCalls {
						// Emit function_call + function_call_output items
						allOutputItems = append(allOutputItems, schema.ORItemField{
							Type: "function_call", ID: fmt.Sprintf("fc_%s", uuid.New().String()),
							Status: "completed", CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments,
						})

						// Non-MCP (client-side) tools are surfaced but not executed here.
						if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) {
							continue
						}
						toolResult, toolErr := mcpTools.ExecuteMCPToolCall(ctx, mcpToolInfos, tc.FunctionCall.Name, tc.FunctionCall.Arguments)
						if toolErr != nil {
							// Feed the error back to the model as the tool output
							// rather than aborting the whole response.
							toolResult = fmt.Sprintf("Error: %v", toolErr)
						}
						openAIReq.Messages = append(openAIReq.Messages, schema.Message{
							Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tc.ID, Name: tc.FunctionCall.Name,
						})
						allOutputItems = append(allOutputItems, schema.ORItemField{
							Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()),
							Status: "completed", CallID: tc.ID, Output: toolResult,
						})
					}
					continue // next MCP iteration
				}
			}

			// No MCP calls, build output items
			if textContent != "" {
				allOutputItems = append(allOutputItems, schema.ORItemField{
					Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()),
					Status: "completed", Role: "assistant",
					Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, resultLogprobs)},
				})
			}
			for _, tc := range toolCalls {
				allOutputItems = append(allOutputItems, schema.ORItemField{
					Type: "function_call", ID: fmt.Sprintf("fc_%s", uuid.New().String()),
					Status: "completed", CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments,
				})
			}
			// Fallback: nothing parsed at all but the model did produce text —
			// return the raw result as a message so output is never empty.
			if len(allOutputItems) == 0 && result != "" {
				allOutputItems = append(allOutputItems, schema.ORItemField{
					Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()),
					Status: "completed", Role: "assistant",
					Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, resultLogprobs)},
				})
			}
		} else {
			// Functions disabled: the whole result is a plain assistant message.
			allOutputItems = append(allOutputItems, schema.ORItemField{
				Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()),
				Status: "completed", Role: "assistant",
				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, resultLogprobs)},
			})
		}

		// Reached only when this pass produced no MCP calls: finalize.
		now := time.Now().Unix()
		return buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, allOutputItems, &schema.ORUsage{
			InputTokens:  tokenUsage.Prompt,
			OutputTokens: tokenUsage.Completion,
			TotalTokens:  tokenUsage.Prompt + tokenUsage.Completion,
		}, true), nil
	} // end MCP iteration loop

	return nil, fmt.Errorf("MCP iteration limit reached")
}
|
|
|
|
// handleBackgroundStream runs a background streaming response, buffering every SSE
// event into the ResponseStore (via bufferEvent) so a client can attach later and
// replay/resume the stream. It mirrors the foreground streaming handler but writes
// to the store instead of an HTTP connection.
//
// Event sequence per the Open Responses streaming protocol:
// response.created / response.in_progress, then per iteration
// output_item.added -> content_part.added -> output_text.delta* -> (tool events |
// output_text.done -> content_part.done -> output_item.done), and finally
// response.completed. sequenceNumber is monotonically increasing across the whole
// stream, including deltas emitted inside the token callback closure.
//
// Like handleBackgroundNonStream, MCP tool calls trigger tool execution plus a
// re-templated re-inference pass, bounded by cfg.Agent.MaxIterations (default 10).
//
// NOTE(review): if the iteration cap is exhausted the loop simply falls through and
// a "completed" response is built from the items collected so far (unlike the
// non-stream variant, which returns an error) — confirm this asymmetry is intended.
// NOTE(review): usage reflects only the last inference pass (lastTokenUsage), not a
// sum over MCP iterations.
func handleBackgroundStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) (*schema.ORResponseResource, error) {
	// Populate openAIReq fields for ComputeChoices
	openAIReq.Tools = convertORToolsToOpenAIFormat(input.Tools)
	openAIReq.ToolsChoice = input.ToolChoice
	if input.TopLogprobs != nil && *input.TopLogprobs > 0 {
		openAIReq.TopLogprobs = input.TopLogprobs
		openAIReq.Logprobs = schema.LogprobsValue{Enabled: true}
	}
	openAIReq.LogitBias = input.LogitBias

	// Monotonic per-stream event counter; also mutated inside tokenCallback.
	sequenceNumber := 0

	// Emit response.created
	responseCreated := buildORResponse(responseID, createdAt, nil, schema.ORStatusInProgress, input, []schema.ORItemField{}, nil, true)
	bufferEvent(store, responseID, &schema.ORStreamEvent{
		Type:           "response.created",
		SequenceNumber: sequenceNumber,
		Response:       responseCreated,
	})
	sequenceNumber++

	// Emit response.in_progress
	bufferEvent(store, responseID, &schema.ORStreamEvent{
		Type:           "response.in_progress",
		SequenceNumber: sequenceNumber,
		Response:       responseCreated,
	})
	sequenceNumber++

	var accumulatedText string
	var collectedOutputItems []schema.ORItemField
	outputIndex := 0

	// Iteration cap for the MCP tool-execution loop; overridable per-model.
	mcpBgStreamMaxIterations := 10
	if cfg.Agent.MaxIterations > 0 {
		mcpBgStreamMaxIterations = cfg.Agent.MaxIterations
	}
	hasMCPTools := len(mcpToolInfos) > 0

	// Usage/logprobs from the most recent inference pass; used in the final response.
	var lastTokenUsage backend.TokenUsage
	var lastLogprobs *schema.Logprobs

	for mcpIter := 0; mcpIter <= mcpBgStreamMaxIterations; mcpIter++ {
		// After the first pass the conversation has grown, so re-template.
		if mcpIter > 0 {
			predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn)
			xlog.Debug("Background stream MCP re-templating", "iteration", mcpIter)
		}

		// Fresh message item for this iteration's assistant turn.
		accumulatedText = ""
		currentMessageID := fmt.Sprintf("msg_%s", uuid.New().String())

		// Emit output_item.added
		messageItem := &schema.ORItemField{
			Type:    "message",
			ID:      currentMessageID,
			Status:  "in_progress",
			Role:    "assistant",
			Content: []schema.ORContentPart{},
		}
		bufferEvent(store, responseID, &schema.ORStreamEvent{
			Type:           "response.output_item.added",
			SequenceNumber: sequenceNumber,
			OutputIndex:    &outputIndex,
			Item:           messageItem,
		})
		sequenceNumber++

		// Emit content_part.added
		currentContentIndex := 0
		emptyPart := makeOutputTextPart("")
		bufferEvent(store, responseID, &schema.ORStreamEvent{
			Type:           "response.content_part.added",
			SequenceNumber: sequenceNumber,
			ItemID:         currentMessageID,
			OutputIndex:    &outputIndex,
			ContentIndex:   &currentContentIndex,
			Part:           &emptyPart,
		})
		sequenceNumber++

		// Token callback for streaming: buffers a text delta per token and
		// aborts inference (returns false) if the context is cancelled.
		tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool {
			select {
			case <-ctx.Done():
				return false
			default:
			}

			accumulatedText += token

			// Buffer text delta
			bufferEvent(store, responseID, &schema.ORStreamEvent{
				Type:           "response.output_text.delta",
				SequenceNumber: sequenceNumber,
				ItemID:         currentMessageID,
				OutputIndex:    &outputIndex,
				ContentIndex:   &currentContentIndex,
				Delta:          strPtr(token),
				Logprobs:       emptyLogprobs(),
			})
			sequenceNumber++
			return true
		}

		var result string
		cb := func(s string, c *[]schema.Choice) {
			result = s
		}
		choices, tokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, cb, tokenCallback)
		if err != nil {
			return nil, fmt.Errorf("model inference failed: %w", err)
		}
		lastTokenUsage = tokenUsage
		if len(choices) > 0 {
			lastLogprobs = choices[0].Logprobs
		}

		// Check for MCP tool calls in the streamed result
		if shouldUseFn && hasMCPTools {
			// Prefer backend-parsed tool calls; fall back to text parsing.
			var funcCallResults []functions.FuncCallResults
			if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 {
				funcCallResults = deltaToolCalls
			} else {
				cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig)
				funcCallResults = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig)
			}

			// "No action" pseudo-function = plain-text answer, not a tool call.
			noActionName := "answer"
			if cfg.FunctionsConfig.NoActionFunctionName != "" {
				noActionName = cfg.FunctionsConfig.NoActionFunctionName
			}

			var toolCalls []schema.ToolCall
			for i, fc := range funcCallResults {
				if fc.Name == noActionName {
					continue
				}
				toolCalls = append(toolCalls, schema.ToolCall{
					Index: i, ID: fmt.Sprintf("fc_%s", uuid.New().String()),
					Type:         "function",
					FunctionCall: schema.FunctionCall{Name: fc.Name, Arguments: fc.Arguments},
				})
			}

			var hasMCPCalls bool
			for _, tc := range toolCalls {
				if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) {
					hasMCPCalls = true
					break
				}
			}

			if hasMCPCalls {
				// Close the current message (text done -> part done -> item done)
				// before emitting tool-call items.
				bufferEvent(store, responseID, &schema.ORStreamEvent{
					Type: "response.output_text.done", SequenceNumber: sequenceNumber,
					ItemID: currentMessageID, OutputIndex: &outputIndex,
					ContentIndex: &currentContentIndex, Text: strPtr(accumulatedText),
					Logprobs: emptyLogprobs(),
				})
				sequenceNumber++
				textPart := makeOutputTextPart(accumulatedText)
				bufferEvent(store, responseID, &schema.ORStreamEvent{
					Type: "response.content_part.done", SequenceNumber: sequenceNumber,
					ItemID: currentMessageID, OutputIndex: &outputIndex,
					ContentIndex: &currentContentIndex, Part: &textPart,
				})
				sequenceNumber++
				completedMsg := &schema.ORItemField{
					Type: "message", ID: currentMessageID, Status: "completed",
					Role: "assistant", Content: []schema.ORContentPart{textPart},
				}
				bufferEvent(store, responseID, &schema.ORStreamEvent{
					Type: "response.output_item.done", SequenceNumber: sequenceNumber,
					OutputIndex: &outputIndex, Item: completedMsg,
				})
				sequenceNumber++
				collectedOutputItems = append(collectedOutputItems, *completedMsg)

				// Append assistant message with tool calls
				assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: toolCalls}
				openAIReq.Messages = append(openAIReq.Messages, assistantMsg)

				// Execute MCP tools and emit events
				for _, tc := range toolCalls {
					// Each tool-call item occupies its own output index.
					outputIndex++
					functionCallItem := &schema.ORItemField{
						Type: "function_call", ID: tc.ID, Status: "completed",
						CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments,
					}
					bufferEvent(store, responseID, &schema.ORStreamEvent{
						Type: "response.output_item.added", SequenceNumber: sequenceNumber,
						OutputIndex: &outputIndex, Item: functionCallItem,
					})
					sequenceNumber++
					bufferEvent(store, responseID, &schema.ORStreamEvent{
						Type: "response.output_item.done", SequenceNumber: sequenceNumber,
						OutputIndex: &outputIndex, Item: functionCallItem,
					})
					sequenceNumber++
					collectedOutputItems = append(collectedOutputItems, *functionCallItem)

					// Non-MCP (client-side) tools are surfaced but not executed.
					if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) {
						continue
					}

					xlog.Debug("Executing MCP tool (background stream)", "tool", tc.FunctionCall.Name, "iteration", mcpIter)
					toolResult, toolErr := mcpTools.ExecuteMCPToolCall(ctx, mcpToolInfos, tc.FunctionCall.Name, tc.FunctionCall.Arguments)
					if toolErr != nil {
						// Feed the error back to the model as the tool output.
						toolResult = fmt.Sprintf("Error: %v", toolErr)
					}
					openAIReq.Messages = append(openAIReq.Messages, schema.Message{
						Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tc.ID, Name: tc.FunctionCall.Name,
					})

					outputIndex++
					outputItem := &schema.ORItemField{
						Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()),
						Status: "completed", CallID: tc.ID, Output: toolResult,
					}
					bufferEvent(store, responseID, &schema.ORStreamEvent{
						Type: "response.output_item.added", SequenceNumber: sequenceNumber,
						OutputIndex: &outputIndex, Item: outputItem,
					})
					sequenceNumber++
					bufferEvent(store, responseID, &schema.ORStreamEvent{
						Type: "response.output_item.done", SequenceNumber: sequenceNumber,
						OutputIndex: &outputIndex, Item: outputItem,
					})
					sequenceNumber++
					collectedOutputItems = append(collectedOutputItems, *outputItem)
				}
				continue // next MCP iteration
			}
		}

		// No MCP tools — close the message and break
		streamEventLogprobs := convertLogprobsForStreaming(lastLogprobs)
		bufferEvent(store, responseID, &schema.ORStreamEvent{
			Type:           "response.output_text.done",
			SequenceNumber: sequenceNumber,
			ItemID:         currentMessageID,
			OutputIndex:    &outputIndex,
			ContentIndex:   &currentContentIndex,
			Text:           strPtr(accumulatedText),
			Logprobs:       logprobsPtr(streamEventLogprobs),
		})
		sequenceNumber++

		textPart := makeOutputTextPartWithLogprobs(accumulatedText, lastLogprobs)
		bufferEvent(store, responseID, &schema.ORStreamEvent{
			Type:           "response.content_part.done",
			SequenceNumber: sequenceNumber,
			ItemID:         currentMessageID,
			OutputIndex:    &outputIndex,
			ContentIndex:   &currentContentIndex,
			Part:           &textPart,
		})
		sequenceNumber++

		completedMessageItem := &schema.ORItemField{
			Type:    "message",
			ID:      currentMessageID,
			Status:  "completed",
			Role:    "assistant",
			Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(accumulatedText, lastLogprobs)},
		}
		bufferEvent(store, responseID, &schema.ORStreamEvent{
			Type:           "response.output_item.done",
			SequenceNumber: sequenceNumber,
			OutputIndex:    &outputIndex,
			Item:           completedMessageItem,
		})
		sequenceNumber++
		collectedOutputItems = append(collectedOutputItems, *completedMessageItem)

		break
	} // end MCP background stream iteration loop

	// Build final response
	now := time.Now().Unix()
	response := buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, collectedOutputItems, &schema.ORUsage{
		InputTokens:  lastTokenUsage.Prompt,
		OutputTokens: lastTokenUsage.Completion,
		TotalTokens:  lastTokenUsage.Prompt + lastTokenUsage.Completion,
	}, true)

	// Emit response.completed
	bufferEvent(store, responseID, &schema.ORStreamEvent{
		Type:           "response.completed",
		SequenceNumber: sequenceNumber,
		Response:       response,
	})

	return response, nil
}
|
|
|
|
// bufferEvent stores an SSE event in the response store for streaming resume
|
|
func bufferEvent(store *ResponseStore, responseID string, event *schema.ORStreamEvent) {
|
|
normalizeORStreamEvent(event)
|
|
if err := store.AppendEvent(responseID, event); err != nil {
|
|
xlog.Error("Failed to buffer event", "response_id", responseID, "error", err)
|
|
}
|
|
}
|
|
|
|
// handleOpenResponsesNonStream serves a foreground non-streaming response: it runs
// inference, extracts reasoning, parses tool calls, and writes the final Open
// Responses resource as JSON to the client.
//
// MCP tool calls are handled by recursion: after executing the MCP tools and
// appending their results to openAIReq.Messages, the function re-templates the
// prompt and calls itself with mcpIteration+1. The recursion is bounded by
// cfg.Agent.MaxIterations (default 10); exceeding it returns a 500 error.
//
// Output item ordering per the spec: reasoning first (if any), then assistant
// message text, then function_call items, with a raw-result fallback message if
// nothing else was parsed. The response is stored in the global store when
// shouldStore is true so it can be retrieved later by ID.
func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, shouldStore bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator, mcpIteration int) error {
	// Recursion bound for MCP tool execution; overridable per-model.
	mcpMaxIterations := 10
	if cfg.Agent.MaxIterations > 0 {
		mcpMaxIterations = cfg.Agent.MaxIterations
	}
	if mcpIteration > mcpMaxIterations {
		return sendOpenResponsesError(c, 500, "server_error", "MCP iteration limit reached", "")
	}
	// Populate openAIReq fields for ComputeChoices
	openAIReq.Tools = convertORToolsToOpenAIFormat(input.Tools)
	openAIReq.ToolsChoice = input.ToolChoice
	if input.TopLogprobs != nil && *input.TopLogprobs > 0 {
		openAIReq.TopLogprobs = input.TopLogprobs
		openAIReq.Logprobs = schema.LogprobsValue{Enabled: true}
	}
	openAIReq.LogitBias = input.LogitBias

	var result string
	cb := func(s string, c *[]schema.Choice) {
		result = s
	}
	choices, tokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, cb, nil)
	if err != nil {
		xlog.Error("Open Responses model inference failed", "error", err)
		return sendOpenResponsesError(c, 500, "model_error", fmt.Sprintf("model inference failed: %v", err), "")
	}
	// Logprobs from the first choice, when the backend provided any.
	var resultLogprobs *schema.Logprobs
	if len(choices) > 0 {
		resultLogprobs = choices[0].Logprobs
	}
	xlog.Debug("Open Responses - Raw model result", "result", result, "shouldUseFn", shouldUseFn)

	// Detect if thinking token is already in prompt or template
	var template string
	if cfg.TemplateConfig.UseTokenizerTemplate {
		template = cfg.GetModelTemplate()
	} else {
		template = predInput
	}
	thinkingStartToken := reason.DetectThinkingStartToken(template, &cfg.ReasoningConfig)

	// Extract reasoning from result before cleaning
	reasoningContent, cleanedResult := reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)

	// Parse tool calls if using functions
	var outputItems []schema.ORItemField
	var toolCalls []schema.ToolCall

	// Add reasoning item if reasoning was found (reasoning comes first per spec)
	if reasoningContent != "" {
		reasoningItem := schema.ORItemField{
			Type:    "reasoning",
			ID:      fmt.Sprintf("reasoning_%s", uuid.New().String()),
			Status:  "completed",
			Content: []schema.ORContentPart{makeOutputTextPart(reasoningContent)},
		}
		outputItems = append(outputItems, reasoningItem)
		xlog.Debug("Open Responses - Extracted reasoning", "reasoning_length", len(reasoningContent))
	}

	if shouldUseFn {
		var funcCallResults []functions.FuncCallResults
		var textContent string

		// Try pre-parsed tool calls from C++ autoparser first
		if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 {
			xlog.Debug("[ChatDeltas] OpenResponses: using pre-parsed tool calls", "count", len(deltaToolCalls))
			funcCallResults = deltaToolCalls
			textContent = functions.ContentFromChatDeltas(chatDeltas)
		} else {
			xlog.Debug("[ChatDeltas] OpenResponses: no pre-parsed tool calls, falling back to Go-side text parsing")
			// Clean up the result (already extracted reasoning above)
			cleanedResult = functions.CleanupLLMResult(cleanedResult, cfg.FunctionsConfig)
			funcCallResults = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig)
			textContent = functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig)
		}
		xlog.Debug("[ChatDeltas] OpenResponses: final tool call decision", "count", len(funcCallResults), "textContent", textContent)

		// Check for noAction function (model chose to respond without tool)
		noActionName := "answer"
		if cfg.FunctionsConfig.NoActionFunctionName != "" {
			noActionName = cfg.FunctionsConfig.NoActionFunctionName
		}

		// Filter out noAction calls and extract the message
		for i, fc := range funcCallResults {
			if fc.Name == noActionName {
				// This is a text response, not a tool call
				// Try to extract the message from the arguments
				if fc.Arguments != "" {
					var args map[string]interface{}
					if err := json.Unmarshal([]byte(fc.Arguments), &args); err == nil {
						if msg, ok := args["message"].(string); ok && msg != "" {
							textContent = msg
						}
					}
				}
				continue
			}
			toolCalls = append(toolCalls, schema.ToolCall{
				Index: i,
				ID:    fmt.Sprintf("fc_%s", uuid.New().String()),
				Type:  "function",
				FunctionCall: schema.FunctionCall{
					Name:      fc.Name,
					Arguments: fc.Arguments,
				},
			})
		}

		// MCP server-side tool execution: if any tool calls are MCP tools, execute and re-run
		if len(mcpToolInfos) > 0 && len(toolCalls) > 0 {
			var hasMCPCalls bool
			for _, tc := range toolCalls {
				if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) {
					hasMCPCalls = true
					break
				}
			}
			if hasMCPCalls {
				// Append assistant message with tool_calls to conversation
				assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: toolCalls}
				openAIReq.Messages = append(openAIReq.Messages, assistantMsg)

				// Execute each MCP tool call and append results
				for _, tc := range toolCalls {
					// Non-MCP (client-side) tools are left for the caller.
					if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) {
						continue
					}
					xlog.Debug("Executing MCP tool (Open Responses)", "tool", tc.FunctionCall.Name)
					toolResult, toolErr := mcpTools.ExecuteMCPToolCall(
						c.Request().Context(), mcpToolInfos,
						tc.FunctionCall.Name, tc.FunctionCall.Arguments,
					)
					if toolErr != nil {
						// Feed the error back to the model as the tool output
						// rather than failing the request.
						xlog.Error("MCP tool execution failed", "tool", tc.FunctionCall.Name, "error", toolErr)
						toolResult = fmt.Sprintf("Error: %v", toolErr)
					}
					openAIReq.Messages = append(openAIReq.Messages, schema.Message{
						Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tc.ID, Name: tc.FunctionCall.Name,
					})

					// Collect function_call + function_call_output items for the response
					outputItems = append(outputItems, schema.ORItemField{
						Type: "function_call", ID: fmt.Sprintf("fc_%s", uuid.New().String()),
						Status: "completed", CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments,
					})
					outputItems = append(outputItems, schema.ORItemField{
						Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()),
						Status: "completed", CallID: tc.ID, Output: toolResult,
					})
				}

				// Re-template and re-run inference
				// NOTE(review): outputItems collected so far are discarded by the
				// recursive call (it starts with a fresh outputItems slice) — confirm
				// whether intermediate tool items should appear in the final response.
				predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn)
				return handleOpenResponsesNonStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore, mcpToolInfos, evaluator, mcpIteration+1)
			}
		}

		// Add message item with text content (include logprobs if available)
		if textContent != "" {
			outputItems = append(outputItems, schema.ORItemField{
				Type:    "message",
				ID:      fmt.Sprintf("msg_%s", uuid.New().String()),
				Status:  "completed",
				Role:    "assistant",
				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, resultLogprobs)},
			})
		}

		// Add function call items
		for _, tc := range toolCalls {
			outputItems = append(outputItems, schema.ORItemField{
				Type:      "function_call",
				ID:        fmt.Sprintf("fc_%s", uuid.New().String()),
				Status:    "completed",
				CallID:    tc.ID,
				Name:      tc.FunctionCall.Name,
				Arguments: tc.FunctionCall.Arguments,
			})
		}

		// If we have no output items but the model did produce output, include the cleaned result as a message
		hasMessageItem := false
		for _, item := range outputItems {
			if item.Type == "message" {
				hasMessageItem = true
				break
			}
		}
		if !hasMessageItem && cleanedResult != "" {
			xlog.Debug("Open Responses - No parsed output, falling back to cleaned result")
			outputItems = append(outputItems, schema.ORItemField{
				Type:    "message",
				ID:      fmt.Sprintf("msg_%s", uuid.New().String()),
				Status:  "completed",
				Role:    "assistant",
				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(cleanedResult, resultLogprobs)},
			})
		}
	} else {
		// Simple text response (include logprobs if available)
		messageItem := schema.ORItemField{
			Type:    "message",
			ID:      fmt.Sprintf("msg_%s", uuid.New().String()),
			Status:  "completed",
			Role:    "assistant",
			Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(cleanedResult, resultLogprobs)},
		}
		outputItems = append(outputItems, messageItem)
	}

	// Calculate reasoning tokens (approximate: character count / 4)
	reasoningTokens := 0
	if reasoningContent != "" {
		// Simple estimation: ~4 characters per token
		reasoningTokens = len(reasoningContent) / 4
		// Never report zero for non-empty reasoning content.
		if reasoningTokens == 0 && len(reasoningContent) > 0 {
			reasoningTokens = 1
		}
	}

	// Build response with all required fields
	now := time.Now().Unix()
	response := buildORResponse(responseID, createdAt, &now, "completed", input, outputItems, &schema.ORUsage{
		InputTokens:  tokenUsage.Prompt,
		OutputTokens: tokenUsage.Completion,
		TotalTokens:  tokenUsage.Prompt + tokenUsage.Completion,
		OutputTokensDetails: &schema.OROutputTokensDetails{
			ReasoningTokens: reasoningTokens,
		},
	}, shouldStore)

	// Store response for future reference (if enabled)
	if shouldStore {
		store := GetGlobalStore()
		store.Store(responseID, input, response)
	}

	return c.JSON(200, response)
}
|
|
|
|
// handleOpenResponsesStream handles streaming responses
|
|
func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, shouldStore bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) error {
|
|
c.Response().Header().Set("Content-Type", "text/event-stream")
|
|
c.Response().Header().Set("Cache-Control", "no-cache")
|
|
c.Response().Header().Set("Connection", "keep-alive")
|
|
|
|
sequenceNumber := 0
|
|
|
|
// Emit response.created - use helper to create response with all required fields
|
|
responseCreated := buildORResponse(responseID, createdAt, nil, "in_progress", input, []schema.ORItemField{}, nil, shouldStore)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.created",
|
|
SequenceNumber: sequenceNumber,
|
|
Response: responseCreated,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit response.in_progress
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.in_progress",
|
|
SequenceNumber: sequenceNumber,
|
|
Response: responseCreated,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Populate openAIReq fields for ComputeChoices
|
|
openAIReq.Tools = convertORToolsToOpenAIFormat(input.Tools)
|
|
openAIReq.ToolsChoice = input.ToolChoice
|
|
if input.TopLogprobs != nil && *input.TopLogprobs > 0 {
|
|
openAIReq.TopLogprobs = input.TopLogprobs
|
|
openAIReq.Logprobs = schema.LogprobsValue{Enabled: true}
|
|
}
|
|
openAIReq.LogitBias = input.LogitBias
|
|
|
|
// Detect if thinking token is already in prompt or template
|
|
var template string
|
|
if cfg.TemplateConfig.UseTokenizerTemplate {
|
|
template = cfg.GetModelTemplate()
|
|
} else {
|
|
template = predInput
|
|
}
|
|
thinkingStartToken := reason.DetectThinkingStartToken(template, &cfg.ReasoningConfig)
|
|
|
|
// Track state for streaming
|
|
var currentMessageID string
|
|
var currentContentIndex int
|
|
var accumulatedText string
|
|
var lastEmittedToolCallCount int
|
|
outputIndex := 0
|
|
inToolCallMode := false
|
|
|
|
// Track reasoning state for streaming
|
|
var currentReasoningID string
|
|
var currentReasoningContentIndex int
|
|
var reasoningTokens int
|
|
extractor := reason.NewReasoningExtractor(thinkingStartToken, cfg.ReasoningConfig)
|
|
|
|
// Collect all output items for storage
|
|
var collectedOutputItems []schema.ORItemField
|
|
|
|
if shouldUseFn {
|
|
mcpStreamMaxIterations := 10
|
|
if cfg.Agent.MaxIterations > 0 {
|
|
mcpStreamMaxIterations = cfg.Agent.MaxIterations
|
|
}
|
|
hasMCPToolsStream := len(mcpToolInfos) > 0
|
|
|
|
var result, finalReasoning, finalCleanedResult string
|
|
var textContent string
|
|
var parsedToolCalls []functions.FuncCallResults
|
|
var toolCalls []functions.FuncCallResults
|
|
var lastStreamTokenUsage backend.TokenUsage
|
|
var lastStreamLogprobs *schema.Logprobs
|
|
|
|
for mcpStreamIter := 0; mcpStreamIter <= mcpStreamMaxIterations; mcpStreamIter++ {
|
|
if mcpStreamIter > 0 {
|
|
// Reset reasoning and tool-call state for re-inference so reasoning
|
|
// extraction runs again on subsequent iterations
|
|
inToolCallMode = false
|
|
extractor.Reset()
|
|
currentMessageID = ""
|
|
lastEmittedToolCallCount = 0
|
|
currentReasoningID = ""
|
|
|
|
predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn)
|
|
xlog.Debug("Open Responses stream MCP re-templating", "iteration", mcpStreamIter)
|
|
}
|
|
|
|
// For tool calls, we need to track accumulated result and parse incrementally
|
|
// We'll handle this differently - track the full result and parse tool calls
|
|
accumulatedResult := ""
|
|
tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool {
|
|
accumulatedResult += token
|
|
accumulatedText += token
|
|
|
|
// Try to parse tool calls incrementally
|
|
cleanedResult := functions.CleanupLLMResult(accumulatedResult, cfg.FunctionsConfig)
|
|
|
|
// Determine XML format from config
|
|
var xmlFormat *functions.XMLToolCallFormat
|
|
if cfg.FunctionsConfig.XMLFormat != nil {
|
|
xmlFormat = cfg.FunctionsConfig.XMLFormat
|
|
} else if cfg.FunctionsConfig.XMLFormatPreset != "" {
|
|
xmlFormat = functions.GetXMLFormatPreset(cfg.FunctionsConfig.XMLFormatPreset)
|
|
}
|
|
|
|
// Try XML parsing first
|
|
partialResults, parseErr := functions.ParseXMLIterative(cleanedResult, xmlFormat, true)
|
|
if parseErr == nil && len(partialResults) > lastEmittedToolCallCount {
|
|
// New tool calls detected
|
|
if !inToolCallMode && currentMessageID != "" {
|
|
// Close the current message content part
|
|
textPart := makeOutputTextPart(functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig))
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Part: &textPart,
|
|
})
|
|
sequenceNumber++
|
|
inToolCallMode = true
|
|
}
|
|
|
|
// Emit new tool calls
|
|
for i := lastEmittedToolCallCount; i < len(partialResults); i++ {
|
|
tc := partialResults[i]
|
|
toolCallID := fmt.Sprintf("fc_%s", uuid.New().String())
|
|
outputIndex++
|
|
|
|
// Emit function_call item added
|
|
functionCallItem := &schema.ORItemField{
|
|
Type: "function_call",
|
|
ID: toolCallID,
|
|
Status: "in_progress",
|
|
CallID: toolCallID,
|
|
Name: tc.Name,
|
|
Arguments: "",
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit arguments delta
|
|
if tc.Arguments != "" {
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.function_call_arguments.delta",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: toolCallID,
|
|
OutputIndex: &outputIndex,
|
|
Delta: strPtr(tc.Arguments),
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit arguments done
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.function_call_arguments.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: toolCallID,
|
|
OutputIndex: &outputIndex,
|
|
Arguments: strPtr(tc.Arguments),
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit function_call item done
|
|
functionCallItem.Status = "completed"
|
|
functionCallItem.Arguments = tc.Arguments
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Collect item for storage
|
|
collectedOutputItems = append(collectedOutputItems, *functionCallItem)
|
|
}
|
|
}
|
|
lastEmittedToolCallCount = len(partialResults)
|
|
c.Response().Flush()
|
|
return true
|
|
}
|
|
|
|
// Try JSON parsing as fallback
|
|
jsonResults, jsonErr := functions.ParseJSONIterative(cleanedResult, true)
|
|
if jsonErr == nil && len(jsonResults) > lastEmittedToolCallCount {
|
|
for i := lastEmittedToolCallCount; i < len(jsonResults); i++ {
|
|
jsonObj := jsonResults[i]
|
|
if name, ok := jsonObj["name"].(string); ok && name != "" {
|
|
args := "{}"
|
|
if argsVal, ok := jsonObj["arguments"]; ok {
|
|
if argsStr, ok := argsVal.(string); ok {
|
|
args = argsStr
|
|
} else {
|
|
argsBytes, _ := json.Marshal(argsVal)
|
|
args = string(argsBytes)
|
|
}
|
|
}
|
|
|
|
toolCallID := fmt.Sprintf("fc_%s", uuid.New().String())
|
|
outputIndex++
|
|
|
|
functionCallItem := &schema.ORItemField{
|
|
Type: "function_call",
|
|
ID: toolCallID,
|
|
Status: "completed",
|
|
CallID: toolCallID,
|
|
Name: name,
|
|
Arguments: args,
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
}
|
|
}
|
|
lastEmittedToolCallCount = len(jsonResults)
|
|
c.Response().Flush()
|
|
return true
|
|
}
|
|
|
|
// If no tool calls detected yet, handle reasoning and text
|
|
if !inToolCallMode {
|
|
reasoningDelta, contentDelta := extractor.ProcessToken(token)
|
|
|
|
// Handle reasoning item
|
|
if extractor.Reasoning() != "" {
|
|
// Check if we need to create reasoning item
|
|
if currentReasoningID == "" {
|
|
outputIndex++
|
|
currentReasoningID = fmt.Sprintf("reasoning_%s", uuid.New().String())
|
|
reasoningItem := &schema.ORItemField{
|
|
Type: "reasoning",
|
|
ID: currentReasoningID,
|
|
Status: "in_progress",
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: reasoningItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.added for reasoning
|
|
currentReasoningContentIndex = 0
|
|
emptyPart := makeOutputTextPart("")
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.added",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Part: &emptyPart,
|
|
})
|
|
sequenceNumber++
|
|
}
|
|
|
|
// Emit reasoning delta if there's new content
|
|
if reasoningDelta != "" {
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.delta",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Delta: strPtr(reasoningDelta),
|
|
Logprobs: emptyLogprobs(),
|
|
})
|
|
sequenceNumber++
|
|
c.Response().Flush()
|
|
}
|
|
}
|
|
|
|
// Only emit message content if there's actual content (not just reasoning)
|
|
if contentDelta != "" {
|
|
if currentMessageID == "" {
|
|
// Emit output_item.added for message
|
|
outputIndex++
|
|
currentMessageID = fmt.Sprintf("msg_%s", uuid.New().String())
|
|
messageItem := &schema.ORItemField{
|
|
Type: "message",
|
|
ID: currentMessageID,
|
|
Status: "in_progress",
|
|
Role: "assistant",
|
|
Content: []schema.ORContentPart{},
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: messageItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.added
|
|
currentContentIndex = 0
|
|
emptyPart := makeOutputTextPart("")
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.added",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Part: &emptyPart,
|
|
})
|
|
sequenceNumber++
|
|
}
|
|
|
|
// Emit text delta
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.delta",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Delta: strPtr(contentDelta),
|
|
Logprobs: emptyLogprobs(),
|
|
})
|
|
sequenceNumber++
|
|
c.Response().Flush()
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
var ccResult string
|
|
ccCb := func(s string, c *[]schema.Choice) {
|
|
ccResult = s
|
|
}
|
|
choices, ccTokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, ccCb, tokenCallback)
|
|
if err != nil {
|
|
xlog.Error("Open Responses stream model inference failed", "error", err)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "error",
|
|
SequenceNumber: sequenceNumber,
|
|
Error: &schema.ORErrorPayload{
|
|
Type: "model_error",
|
|
Message: fmt.Sprintf("model inference failed: %v", err),
|
|
},
|
|
})
|
|
sequenceNumber++
|
|
responseFailed := responseCreated
|
|
responseFailed.Status = "failed"
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.failed",
|
|
SequenceNumber: sequenceNumber,
|
|
Response: responseFailed,
|
|
})
|
|
// Send [DONE] even on error
|
|
fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
|
|
c.Response().Flush()
|
|
return nil
|
|
}
|
|
result = ccResult
|
|
lastStreamTokenUsage = ccTokenUsage
|
|
if len(choices) > 0 {
|
|
lastStreamLogprobs = choices[0].Logprobs
|
|
}
|
|
|
|
// Source reasoning from: (1) ChatDeltas from C++ autoparser, (2) extractor's
|
|
// streaming state, (3) final extraction from the finetuned result.
|
|
if chatDeltaReasoning := functions.ReasoningFromChatDeltas(chatDeltas); chatDeltaReasoning != "" {
|
|
finalReasoning = chatDeltaReasoning
|
|
finalCleanedResult = functions.ContentFromChatDeltas(chatDeltas)
|
|
if finalCleanedResult == "" {
|
|
finalCleanedResult = extractor.CleanedContent()
|
|
}
|
|
} else {
|
|
finalReasoning = extractor.Reasoning()
|
|
finalCleanedResult = extractor.CleanedContent()
|
|
}
|
|
if finalReasoning == "" && finalCleanedResult == "" {
|
|
finalReasoning, finalCleanedResult = reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
|
|
}
|
|
|
|
// Close reasoning item if it exists and wasn't closed yet
|
|
if currentReasoningID != "" && finalReasoning != "" {
|
|
// Emit output_text.done for reasoning
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Text: strPtr(finalReasoning),
|
|
Logprobs: emptyLogprobs(),
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.done for reasoning
|
|
reasoningPart := makeOutputTextPart(finalReasoning)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Part: &reasoningPart,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit output_item.done for reasoning
|
|
reasoningItem := &schema.ORItemField{
|
|
Type: "reasoning",
|
|
ID: currentReasoningID,
|
|
Status: "completed",
|
|
Content: []schema.ORContentPart{reasoningPart},
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: reasoningItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Collect reasoning item for storage
|
|
collectedOutputItems = append(collectedOutputItems, *reasoningItem)
|
|
|
|
// Calculate reasoning tokens
|
|
reasoningTokens = len(finalReasoning) / 4
|
|
if reasoningTokens == 0 && len(finalReasoning) > 0 {
|
|
reasoningTokens = 1
|
|
}
|
|
}
|
|
|
|
parsedToolCalls = nil
|
|
textContent = ""
|
|
|
|
// Try pre-parsed tool calls from C++ autoparser first
|
|
if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 {
|
|
xlog.Debug("[ChatDeltas] OpenResponses Stream: using pre-parsed tool calls", "count", len(deltaToolCalls))
|
|
parsedToolCalls = deltaToolCalls
|
|
textContent = functions.ContentFromChatDeltas(chatDeltas)
|
|
} else {
|
|
xlog.Debug("[ChatDeltas] OpenResponses Stream: no pre-parsed tool calls, falling back to Go-side text parsing")
|
|
cleanedResult := functions.CleanupLLMResult(finalCleanedResult, cfg.FunctionsConfig)
|
|
parsedToolCalls = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig)
|
|
textContent = functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig)
|
|
}
|
|
|
|
// Handle noAction function (model chose to respond without tool)
|
|
noActionName := "answer"
|
|
if cfg.FunctionsConfig.NoActionFunctionName != "" {
|
|
noActionName = cfg.FunctionsConfig.NoActionFunctionName
|
|
}
|
|
|
|
// Filter out noAction calls and extract the message
|
|
toolCalls = nil
|
|
for _, fc := range parsedToolCalls {
|
|
if fc.Name == noActionName {
|
|
// This is a text response, not a tool call
|
|
if fc.Arguments != "" {
|
|
var args map[string]interface{}
|
|
if err := json.Unmarshal([]byte(fc.Arguments), &args); err == nil {
|
|
if msg, ok := args["message"].(string); ok && msg != "" {
|
|
textContent = msg
|
|
}
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
toolCalls = append(toolCalls, fc)
|
|
}
|
|
|
|
xlog.Debug("Open Responses Stream - Parsed", "toolCalls", len(toolCalls), "textContent", textContent)
|
|
|
|
// MCP streaming tool execution: check if any tool calls are MCP tools
|
|
if hasMCPToolsStream && len(toolCalls) > 0 {
|
|
var hasMCPCalls bool
|
|
for _, tc := range toolCalls {
|
|
if mcpTools.IsMCPTool(mcpToolInfos, tc.Name) {
|
|
hasMCPCalls = true
|
|
break
|
|
}
|
|
}
|
|
if hasMCPCalls {
|
|
// Build schema.ToolCall list for the assistant message
|
|
var schemaToolCalls []schema.ToolCall
|
|
for i, tc := range toolCalls {
|
|
schemaToolCalls = append(schemaToolCalls, schema.ToolCall{
|
|
Index: i, ID: fmt.Sprintf("fc_%s", uuid.New().String()),
|
|
Type: "function",
|
|
FunctionCall: schema.FunctionCall{Name: tc.Name, Arguments: tc.Arguments},
|
|
})
|
|
}
|
|
assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: schemaToolCalls}
|
|
openAIReq.Messages = append(openAIReq.Messages, assistantMsg)
|
|
|
|
for idx, tc := range toolCalls {
|
|
tcID := schemaToolCalls[idx].ID
|
|
|
|
// Emit function_call item
|
|
outputIndex++
|
|
functionCallItem := &schema.ORItemField{
|
|
Type: "function_call", ID: tcID, Status: "completed",
|
|
CallID: tcID, Name: tc.Name, Arguments: tc.Arguments,
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added", SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex, Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done", SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex, Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
collectedOutputItems = append(collectedOutputItems, *functionCallItem)
|
|
|
|
if !mcpTools.IsMCPTool(mcpToolInfos, tc.Name) {
|
|
continue
|
|
}
|
|
|
|
// Execute MCP tool
|
|
xlog.Debug("Executing MCP tool (Open Responses stream)", "tool", tc.Name, "iteration", mcpStreamIter)
|
|
toolResult, toolErr := mcpTools.ExecuteMCPToolCall(
|
|
input.Context, mcpToolInfos, tc.Name, tc.Arguments,
|
|
)
|
|
if toolErr != nil {
|
|
xlog.Error("MCP tool execution failed", "tool", tc.Name, "error", toolErr)
|
|
toolResult = fmt.Sprintf("Error: %v", toolErr)
|
|
}
|
|
openAIReq.Messages = append(openAIReq.Messages, schema.Message{
|
|
Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tcID, Name: tc.Name,
|
|
})
|
|
|
|
// Emit function_call_output item
|
|
outputIndex++
|
|
outputItem := &schema.ORItemField{
|
|
Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()),
|
|
Status: "completed", CallID: tcID, Output: toolResult,
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added", SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex, Item: outputItem,
|
|
})
|
|
sequenceNumber++
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done", SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex, Item: outputItem,
|
|
})
|
|
sequenceNumber++
|
|
collectedOutputItems = append(collectedOutputItems, *outputItem)
|
|
}
|
|
c.Response().Flush()
|
|
xlog.Debug("MCP streaming tools executed, re-running inference", "iteration", mcpStreamIter)
|
|
continue // next MCP stream iteration
|
|
}
|
|
}
|
|
|
|
|
|
// Convert logprobs for streaming events
|
|
streamEventLogprobs := convertLogprobsForStreaming(lastStreamLogprobs)
|
|
|
|
// If we have no output but the model did produce something, use the cleaned result (without reasoning tags)
|
|
if textContent == "" && len(toolCalls) == 0 && finalCleanedResult != "" {
|
|
xlog.Debug("Open Responses Stream - No parsed output, using cleaned result")
|
|
textContent = finalCleanedResult
|
|
}
|
|
|
|
// Close message if we have text content
|
|
if currentMessageID != "" && textContent != "" && !inToolCallMode {
|
|
// Emit output_text.done
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Text: strPtr(textContent),
|
|
Logprobs: logprobsPtr(streamEventLogprobs),
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.done (with actual logprobs)
|
|
textPart := makeOutputTextPartWithLogprobs(textContent, lastStreamLogprobs)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Part: &textPart,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit output_item.done for message (with actual logprobs)
|
|
messageItem := &schema.ORItemField{
|
|
Type: "message",
|
|
ID: currentMessageID,
|
|
Status: "completed",
|
|
Role: "assistant",
|
|
Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, lastStreamLogprobs)},
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: messageItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Collect message item for storage
|
|
collectedOutputItems = append(collectedOutputItems, *messageItem)
|
|
}
|
|
|
|
// Emit any remaining tool calls that weren't streamed
|
|
for i := lastEmittedToolCallCount; i < len(toolCalls); i++ {
|
|
tc := toolCalls[i]
|
|
toolCallID := fmt.Sprintf("fc_%s", uuid.New().String())
|
|
outputIndex++
|
|
|
|
functionCallItem := &schema.ORItemField{
|
|
Type: "function_call",
|
|
ID: toolCallID,
|
|
Status: "completed",
|
|
CallID: toolCallID,
|
|
Name: tc.Name,
|
|
Arguments: tc.Arguments,
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: functionCallItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Collect function call item for storage
|
|
collectedOutputItems = append(collectedOutputItems, *functionCallItem)
|
|
}
|
|
|
|
break // no MCP tools to execute, exit loop
|
|
} // end MCP stream iteration loop
|
|
|
|
// Build final response with all items (include reasoning first, then messages, then tool calls)
|
|
var allOutputItems []schema.ORItemField
|
|
// Add reasoning item if it exists
|
|
if currentReasoningID != "" && finalReasoning != "" {
|
|
allOutputItems = append(allOutputItems, schema.ORItemField{
|
|
Type: "reasoning",
|
|
ID: currentReasoningID,
|
|
Status: "completed",
|
|
Content: []schema.ORContentPart{makeOutputTextPart(finalReasoning)},
|
|
})
|
|
}
|
|
// Add message item
|
|
if currentMessageID != "" && textContent != "" {
|
|
allOutputItems = append(allOutputItems, schema.ORItemField{
|
|
Type: "message",
|
|
ID: currentMessageID,
|
|
Status: "completed",
|
|
Role: "assistant",
|
|
Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, lastStreamLogprobs)},
|
|
})
|
|
}
|
|
// Add tool call items
|
|
for _, tc := range toolCalls {
|
|
toolCallID := fmt.Sprintf("fc_%s", uuid.New().String())
|
|
allOutputItems = append(allOutputItems, schema.ORItemField{
|
|
Type: "function_call",
|
|
ID: toolCallID,
|
|
Status: "completed",
|
|
CallID: toolCallID,
|
|
Name: tc.Name,
|
|
Arguments: tc.Arguments,
|
|
})
|
|
}
|
|
|
|
// Emit response.completed
|
|
now := time.Now().Unix()
|
|
responseCompleted := buildORResponse(responseID, createdAt, &now, "completed", input, allOutputItems, &schema.ORUsage{
|
|
InputTokens: lastStreamTokenUsage.Prompt,
|
|
OutputTokens: lastStreamTokenUsage.Completion,
|
|
TotalTokens: lastStreamTokenUsage.Prompt + lastStreamTokenUsage.Completion,
|
|
OutputTokensDetails: &schema.OROutputTokensDetails{
|
|
ReasoningTokens: reasoningTokens,
|
|
},
|
|
}, shouldStore)
|
|
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.completed",
|
|
SequenceNumber: sequenceNumber,
|
|
Response: responseCompleted,
|
|
})
|
|
|
|
// Store response for future reference (if enabled)
|
|
if shouldStore {
|
|
store := GetGlobalStore()
|
|
store.Store(responseID, input, responseCompleted)
|
|
}
|
|
|
|
// Send [DONE]
|
|
fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
|
|
c.Response().Flush()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Non-tool-call streaming path
|
|
// Emit output_item.added for message
|
|
currentMessageID = fmt.Sprintf("msg_%s", uuid.New().String())
|
|
messageItem := &schema.ORItemField{
|
|
Type: "message",
|
|
ID: currentMessageID,
|
|
Status: "in_progress",
|
|
Role: "assistant",
|
|
Content: []schema.ORContentPart{},
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: messageItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.added
|
|
currentContentIndex = 0
|
|
emptyTextPart := makeOutputTextPart("")
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.added",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Part: &emptyTextPart,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Stream text deltas with reasoning extraction
|
|
tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool {
|
|
accumulatedText += token
|
|
reasoningDelta, contentDelta := extractor.ProcessToken(token)
|
|
|
|
// Handle reasoning item
|
|
if extractor.Reasoning() != "" {
|
|
// Check if we need to create reasoning item
|
|
if currentReasoningID == "" {
|
|
outputIndex++
|
|
currentReasoningID = fmt.Sprintf("reasoning_%s", uuid.New().String())
|
|
reasoningItem := &schema.ORItemField{
|
|
Type: "reasoning",
|
|
ID: currentReasoningID,
|
|
Status: "in_progress",
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.added",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: reasoningItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.added for reasoning
|
|
currentReasoningContentIndex = 0
|
|
emptyPart := makeOutputTextPart("")
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.added",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Part: &emptyPart,
|
|
})
|
|
sequenceNumber++
|
|
}
|
|
|
|
// Emit reasoning delta if there's new content
|
|
if reasoningDelta != "" {
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.delta",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Delta: strPtr(reasoningDelta),
|
|
Logprobs: emptyLogprobs(),
|
|
})
|
|
sequenceNumber++
|
|
c.Response().Flush()
|
|
}
|
|
}
|
|
|
|
// Only emit message content if there's actual content (not just reasoning)
|
|
if contentDelta != "" {
|
|
// Emit text delta
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.delta",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Delta: strPtr(contentDelta),
|
|
Logprobs: emptyLogprobs(),
|
|
})
|
|
sequenceNumber++
|
|
c.Response().Flush()
|
|
}
|
|
return true
|
|
}
|
|
|
|
var noToolResult string
|
|
noToolCb := func(s string, c *[]schema.Choice) {
|
|
noToolResult = s
|
|
}
|
|
noToolChoices, noToolTokenUsage, noToolChatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, noToolCb, tokenCallback)
|
|
if err != nil {
|
|
xlog.Error("Open Responses stream model inference failed", "error", err)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "error",
|
|
SequenceNumber: sequenceNumber,
|
|
Error: &schema.ORErrorPayload{
|
|
Type: "model_error",
|
|
Message: fmt.Sprintf("model inference failed: %v", err),
|
|
},
|
|
})
|
|
sequenceNumber++
|
|
responseFailed := responseCreated
|
|
responseFailed.Status = "failed"
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.failed",
|
|
SequenceNumber: sequenceNumber,
|
|
Response: responseFailed,
|
|
})
|
|
// Send [DONE] even on error
|
|
fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
|
|
c.Response().Flush()
|
|
return nil
|
|
}
|
|
result := noToolResult
|
|
var noToolLogprobs *schema.Logprobs
|
|
if len(noToolChoices) > 0 {
|
|
noToolLogprobs = noToolChoices[0].Logprobs
|
|
}
|
|
|
|
// Source reasoning from: (1) ChatDeltas from C++ autoparser, (2) extractor's
|
|
// streaming state, (3) final extraction from the finetuned result.
|
|
var finalReasoning, finalCleanedResult string
|
|
if chatDeltaReasoning := functions.ReasoningFromChatDeltas(noToolChatDeltas); chatDeltaReasoning != "" {
|
|
finalReasoning = chatDeltaReasoning
|
|
finalCleanedResult = functions.ContentFromChatDeltas(noToolChatDeltas)
|
|
if finalCleanedResult == "" {
|
|
finalCleanedResult = extractor.CleanedContent()
|
|
}
|
|
} else {
|
|
finalReasoning = extractor.Reasoning()
|
|
finalCleanedResult = extractor.CleanedContent()
|
|
}
|
|
if finalReasoning == "" && finalCleanedResult == "" {
|
|
finalReasoning, finalCleanedResult = reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
|
|
}
|
|
|
|
// Close reasoning item if it exists and wasn't closed yet
|
|
if currentReasoningID != "" && finalReasoning != "" {
|
|
// Emit output_text.done for reasoning
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Text: strPtr(finalReasoning),
|
|
Logprobs: emptyLogprobs(),
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.done for reasoning
|
|
reasoningPart := makeOutputTextPart(finalReasoning)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentReasoningID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tReasoningContentIndex,
|
|
Part: &reasoningPart,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit output_item.done for reasoning
|
|
reasoningItem := &schema.ORItemField{
|
|
Type: "reasoning",
|
|
ID: currentReasoningID,
|
|
Status: "completed",
|
|
Content: []schema.ORContentPart{reasoningPart},
|
|
}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: reasoningItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Collect reasoning item for storage
|
|
collectedOutputItems = append(collectedOutputItems, *reasoningItem)
|
|
|
|
// Calculate reasoning tokens
|
|
reasoningTokens = len(finalReasoning) / 4
|
|
if reasoningTokens == 0 && len(finalReasoning) > 0 {
|
|
reasoningTokens = 1
|
|
}
|
|
}
|
|
|
|
result = finalCleanedResult
|
|
|
|
// Convert logprobs for streaming events
|
|
mcpStreamLogprobs := convertLogprobsForStreaming(noToolLogprobs)
|
|
|
|
// Emit output_text.done
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_text.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Text: strPtr(result),
|
|
Logprobs: logprobsPtr(mcpStreamLogprobs),
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit content_part.done (with actual logprobs)
|
|
resultPart := makeOutputTextPartWithLogprobs(result, noToolLogprobs)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.content_part.done",
|
|
SequenceNumber: sequenceNumber,
|
|
ItemID: currentMessageID,
|
|
OutputIndex: &outputIndex,
|
|
ContentIndex: ¤tContentIndex,
|
|
Part: &resultPart,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit output_item.done (with actual logprobs)
|
|
messageItem.Status = "completed"
|
|
messageItem.Content = []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, noToolLogprobs)}
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.output_item.done",
|
|
SequenceNumber: sequenceNumber,
|
|
OutputIndex: &outputIndex,
|
|
Item: messageItem,
|
|
})
|
|
sequenceNumber++
|
|
|
|
// Emit response.completed
|
|
now := time.Now().Unix()
|
|
|
|
// Collect final output items (reasoning first, then message)
|
|
var finalOutputItems []schema.ORItemField
|
|
// Add reasoning item if it exists
|
|
if currentReasoningID != "" && finalReasoning != "" {
|
|
finalOutputItems = append(finalOutputItems, schema.ORItemField{
|
|
Type: "reasoning",
|
|
ID: currentReasoningID,
|
|
Status: "completed",
|
|
Content: []schema.ORContentPart{makeOutputTextPart(finalReasoning)},
|
|
})
|
|
}
|
|
// Add message item
|
|
if len(collectedOutputItems) > 0 {
|
|
// Use collected items (may include reasoning already)
|
|
for _, item := range collectedOutputItems {
|
|
if item.Type == "message" {
|
|
finalOutputItems = append(finalOutputItems, item)
|
|
}
|
|
}
|
|
} else {
|
|
finalOutputItems = append(finalOutputItems, *messageItem)
|
|
}
|
|
responseCompleted := buildORResponse(responseID, createdAt, &now, "completed", input, finalOutputItems, &schema.ORUsage{
|
|
InputTokens: noToolTokenUsage.Prompt,
|
|
OutputTokens: noToolTokenUsage.Completion,
|
|
TotalTokens: noToolTokenUsage.Prompt + noToolTokenUsage.Completion,
|
|
OutputTokensDetails: &schema.OROutputTokensDetails{
|
|
ReasoningTokens: reasoningTokens,
|
|
},
|
|
}, shouldStore)
|
|
sendSSEEvent(c, &schema.ORStreamEvent{
|
|
Type: "response.completed",
|
|
SequenceNumber: sequenceNumber,
|
|
Response: responseCompleted,
|
|
})
|
|
|
|
// Store response for future reference (if enabled)
|
|
if shouldStore {
|
|
store := GetGlobalStore()
|
|
store.Store(responseID, input, responseCompleted)
|
|
}
|
|
|
|
// Send [DONE]
|
|
fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
|
|
c.Response().Flush()
|
|
|
|
return nil
|
|
}
|
|
|
|
// sendSSEEvent sends a Server-Sent Event
|
|
func sendSSEEvent(c echo.Context, event *schema.ORStreamEvent) {
|
|
normalizeORStreamEvent(event)
|
|
data, err := json.Marshal(event)
|
|
if err != nil {
|
|
xlog.Error("Failed to marshal SSE event", "error", err)
|
|
return
|
|
}
|
|
fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.Type, string(data))
|
|
}
|
|
|
|
// normalizeORStreamEvent ensures required fields like Summary are never null.
|
|
func normalizeORStreamEvent(event *schema.ORStreamEvent) {
|
|
if event.Item != nil && event.Item.Summary == nil {
|
|
event.Item.Summary = []schema.ORContentPart{}
|
|
}
|
|
}
|
|
|
|
// getTopLogprobs dereferences an optional top_logprobs parameter, treating a
// nil pointer as the default value 0.
func getTopLogprobs(topLogprobs *int) int {
	if topLogprobs == nil {
		return 0
	}
	return *topLogprobs
}
|
|
|
|
// strPtr returns a pointer to a copy of s; used to populate optional string
// fields in streaming events.
func strPtr(s string) *string {
	v := s
	return &v
}
|
|
|
|
func logprobsPtr(lp []schema.ORLogProb) *[]schema.ORLogProb {
|
|
return &lp
|
|
}
|
|
|
|
func emptyLogprobs() *[]schema.ORLogProb {
|
|
empty := []schema.ORLogProb{}
|
|
return &empty
|
|
}
|
|
|
|
// makeOutputTextPart creates an output_text content part with all required fields per Open Responses spec
|
|
func makeOutputTextPart(text string) schema.ORContentPart {
|
|
return schema.ORContentPartWithLogprobs(text, nil)
|
|
}
|
|
|
|
// makeOutputTextPartWithLogprobs creates an output_text content part with actual logprobs data
|
|
func makeOutputTextPartWithLogprobs(text string, logprobs *schema.Logprobs) schema.ORContentPart {
|
|
return schema.ORContentPartWithLogprobs(text, logprobs)
|
|
}
|
|
|
|
// convertLogprobsForStreaming converts OpenAI-style logprobs to Open Responses format for streaming events
|
|
func convertLogprobsForStreaming(logprobs *schema.Logprobs) []schema.ORLogProb {
|
|
if logprobs == nil || len(logprobs.Content) == 0 {
|
|
return []schema.ORLogProb{}
|
|
}
|
|
|
|
result := make([]schema.ORLogProb, 0, len(logprobs.Content))
|
|
for _, lp := range logprobs.Content {
|
|
topLPs := make([]schema.ORTopLogProb, 0, len(lp.TopLogprobs))
|
|
for _, tlp := range lp.TopLogprobs {
|
|
topLPs = append(topLPs, schema.ORTopLogProb{
|
|
Token: tlp.Token,
|
|
Logprob: tlp.Logprob,
|
|
Bytes: tlp.Bytes,
|
|
})
|
|
}
|
|
result = append(result, schema.ORLogProb{
|
|
Token: lp.Token,
|
|
Logprob: lp.Logprob,
|
|
Bytes: lp.Bytes,
|
|
TopLogprobs: topLPs,
|
|
})
|
|
}
|
|
return result
|
|
}
|
|
|
|
// ensureUsageDetails ensures usage has all required detail fields
|
|
func ensureUsageDetails(usage *schema.ORUsage) *schema.ORUsage {
|
|
if usage == nil {
|
|
return nil
|
|
}
|
|
// Ensure details are always present (not nil)
|
|
if usage.InputTokensDetails == nil {
|
|
usage.InputTokensDetails = &schema.ORInputTokensDetails{CachedTokens: 0}
|
|
}
|
|
if usage.OutputTokensDetails == nil {
|
|
usage.OutputTokensDetails = &schema.OROutputTokensDetails{ReasoningTokens: 0}
|
|
}
|
|
return usage
|
|
}
|
|
|
|
// buildORResponse creates a complete ORResponseResource with all required fields
|
|
func buildORResponse(responseID string, createdAt int64, completedAt *int64, status string, input *schema.OpenResponsesRequest, outputItems []schema.ORItemField, usage *schema.ORUsage, shouldStore bool) *schema.ORResponseResource {
|
|
// Ensure output is never null - always an array
|
|
if outputItems == nil {
|
|
outputItems = []schema.ORItemField{}
|
|
}
|
|
|
|
// Ensure Summary is never null on any output item
|
|
for i := range outputItems {
|
|
if outputItems[i].Summary == nil {
|
|
outputItems[i].Summary = []schema.ORContentPart{}
|
|
}
|
|
}
|
|
|
|
// Ensure tools is never null - always an array
|
|
tools := input.Tools
|
|
if tools == nil {
|
|
tools = []schema.ORFunctionTool{}
|
|
}
|
|
|
|
// Ensure metadata is never null - always a map
|
|
metadata := input.Metadata
|
|
if metadata == nil {
|
|
metadata = map[string]string{}
|
|
}
|
|
|
|
// Set default values for sampling parameters
|
|
temperature := 1.0
|
|
if input.Temperature != nil {
|
|
temperature = *input.Temperature
|
|
}
|
|
|
|
topP := 1.0
|
|
if input.TopP != nil {
|
|
topP = *input.TopP
|
|
}
|
|
|
|
presencePenalty := 0.0
|
|
if input.PresencePenalty != nil {
|
|
presencePenalty = *input.PresencePenalty
|
|
}
|
|
|
|
frequencyPenalty := 0.0
|
|
if input.FrequencyPenalty != nil {
|
|
frequencyPenalty = *input.FrequencyPenalty
|
|
}
|
|
|
|
// Default truncation to "auto"
|
|
truncation := "auto"
|
|
if input.Truncation != "" {
|
|
truncation = input.Truncation
|
|
}
|
|
|
|
// Default service_tier to "default"
|
|
serviceTier := "default"
|
|
if input.ServiceTier != "" {
|
|
serviceTier = input.ServiceTier
|
|
}
|
|
|
|
// Default parallel_tool_calls to true
|
|
parallelToolCalls := true
|
|
if input.ParallelToolCalls != nil {
|
|
parallelToolCalls = *input.ParallelToolCalls
|
|
}
|
|
|
|
// Default tool_choice: "auto" if tools are present, "none" otherwise
|
|
var toolChoice interface{}
|
|
if input.ToolChoice != nil {
|
|
toolChoice = input.ToolChoice
|
|
} else if len(tools) > 0 {
|
|
toolChoice = "auto"
|
|
} else {
|
|
toolChoice = "none"
|
|
}
|
|
|
|
// Background defaults to false
|
|
background := false
|
|
if input.Background != nil {
|
|
background = *input.Background
|
|
}
|
|
|
|
// Convert nullable string fields
|
|
var previousResponseID *string
|
|
if input.PreviousResponseID != "" {
|
|
previousResponseID = &input.PreviousResponseID
|
|
}
|
|
|
|
var instructions *string
|
|
if input.Instructions != "" {
|
|
instructions = &input.Instructions
|
|
}
|
|
|
|
// Convert reasoning
|
|
var reasoning *schema.ORReasoning
|
|
if input.Reasoning != nil {
|
|
reasoning = &schema.ORReasoning{
|
|
Effort: input.Reasoning.Effort,
|
|
Summary: input.Reasoning.Summary,
|
|
}
|
|
}
|
|
|
|
// Build default text config
|
|
textConfig := &schema.ORTextConfig{
|
|
Format: &schema.ORTextFormat{
|
|
Type: "text",
|
|
},
|
|
}
|
|
|
|
return &schema.ORResponseResource{
|
|
ID: responseID,
|
|
Object: "response",
|
|
CreatedAt: createdAt,
|
|
CompletedAt: completedAt,
|
|
Status: status,
|
|
Model: input.Model,
|
|
Output: outputItems,
|
|
Error: nil, // null when no error
|
|
IncompleteDetails: nil, // null when complete
|
|
PreviousResponseID: previousResponseID,
|
|
Instructions: instructions,
|
|
|
|
// Tool-related fields
|
|
Tools: tools,
|
|
ToolChoice: toolChoice,
|
|
ParallelToolCalls: parallelToolCalls,
|
|
MaxToolCalls: input.MaxToolCalls,
|
|
|
|
// Sampling parameters
|
|
Temperature: temperature,
|
|
TopP: topP,
|
|
PresencePenalty: presencePenalty,
|
|
FrequencyPenalty: frequencyPenalty,
|
|
TopLogprobs: getTopLogprobs(input.TopLogprobs),
|
|
MaxOutputTokens: input.MaxOutputTokens,
|
|
|
|
// Text format
|
|
Text: textConfig,
|
|
|
|
// Truncation and reasoning
|
|
Truncation: truncation,
|
|
Reasoning: reasoning,
|
|
|
|
// Usage
|
|
Usage: ensureUsageDetails(usage),
|
|
|
|
// Metadata and operational flags
|
|
Metadata: metadata,
|
|
Store: shouldStore,
|
|
Background: background,
|
|
ServiceTier: serviceTier,
|
|
|
|
// Safety and caching (nullable, not yet implemented)
|
|
SafetyIdentifier: nil,
|
|
PromptCacheKey: nil,
|
|
}
|
|
}
|
|
|
|
// sendOpenResponsesError sends an error response
|
|
func sendOpenResponsesError(c echo.Context, statusCode int, errorType, message, param string) error {
|
|
errorResp := map[string]interface{}{
|
|
"error": map[string]interface{}{
|
|
"type": errorType,
|
|
"message": message,
|
|
},
|
|
}
|
|
if param != "" {
|
|
errorResp["error"].(map[string]interface{})["param"] = param
|
|
}
|
|
return c.JSON(statusCode, errorResp)
|
|
}
|
|
|
|
// convertORToolsToOpenAIFormat converts Open Responses tools to OpenAI format for the backend
|
|
// Open Responses format: { type, name, description, parameters }
|
|
// OpenAI format: { type, function: { name, description, parameters } }
|
|
func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.Tool {
|
|
result := make([]functions.Tool, 0, len(orTools))
|
|
for _, t := range orTools {
|
|
result = append(result, functions.Tool{
|
|
Type: "function",
|
|
Function: functions.Function{
|
|
Name: t.Name,
|
|
Description: t.Description,
|
|
Parameters: t.Parameters,
|
|
},
|
|
})
|
|
}
|
|
return result
|
|
}
|
|
|
|
// GetResponseEndpoint returns a handler for GET /responses/:id
|
|
// This endpoint is used for polling background responses or resuming streaming
|
|
// @Summary Get a response by ID
|
|
// @Description Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.
|
|
// @Param id path string true "Response ID"
|
|
// @Param stream query string false "Set to 'true' to resume streaming"
|
|
// @Param starting_after query int false "Sequence number to resume from (for streaming)"
|
|
// @Success 200 {object} schema.ORResponseResource "Response"
|
|
// @Failure 400 {object} map[string]interface{} "Bad Request"
|
|
// @Failure 404 {object} map[string]interface{} "Not Found"
|
|
// @Router /v1/responses/{id} [get]
|
|
func GetResponseEndpoint() func(c echo.Context) error {
|
|
return func(c echo.Context) error {
|
|
responseID := c.Param("id")
|
|
if responseID == "" {
|
|
return sendOpenResponsesError(c, 400, "invalid_request_error", "response ID is required", "id")
|
|
}
|
|
|
|
store := GetGlobalStore()
|
|
stored, err := store.Get(responseID)
|
|
if err != nil {
|
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
|
}
|
|
|
|
// Check if streaming resume is requested
|
|
streamParam := c.QueryParam("stream")
|
|
if streamParam == "true" {
|
|
// Validate that the response was created with streaming enabled
|
|
if !stored.StreamEnabled {
|
|
return sendOpenResponsesError(c, 400, "invalid_request_error", "cannot stream a response that was not created with stream=true", "stream")
|
|
}
|
|
|
|
// Get starting_after parameter
|
|
startingAfter := 0
|
|
startingAfterParam := c.QueryParam("starting_after")
|
|
if startingAfterParam != "" {
|
|
if _, err := fmt.Sscanf(startingAfterParam, "%d", &startingAfter); err != nil {
|
|
return sendOpenResponsesError(c, 400, "invalid_request_error", "starting_after must be an integer", "starting_after")
|
|
}
|
|
}
|
|
|
|
return handleStreamResume(c, store, responseID, stored, startingAfter)
|
|
}
|
|
|
|
// Non-streaming: return the current response state
|
|
stored.mu.RLock()
|
|
response := stored.Response
|
|
stored.mu.RUnlock()
|
|
|
|
return c.JSON(200, response)
|
|
}
|
|
}
|
|
|
|
// handleStreamResume replays buffered SSE events of a stored streaming
// response starting after the given sequence number and, while the response
// is still queued/in-progress, follows newly produced events until the
// response reaches a terminal status or the client disconnects.
//
// Events are written in SSE framing ("event: <type>\ndata: <json>\n\n") and
// the stream is terminated with a "data: [DONE]" sentinel.
func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, stored *StoredResponse, startingAfter int) error {
	// Standard SSE headers so intermediaries do not buffer the stream.
	c.Response().Header().Set("Content-Type", "text/event-stream")
	c.Response().Header().Set("Cache-Control", "no-cache")
	c.Response().Header().Set("Connection", "keep-alive")

	// Get buffered events after the starting point
	events, err := store.GetEventsAfter(responseID, startingAfter)
	if err != nil {
		return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get events: %v", err), "")
	}

	// Send all buffered events, flushing each so the client sees them promptly.
	for _, event := range events {
		fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.EventType, string(event.Data))
		c.Response().Flush()
	}

	// Get the current status (Response is shared; guard with the read lock).
	stored.mu.RLock()
	status := stored.Response.Status
	stored.mu.RUnlock()

	// If response is still in progress, subscribe to new events
	if status == schema.ORStatusQueued || status == schema.ORStatusInProgress {
		eventsChan, err := store.GetEventsChan(responseID)
		if err != nil {
			// Response might have completed, just finish
			fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
			c.Response().Flush()
			return nil
		}

		// Track last sent sequence number so each wake-up only fetches
		// events we have not written yet.
		lastSeq := startingAfter
		if len(events) > 0 {
			lastSeq = events[len(events)-1].SequenceNumber
		}

		// Wait for new events or completion
		for {
			select {
			case <-c.Request().Context().Done():
				// Client disconnected
				return nil
			case <-eventsChan:
				// New events available
				newEvents, err := store.GetEventsAfter(responseID, lastSeq)
				if err != nil {
					// NOTE: this break exits only the select, not the for
					// loop — on a fetch error we simply wait for the next
					// channel signal or timeout.
					break
				}
				for _, event := range newEvents {
					fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.EventType, string(event.Data))
					c.Response().Flush()
					lastSeq = event.SequenceNumber
				}

				// Check if response is now complete
				stored.mu.RLock()
				status = stored.Response.Status
				stored.mu.RUnlock()

				if status != schema.ORStatusQueued && status != schema.ORStatusInProgress {
					fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
					c.Response().Flush()
					return nil
				}
			case <-time.After(30 * time.Second):
				// Timeout - send keepalive or check status
				// (currently only re-checks status; no keepalive bytes are sent)
				stored.mu.RLock()
				status = stored.Response.Status
				stored.mu.RUnlock()

				if status != schema.ORStatusQueued && status != schema.ORStatusInProgress {
					fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
					c.Response().Flush()
					return nil
				}
			}
		}
	}

	// Response already complete
	fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
	c.Response().Flush()
	return nil
}
|
|
|
|
// CancelResponseEndpoint returns a handler for POST /responses/:id/cancel
|
|
// This endpoint cancels a background response if it's still in progress
|
|
// @Summary Cancel a response
|
|
// @Description Cancel a background response if it's still in progress
|
|
// @Param id path string true "Response ID"
|
|
// @Success 200 {object} schema.ORResponseResource "Response"
|
|
// @Failure 400 {object} map[string]interface{} "Bad Request"
|
|
// @Failure 404 {object} map[string]interface{} "Not Found"
|
|
// @Router /v1/responses/{id}/cancel [post]
|
|
func CancelResponseEndpoint() func(c echo.Context) error {
|
|
return func(c echo.Context) error {
|
|
responseID := c.Param("id")
|
|
if responseID == "" {
|
|
return sendOpenResponsesError(c, 400, "invalid_request_error", "response ID is required", "id")
|
|
}
|
|
|
|
store := GetGlobalStore()
|
|
response, err := store.Cancel(responseID)
|
|
if err != nil {
|
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
|
}
|
|
|
|
// Return the final response object
|
|
return c.JSON(200, response)
|
|
}
|
|
}
|