From 5195062e125a39e51665fd75883812ecec02f731 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Tue, 3 Feb 2026 13:30:37 +0000 Subject: [PATCH] fix(realtime): Include noAction function in prompt template and handle tool_choice (#8372) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The realtime endpoint was not passing the noAction "answer" function to the model in the prompt template, causing the model to always call user-provided tools even when a direct response was appropriate. Root cause: - User tools were added to the funcs list - TemplateMessages() was called to generate the prompt - noAction function was only added AFTER templating - This meant the prompt didn't include the "answer" function, even though the grammar did Fix: - Move noAction function creation before TemplateMessages() call so it's included in both the prompt and grammar - Add proper tool_choice parameter handling to support "auto", "required", "none", and specific function selection - Match behavior of the standard chat endpoint 💘 Generated with Crush Assisted-by: Claude Sonnet 4.5 via Crush Signed-off-by: Richard Palethorpe --- core/http/endpoints/openai/realtime_model.go | 94 ++++++++++++-------- 1 file changed, 57 insertions(+), 37 deletions(-) diff --git a/core/http/endpoints/openai/realtime_model.go b/core/http/endpoints/openai/realtime_model.go index 44631bf25..c737adbba 100644 --- a/core/http/endpoints/openai/realtime_model.go +++ b/core/http/endpoints/openai/realtime_model.go @@ -28,7 +28,7 @@ type wrappedModel struct { TTSConfig *config.ModelConfig TranscriptionConfig *config.ModelConfig LLMConfig *config.ModelConfig - VADConfig *config.ModelConfig + VADConfig *config.ModelConfig appConfig *config.ApplicationConfig modelLoader *model.ModelLoader @@ -114,6 +114,35 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im }) } } + + // Add noAction function before templating so it's included in the prompt + // Allow the user to set custom actions via config file + noActionName := "answer" + noActionDescription := "use this action to answer without performing any action" + + if m.LLMConfig.FunctionsConfig.NoActionFunctionName != "" { + noActionName = m.LLMConfig.FunctionsConfig.NoActionFunctionName + } + if m.LLMConfig.FunctionsConfig.NoActionDescriptionName != "" { + noActionDescription = m.LLMConfig.FunctionsConfig.NoActionDescriptionName + } + + noActionGrammar := functions.Function{ + Name: noActionName, + Description: noActionDescription, + Parameters: map[string]interface{}{ + "properties": map[string]interface{}{ + "message": map[string]interface{}{ + "type": "string", + "description": "The message to reply the user with", + }, + }, + }, + } + + if !m.LLMConfig.FunctionsConfig.DisableNoAction { + funcs = append(funcs, noActionGrammar) + } } predInput = m.evaluator.TemplateMessages(input, input.Messages, m.LLMConfig, funcs, len(funcs) > 0) @@ -124,38 +153,29 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im } } + // Handle tool_choice parameter similar to the chat endpoint + if toolChoice != nil { + if toolChoice.Mode != "" { + // String values: "auto", "required", "none" + switch toolChoice.Mode { + case types.ToolChoiceModeRequired: + m.LLMConfig.SetFunctionCallString("required") + case types.ToolChoiceModeNone: + // Don't use tools + m.LLMConfig.SetFunctionCallString("none") + case types.ToolChoiceModeAuto: + // Default behavior - let model decide + } + } else if toolChoice.Function != nil { + // Specific function specified + m.LLMConfig.SetFunctionCallString(toolChoice.Function.Name) + } + } + // Generate grammar for function calling if tools are provided and grammar generation is enabled shouldUseFn := len(tools) > 0 && m.LLMConfig.ShouldUseFunctions() if !m.LLMConfig.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn { - // Allow the user to set custom actions via config file - noActionName := "answer" - noActionDescription := "use this action to answer without performing any action" - - if m.LLMConfig.FunctionsConfig.NoActionFunctionName != "" { - noActionName = m.LLMConfig.FunctionsConfig.NoActionFunctionName - } - if m.LLMConfig.FunctionsConfig.NoActionDescriptionName != "" { - noActionDescription = m.LLMConfig.FunctionsConfig.NoActionDescriptionName - } - - noActionGrammar := functions.Function{ - Name: noActionName, - Description: noActionDescription, - Parameters: map[string]interface{}{ - "properties": map[string]interface{}{ - "message": map[string]interface{}{ - "type": "string", - "description": "The message to reply the user with", - }, - }, - }, - } - - if !m.LLMConfig.FunctionsConfig.DisableNoAction { - funcs = append(funcs, noActionGrammar) - } - // Force picking one of the functions by the request if m.LLMConfig.FunctionToCall() != "" { funcs = functions.Functions(funcs).Select(m.LLMConfig.FunctionToCall()) @@ -184,7 +204,7 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im toolChoiceJSON = string(b) } - return backend.ModelInference(ctx, predInput, messages, images, videos, audios, m.modelLoader, m.LLMConfig, m.confLoader, m.appConfig, tokenCallback, toolsJSON, toolChoiceJSON, logprobs, topLogprobs, logitBias, ) + return backend.ModelInference(ctx, predInput, messages, images, videos, audios, m.modelLoader, m.LLMConfig, m.confLoader, m.appConfig, tokenCallback, toolsJSON, toolChoiceJSON, logprobs, topLogprobs, logitBias) } func (m *wrappedModel) TTS(ctx context.Context, text, voice, language string) (string, *proto.Result, error) { @@ -218,11 +238,11 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig return &transcriptOnlyModel{ TranscriptionConfig: cfgSST, - VADConfig: cfgVAD, + VADConfig: cfgVAD, - confLoader: cl, + confLoader: cl, modelLoader: ml, - appConfig: appConfig, + appConfig: appConfig, }, cfgSST, nil } @@ -297,11 +317,11 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model TTSConfig: cfgTTS, TranscriptionConfig: cfgSST, LLMConfig: cfgLLM, - VADConfig: cfgVAD, + VADConfig: cfgVAD, - confLoader: cl, + confLoader: cl, modelLoader: ml, - appConfig: appConfig, - evaluator: evaluator, + appConfig: appConfig, + evaluator: evaluator, }, nil }