fix(realtime): Use user provided voice and allow pipeline models to have no backend (#8415)

* fix(realtime): Use the voice provided by the user or none at all

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(ui,config): Allow pipeline models to have no backend and use same validation in frontend

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
Richard Palethorpe
2026-02-11 13:18:05 +00:00
committed by GitHub
parent 0ee92317ec
commit 7270a98ce5
6 changed files with 75 additions and 47 deletions

View File

@@ -76,42 +76,35 @@ func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
}
}
// TODO: either in the next PR or the next commit, I want to merge these down into a single function that looks at the first few characters of the file to determine if we need to deserialize to []BackendConfig or BackendConfig
func readMultipleModelConfigsFromFile(file string, opts ...ConfigLoaderOption) ([]*ModelConfig, error) {
c := &[]*ModelConfig{}
// readModelConfigsFromFile reads a config file that may contain either a single
// ModelConfig or an array of ModelConfigs. It tries to unmarshal as an array first,
// then falls back to a single config if that fails.
func readModelConfigsFromFile(file string, opts ...ConfigLoaderOption) ([]*ModelConfig, error) {
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("readMultipleModelConfigsFromFile cannot read config file %q: %w", file, err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("readMultipleModelConfigsFromFile cannot unmarshal config file %q: %w", file, err)
return nil, fmt.Errorf("readModelConfigsFromFile cannot read config file %q: %w", file, err)
}
for _, cc := range *c {
cc.modelConfigFile = file
cc.SetDefaults(opts...)
// Try to unmarshal as array first
var configs []*ModelConfig
if err := yaml.Unmarshal(f, &configs); err == nil && len(configs) > 0 {
for _, cc := range configs {
cc.modelConfigFile = file
cc.SetDefaults(opts...)
}
return configs, nil
}
return *c, nil
}
func readModelConfigFromFile(file string, opts ...ConfigLoaderOption) (*ModelConfig, error) {
lo := &LoadOptions{}
lo.Apply(opts...)
// Fall back to single config
c := &ModelConfig{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("readModelConfigFromFile cannot read config file %q: %w", file, err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("readModelConfigFromFile cannot unmarshal config file %q: %w", file, err)
return nil, fmt.Errorf("readModelConfigsFromFile cannot unmarshal config file %q: %w", file, err)
}
c.SetDefaults(opts...)
c.modelConfigFile = file
return c, nil
c.SetDefaults(opts...)
return []*ModelConfig{c}, nil
}
// Load a config file for a model
@@ -163,7 +156,7 @@ func (bcl *ModelConfigLoader) LoadModelConfigFileByNameDefaultOptions(modelName
func (bcl *ModelConfigLoader) LoadMultipleModelConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
bcl.Lock()
defer bcl.Unlock()
c, err := readMultipleModelConfigsFromFile(file, opts...)
c, err := readModelConfigsFromFile(file, opts...)
if err != nil {
return fmt.Errorf("cannot load config file: %w", err)
}
@@ -181,11 +174,18 @@ func (bcl *ModelConfigLoader) LoadMultipleModelConfigsSingleFile(file string, op
func (bcl *ModelConfigLoader) ReadModelConfig(file string, opts ...ConfigLoaderOption) error {
bcl.Lock()
defer bcl.Unlock()
c, err := readModelConfigFromFile(file, opts...)
configs, err := readModelConfigsFromFile(file, opts...)
if err != nil {
return fmt.Errorf("ReadModelConfig cannot read config file %q: %w", file, err)
}
if len(configs) == 0 {
return fmt.Errorf("ReadModelConfig: no configs found in file %q", file)
}
if len(configs) > 1 {
xlog.Warn("ReadModelConig: read more than one config from file, only using first", "file", file, "configs", len(configs))
}
c := configs[0]
if valid, err := c.Validate(); valid {
bcl.configs[c.Name] = *c
} else {
@@ -375,15 +375,23 @@ func (bcl *ModelConfigLoader) LoadModelConfigsFromPath(path string, opts ...Conf
strings.HasPrefix(file.Name(), ".") {
continue
}
c, err := readModelConfigFromFile(filepath.Join(path, file.Name()), opts...)
filePath := filepath.Join(path, file.Name())
// Read config(s) - handles both single and array formats
configs, err := readModelConfigsFromFile(filePath, opts...)
if err != nil {
xlog.Error("LoadModelConfigsFromPath cannot read config file", "error", err, "File Name", file.Name())
continue
}
if valid, validationErr := c.Validate(); valid {
bcl.configs[c.Name] = *c
} else {
xlog.Error("config is not valid", "error", validationErr, "Name", c.Name)
// Validate and store each config
for _, c := range configs {
if valid, validationErr := c.Validate(); valid {
bcl.configs[c.Name] = *c
} else {
xlog.Error("config is not valid", "error", validationErr, "Name", c.Name)
}
}
}

View File

@@ -25,7 +25,8 @@ known_usecases:
- COMPLETION
`)
Expect(err).ToNot(HaveOccurred())
config, err := readModelConfigFromFile(tmp.Name())
configs, err := readModelConfigsFromFile(tmp.Name())
config := configs[0]
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
valid, err := config.Validate()
@@ -43,7 +44,8 @@ backend: "foo-bar"
parameters:
model: "foo-bar"`)
Expect(err).ToNot(HaveOccurred())
config, err := readModelConfigFromFile(tmp.Name())
configs, err := readModelConfigsFromFile(tmp.Name())
config := configs[0]
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
// two configs in config.yaml
@@ -62,7 +64,8 @@ parameters:
defer os.Remove(tmp.Name())
_, err = io.Copy(tmp, resp.Body)
Expect(err).To(BeNil())
config, err = readModelConfigFromFile(tmp.Name())
configs, err = readModelConfigsFromFile(tmp.Name())
config = configs[0]
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
// two configs in config.yaml
@@ -188,7 +191,8 @@ mcp:
}
}`)
Expect(err).ToNot(HaveOccurred())
config, err := readModelConfigFromFile(tmp.Name())
configs, err := readModelConfigsFromFile(tmp.Name())
config := configs[0]
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
valid, err := config.Validate()
@@ -218,7 +222,8 @@ mcp:
}
}`)
Expect(err).ToNot(HaveOccurred())
config, err := readModelConfigFromFile(tmp.Name())
configs, err := readModelConfigsFromFile(tmp.Name())
config := configs[0]
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
valid, err := config.Validate()

View File

@@ -16,7 +16,7 @@ var _ = Describe("Test cases for config related functions", func() {
Context("Test Read configuration functions", func() {
configFile = os.Getenv("CONFIG_FILE")
It("Test readConfigFile", func() {
config, err := readMultipleModelConfigsFromFile(configFile)
config, err := readModelConfigsFromFile(configFile)
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
// two configs in config.yaml

View File

@@ -336,6 +336,7 @@ var _ = Describe("API test", func() {
Name: "bert",
URL: bertEmbeddingsURL,
},
Overrides: map[string]interface{}{"backend": "llama-cpp"},
},
{
Metadata: gallery.Metadata{
@@ -953,7 +954,8 @@ parameters:
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
// A model called "bert" can be present in the model directory depending on the order of the tests
Expect(len(models.Models)).To(BeNumerically(">=", 8))
})
It("can generate completions via ggml", func() {
if runtime.GOOS != "linux" {

View File

@@ -183,14 +183,13 @@ func registerRealtime(application *application.Application, model string) func(c
}
sttModel := cfg.Pipeline.Transcription
ttsModel := cfg.Pipeline.TTS
sessionID := generateSessionID()
session := &Session{
ID: sessionID,
TranscriptionOnly: false,
Model: model,
Voice: ttsModel,
Voice: cfg.TTSConfig.Voice,
ModelConfig: cfg,
TurnDetection: &types.TurnDetectionUnion{
ServerVad: &types.ServerVad{
@@ -557,13 +556,13 @@ func updateSession(session *Session, update *types.SessionUnion, cl *config.Mode
session.InputAudioTranscription = &types.AudioTranscription{}
}
session.InputAudioTranscription.Model = cfg.Pipeline.Transcription
session.Voice = cfg.Pipeline.TTS
session.Voice = cfg.TTSConfig.Voice
session.Model = rt.Model
session.ModelConfig = cfg
}
if rt.Audio != nil && rt.Audio.Output != nil && rt.Audio.Output.Voice != "" {
xlog.Warn("Ignoring voice setting; not implemented", "voice", rt.Audio.Output.Voice)
session.Voice = string(rt.Audio.Output.Voice)
}
if rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil {
@@ -746,6 +745,10 @@ func commitUtterance(ctx context.Context, utt []byte, session *Session, conv *Co
tr, err := session.ModelInterface.Transcribe(ctx, f.Name(), session.InputAudioTranscription.Language, false, false, session.InputAudioTranscription.Prompt)
if err != nil {
sendError(c, "transcription_failed", err.Error(), "", "event_TODO")
return
} else if tr == nil {
sendError(c, "transcription_failed", "trancribe result is nil", "", "event_TODO")
return
}
transcript = tr.Text
@@ -1006,7 +1009,16 @@ func generateResponse(session *Session, utt []byte, transcript string, conv *Con
sendError(c, "tts_error", fmt.Sprintf("Failed to read TTS audio: %v", err), "", item.Assistant.ID)
return
}
audioString := base64.StdEncoding.EncodeToString(audioBytes)
// Strip WAV header (44 bytes) to get raw PCM data
// The OpenAI Realtime API expects raw PCM, not WAV files
const wavHeaderSize = 44
pcmData := audioBytes
if len(audioBytes) > wavHeaderSize {
pcmData = audioBytes[wavHeaderSize:]
}
audioString := base64.StdEncoding.EncodeToString(pcmData)
sendEvent(c, types.ResponseOutputAudioTranscriptDeltaEvent{
ServerEventBase: types.ServerEventBase{},

View File

@@ -1026,7 +1026,8 @@ parameters:
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
const isPipeline = config.pipeline && (config.pipeline.vad || config.pipeline.transcription || config.pipeline.tts || config.pipeline.llm);
if (!isPipeline && !config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
@@ -1041,7 +1042,6 @@ parameters:
async saveConfig() {
try {
// Validate before saving
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
@@ -1052,7 +1052,8 @@ parameters:
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
const isPipeline = config.pipeline && (config.pipeline.vad || config.pipeline.transcription || config.pipeline.tts || config.pipeline.llm);
if (!isPipeline && !config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {