mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-15 09:03:14 -05:00
fix(realtime): Use user provided voice and allow pipeline models to have no backend (#8415)
* fix(realtime): Use the voice provided by the user or none at all

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(ui,config): Allow pipeline models to have no backend and use same validation in frontend

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
0ee92317ec
commit
7270a98ce5
@@ -76,42 +76,35 @@ func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: either in the next PR or the next commit, I want to merge these down into a single function that looks at the first few characters of the file to determine if we need to deserialize to []BackendConfig or BackendConfig
|
||||
func readMultipleModelConfigsFromFile(file string, opts ...ConfigLoaderOption) ([]*ModelConfig, error) {
|
||||
c := &[]*ModelConfig{}
|
||||
// readModelConfigsFromFile reads a config file that may contain either a single
|
||||
// ModelConfig or an array of ModelConfigs. It tries to unmarshal as an array first,
|
||||
// then falls back to a single config if that fails.
|
||||
func readModelConfigsFromFile(file string, opts ...ConfigLoaderOption) ([]*ModelConfig, error) {
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readMultipleModelConfigsFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("readMultipleModelConfigsFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
return nil, fmt.Errorf("readModelConfigsFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
for _, cc := range *c {
|
||||
cc.modelConfigFile = file
|
||||
cc.SetDefaults(opts...)
|
||||
// Try to unmarshal as array first
|
||||
var configs []*ModelConfig
|
||||
if err := yaml.Unmarshal(f, &configs); err == nil && len(configs) > 0 {
|
||||
for _, cc := range configs {
|
||||
cc.modelConfigFile = file
|
||||
cc.SetDefaults(opts...)
|
||||
}
|
||||
return configs, nil
|
||||
}
|
||||
|
||||
return *c, nil
|
||||
}
|
||||
|
||||
func readModelConfigFromFile(file string, opts ...ConfigLoaderOption) (*ModelConfig, error) {
|
||||
lo := &LoadOptions{}
|
||||
lo.Apply(opts...)
|
||||
|
||||
// Fall back to single config
|
||||
c := &ModelConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readModelConfigFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("readModelConfigFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
return nil, fmt.Errorf("readModelConfigsFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
c.SetDefaults(opts...)
|
||||
|
||||
c.modelConfigFile = file
|
||||
return c, nil
|
||||
c.SetDefaults(opts...)
|
||||
|
||||
return []*ModelConfig{c}, nil
|
||||
}
|
||||
|
||||
// Load a config file for a model
|
||||
@@ -163,7 +156,7 @@ func (bcl *ModelConfigLoader) LoadModelConfigFileByNameDefaultOptions(modelName
|
||||
func (bcl *ModelConfigLoader) LoadMultipleModelConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
c, err := readMultipleModelConfigsFromFile(file, opts...)
|
||||
c, err := readModelConfigsFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot load config file: %w", err)
|
||||
}
|
||||
@@ -181,11 +174,18 @@ func (bcl *ModelConfigLoader) LoadMultipleModelConfigsSingleFile(file string, op
|
||||
func (bcl *ModelConfigLoader) ReadModelConfig(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
c, err := readModelConfigFromFile(file, opts...)
|
||||
configs, err := readModelConfigsFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ReadModelConfig cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if len(configs) == 0 {
|
||||
return fmt.Errorf("ReadModelConfig: no configs found in file %q", file)
|
||||
}
|
||||
if len(configs) > 1 {
|
||||
xlog.Warn("ReadModelConig: read more than one config from file, only using first", "file", file, "configs", len(configs))
|
||||
}
|
||||
|
||||
c := configs[0]
|
||||
if valid, err := c.Validate(); valid {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
||||
@@ -375,15 +375,23 @@ func (bcl *ModelConfigLoader) LoadModelConfigsFromPath(path string, opts ...Conf
|
||||
strings.HasPrefix(file.Name(), ".") {
|
||||
continue
|
||||
}
|
||||
c, err := readModelConfigFromFile(filepath.Join(path, file.Name()), opts...)
|
||||
|
||||
filePath := filepath.Join(path, file.Name())
|
||||
|
||||
// Read config(s) - handles both single and array formats
|
||||
configs, err := readModelConfigsFromFile(filePath, opts...)
|
||||
if err != nil {
|
||||
xlog.Error("LoadModelConfigsFromPath cannot read config file", "error", err, "File Name", file.Name())
|
||||
continue
|
||||
}
|
||||
if valid, validationErr := c.Validate(); valid {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
||||
xlog.Error("config is not valid", "error", validationErr, "Name", c.Name)
|
||||
|
||||
// Validate and store each config
|
||||
for _, c := range configs {
|
||||
if valid, validationErr := c.Validate(); valid {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
||||
xlog.Error("config is not valid", "error", validationErr, "Name", c.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,8 @@ known_usecases:
|
||||
- COMPLETION
|
||||
`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
config, err := readModelConfigFromFile(tmp.Name())
|
||||
configs, err := readModelConfigsFromFile(tmp.Name())
|
||||
config := configs[0]
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
valid, err := config.Validate()
|
||||
@@ -43,7 +44,8 @@ backend: "foo-bar"
|
||||
parameters:
|
||||
model: "foo-bar"`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
config, err := readModelConfigFromFile(tmp.Name())
|
||||
configs, err := readModelConfigsFromFile(tmp.Name())
|
||||
config := configs[0]
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
// two configs in config.yaml
|
||||
@@ -62,7 +64,8 @@ parameters:
|
||||
defer os.Remove(tmp.Name())
|
||||
_, err = io.Copy(tmp, resp.Body)
|
||||
Expect(err).To(BeNil())
|
||||
config, err = readModelConfigFromFile(tmp.Name())
|
||||
configs, err = readModelConfigsFromFile(tmp.Name())
|
||||
config = configs[0]
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
// two configs in config.yaml
|
||||
@@ -188,7 +191,8 @@ mcp:
|
||||
}
|
||||
}`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
config, err := readModelConfigFromFile(tmp.Name())
|
||||
configs, err := readModelConfigsFromFile(tmp.Name())
|
||||
config := configs[0]
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
valid, err := config.Validate()
|
||||
@@ -218,7 +222,8 @@ mcp:
|
||||
}
|
||||
}`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
config, err := readModelConfigFromFile(tmp.Name())
|
||||
configs, err := readModelConfigsFromFile(tmp.Name())
|
||||
config := configs[0]
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
valid, err := config.Validate()
|
||||
|
||||
@@ -16,7 +16,7 @@ var _ = Describe("Test cases for config related functions", func() {
|
||||
Context("Test Read configuration functions", func() {
|
||||
configFile = os.Getenv("CONFIG_FILE")
|
||||
It("Test readConfigFile", func() {
|
||||
config, err := readMultipleModelConfigsFromFile(configFile)
|
||||
config, err := readModelConfigsFromFile(configFile)
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
// two configs in config.yaml
|
||||
|
||||
@@ -336,6 +336,7 @@ var _ = Describe("API test", func() {
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
Overrides: map[string]interface{}{"backend": "llama-cpp"},
|
||||
},
|
||||
{
|
||||
Metadata: gallery.Metadata{
|
||||
@@ -953,7 +954,8 @@ parameters:
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
|
||||
// A model called "bert" can be present in the model directory depending on the order of the tests
|
||||
Expect(len(models.Models)).To(BeNumerically(">=", 8))
|
||||
})
|
||||
It("can generate completions via ggml", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
|
||||
@@ -183,14 +183,13 @@ func registerRealtime(application *application.Application, model string) func(c
|
||||
}
|
||||
|
||||
sttModel := cfg.Pipeline.Transcription
|
||||
ttsModel := cfg.Pipeline.TTS
|
||||
|
||||
sessionID := generateSessionID()
|
||||
session := &Session{
|
||||
ID: sessionID,
|
||||
TranscriptionOnly: false,
|
||||
Model: model,
|
||||
Voice: ttsModel,
|
||||
Voice: cfg.TTSConfig.Voice,
|
||||
ModelConfig: cfg,
|
||||
TurnDetection: &types.TurnDetectionUnion{
|
||||
ServerVad: &types.ServerVad{
|
||||
@@ -557,13 +556,13 @@ func updateSession(session *Session, update *types.SessionUnion, cl *config.Mode
|
||||
session.InputAudioTranscription = &types.AudioTranscription{}
|
||||
}
|
||||
session.InputAudioTranscription.Model = cfg.Pipeline.Transcription
|
||||
session.Voice = cfg.Pipeline.TTS
|
||||
session.Voice = cfg.TTSConfig.Voice
|
||||
session.Model = rt.Model
|
||||
session.ModelConfig = cfg
|
||||
}
|
||||
|
||||
if rt.Audio != nil && rt.Audio.Output != nil && rt.Audio.Output.Voice != "" {
|
||||
xlog.Warn("Ignoring voice setting; not implemented", "voice", rt.Audio.Output.Voice)
|
||||
session.Voice = string(rt.Audio.Output.Voice)
|
||||
}
|
||||
|
||||
if rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil {
|
||||
@@ -746,6 +745,10 @@ func commitUtterance(ctx context.Context, utt []byte, session *Session, conv *Co
|
||||
tr, err := session.ModelInterface.Transcribe(ctx, f.Name(), session.InputAudioTranscription.Language, false, false, session.InputAudioTranscription.Prompt)
|
||||
if err != nil {
|
||||
sendError(c, "transcription_failed", err.Error(), "", "event_TODO")
|
||||
return
|
||||
} else if tr == nil {
|
||||
sendError(c, "transcription_failed", "trancribe result is nil", "", "event_TODO")
|
||||
return
|
||||
}
|
||||
|
||||
transcript = tr.Text
|
||||
@@ -1006,7 +1009,16 @@ func generateResponse(session *Session, utt []byte, transcript string, conv *Con
|
||||
sendError(c, "tts_error", fmt.Sprintf("Failed to read TTS audio: %v", err), "", item.Assistant.ID)
|
||||
return
|
||||
}
|
||||
audioString := base64.StdEncoding.EncodeToString(audioBytes)
|
||||
|
||||
// Strip WAV header (44 bytes) to get raw PCM data
|
||||
// The OpenAI Realtime API expects raw PCM, not WAV files
|
||||
const wavHeaderSize = 44
|
||||
pcmData := audioBytes
|
||||
if len(audioBytes) > wavHeaderSize {
|
||||
pcmData = audioBytes[wavHeaderSize:]
|
||||
}
|
||||
|
||||
audioString := base64.StdEncoding.EncodeToString(pcmData)
|
||||
|
||||
sendEvent(c, types.ResponseOutputAudioTranscriptDeltaEvent{
|
||||
ServerEventBase: types.ServerEventBase{},
|
||||
|
||||
@@ -1026,7 +1026,8 @@ parameters:
|
||||
if (!config.name) {
|
||||
throw new Error('Model name is required');
|
||||
}
|
||||
if (!config.backend) {
|
||||
const isPipeline = config.pipeline && (config.pipeline.vad || config.pipeline.transcription || config.pipeline.tts || config.pipeline.llm);
|
||||
if (!isPipeline && !config.backend) {
|
||||
throw new Error('Backend is required');
|
||||
}
|
||||
if (!config.parameters || !config.parameters.model) {
|
||||
@@ -1041,7 +1042,6 @@ parameters:
|
||||
|
||||
async saveConfig() {
|
||||
try {
|
||||
// Validate before saving
|
||||
const yamlContent = this.yamlEditor.getValue();
|
||||
const config = jsyaml.load(yamlContent);
|
||||
|
||||
@@ -1052,7 +1052,8 @@ parameters:
|
||||
if (!config.name) {
|
||||
throw new Error('Model name is required');
|
||||
}
|
||||
if (!config.backend) {
|
||||
const isPipeline = config.pipeline && (config.pipeline.vad || config.pipeline.transcription || config.pipeline.tts || config.pipeline.llm);
|
||||
if (!isPipeline && !config.backend) {
|
||||
throw new Error('Backend is required');
|
||||
}
|
||||
if (!config.parameters || !config.parameters.model) {
|
||||
|
||||
Reference in New Issue
Block a user