From 482314c623c86d9fb362a828a371295c1d417f85 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 21:50:44 +0200 Subject: [PATCH] fix(realtime): resolve model aliases for pipeline sub-models (#10484) Realtime pipeline sub-models (llm/transcription/tts/vad/sound-detection) were loaded via cl.LoadModelConfigFileByName without alias resolution, unlike top-level API requests, which resolve aliases in core/http/middleware/request.go. So a pipeline that references an alias (e.g. `pipeline.llm: default`, where `default` is an alias for a real LLM) reached model loading as the alias stub with an empty Backend. On a single host this broke silently (the failure only surfaced downstream); in distributed/p2p mode it was a hard error: routing model : loading model default: ... installing backend on node X: backend name is empty Fix by routing every pipeline sub-model load through a small helper that follows a single alias hop (mirroring the top-level resolution), so non-alias sub-models behave identically and aliased ones get the target's full config (Backend, Model, ...). 
Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/http/endpoints/openai/realtime_model.go | 33 +++++++++--- .../openai/realtime_model_alias_test.go | 52 +++++++++++++++++++ 2 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 core/http/endpoints/openai/realtime_model_alias_test.go diff --git a/core/http/endpoints/openai/realtime_model.go b/core/http/endpoints/openai/realtime_model.go index 6843a521d..0dafa0a35 100644 --- a/core/http/endpoints/openai/realtime_model.go +++ b/core/http/endpoints/openai/realtime_model.go @@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL if pipeline.SoundDetection == "" { return nil, nil } - cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath) + cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load sound detection config: %w", err) } @@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL } func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) { - cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath) + cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath) if err != nil { return nil, nil, fmt.Errorf("failed to load backend config: %w", err) @@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig return nil, nil, fmt.Errorf("failed to validate config: %w", err) } - cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath) + cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath) if err != nil { return nil, nil, fmt.Errorf("failed to load backend config: %w", err) @@ -542,11 +542,30 @@ func 
buildRealtimeRoutingContext(a *application.Application, sessionID string) * } } +// loadPipelineSubModel loads a pipeline sub-model config by name and follows a +// single alias hop, so a pipeline that references an alias (e.g. `llm: default`) +// gets the alias target's full config (Backend, Model, ...) rather than the +// alias stub with an empty Backend. Without this the alias survives unresolved +// into model loading and fails downstream — notably in distributed mode with +// "backend name is empty". Mirrors the top-level alias resolution in +// core/http/middleware/request.go. +func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) { + cfg, err := cl.LoadModelConfigFileByName(name, modelPath) + if err != nil { + return nil, err + } + resolved, _, err := cl.ResolveAlias(cfg) + if err != nil { + return nil, err + } + return resolved, nil +} + // returns and loads either a wrapped model or a model that support audio-to-audio func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) { xlog.Debug("Creating new model pipeline model", "pipeline", pipeline) - cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath) + cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) @@ -557,7 +576,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model } // TODO: Do we always need a transcription model? It can be disabled. 
Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process - cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath) + cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) @@ -589,7 +608,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model xlog.Debug("Loading a wrapped model") // Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations - cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath) + cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) @@ -604,7 +623,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model applyPipelineReasoning(cfgLLM, *pipeline) applyPipelineThinking(cfgLLM, *pipeline) - cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath) + cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) diff --git a/core/http/endpoints/openai/realtime_model_alias_test.go b/core/http/endpoints/openai/realtime_model_alias_test.go new file mode 100644 index 000000000..77179d963 --- /dev/null +++ b/core/http/endpoints/openai/realtime_model_alias_test.go @@ -0,0 +1,52 @@ +package openai + +import ( + "os" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" +) + +// loadPipelineSubModel must resolve a pipeline sub-model that references an +// alias (e.g. `llm: default`) one hop to the alias target's full config — so +// the effective backend is the target's backend, not the empty backend of the +// alias stub. 
This mirrors the top-level alias resolution done in +// core/http/middleware/request.go, which the realtime pipeline previously +// skipped (failing in distributed mode with "backend name is empty"). +var _ = Describe("loadPipelineSubModel", func() { + It("resolves a sub-model alias one hop to the target's config", func() { + tmpDir := GinkgoT().TempDir() + + // A real model config with a concrete backend. + realLLM := `name: real-llm +backend: llama-cpp +parameters: + model: real-llm.gguf +` + Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed()) + + // An alias pointing at the real model. + aliasCfg := `name: default +alias: real-llm +` + Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed()) + + cl := config.NewModelConfigLoader(tmpDir) + Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed()) + + // Resolving the alias must follow the hop to the target's full config. + resolved, err := loadPipelineSubModel(cl, "default", tmpDir) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved.IsAlias()).To(BeFalse()) + Expect(resolved.Backend).To(Equal("llama-cpp")) + + // A non-alias name must load unchanged. + direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir) + Expect(err).NotTo(HaveOccurred()) + Expect(direct.Backend).To(Equal("llama-cpp")) + Expect(direct.Name).To(Equal("real-llm")) + }) +})