mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 12:57:02 -04:00
Realtime sessions previously lazy-loaded each pipeline sub-model (VAD,
transcription, LLM, TTS) on first use, so every cold session paid a
per-request model-load stall and load errors only surfaced mid-stream.
Warm the whole pipeline eagerly at session start, blocking until it loads
(including the voice-gate speaker-recognition model, which an enforced
gate blocks each utterance on; compaction's summary_model stays lazy
since it only runs off the response path):
- Add backend.PreloadModel / PreloadModelByName as the single load path
for every modality (no transcription special-case; backend-omitted
configs are deprecated).
- The realtime session blocks on Model.Warmup and returns a
model_load_error to the client if any stage fails to load;
updateSession warms in the background. Opt out per pipeline with
pipeline.disable_warmup, exposed as a UI toggle via the
config-metadata registry.
Add a LocalAI-native POST /backend/load (and /v1/backend/load) that
pre-loads a model -- expanding realtime pipelines into their sub-models
-- as the inverse of /backend/shutdown. There is one preload engine
(backend.PreloadStages): the realtime Warmup methods, /backend/load and
the --load-to-memory startup flag all use it, so --load-to-memory now
also expands pipeline models and records load-failure traces. Pipeline
sub-model alias resolution is likewise shared
(ModelConfigLoader.LoadResolvedModelConfig). Surface the endpoint
everywhere an admin manages models:
- MCP admin tool load_model (httpapi + inproc clients, safety/catalog
prompts, catalog/dispatch tests).
- "Load into memory" action in the React models UI.
- Swagger regenerated; docs moved to the general backend-monitor page
since it is not realtime-specific.
Fix a Traces UI crash ("json: unsupported value: -Inf"): audio-snippet
RMS/peak now floor at a finite dBFS, and backend-trace data is sanitized
to drop non-finite floats before marshaling. The sanitizer is
copy-on-write -- it runs on every RecordBackendTrace, so containers are
only re-allocated on the paths that actually changed.
Migrate core/http/openresponses_test.go onto the prebuilt mock-backend
the rest of the http suite already uses -- it was the last spec still
pointing at a real HuggingFace model, so it 404'd wherever no vision
backend was built -- and fix its item_reference specs to send the
spec's "id" field instead of "item_id", which the handler never
accepted.
Assisted-by: Claude:claude-opus-4-8 Claude Code
Signed-off-by: Richard Palethorpe <io@richiejp.com>
147 lines
4.3 KiB
Go
147 lines
4.3 KiB
Go
package backend
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("pipelineStages", func() {
|
|
seed := func(dir string, names ...string) *config.ModelConfigLoader {
|
|
for _, n := range names {
|
|
yaml := "name: " + n + "\nbackend: fake-backend\n"
|
|
Expect(os.WriteFile(filepath.Join(dir, n+".yaml"), []byte(yaml), 0o644)).To(Succeed())
|
|
}
|
|
cl := config.NewModelConfigLoader(dir)
|
|
Expect(cl.LoadModelConfigsFromPath(dir)).To(Succeed())
|
|
return cl
|
|
}
|
|
|
|
It("resolves only the populated stages, in load order", func() {
|
|
dir := GinkgoT().TempDir()
|
|
cl := seed(dir, "vad-m", "stt-m", "llm-m", "tts-m")
|
|
|
|
stages, err := pipelineStages(cl, &config.Pipeline{
|
|
VAD: "vad-m",
|
|
Transcription: "stt-m",
|
|
LLM: "llm-m",
|
|
TTS: "tts-m",
|
|
}, dir)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
roles := make([]string, len(stages))
|
|
names := make([]string, len(stages))
|
|
for i, s := range stages {
|
|
roles[i] = s.Role
|
|
names[i] = s.Cfg.Name
|
|
}
|
|
Expect(roles).To(Equal([]string{"vad", "transcription", "llm", "tts"}))
|
|
Expect(names).To(Equal([]string{"vad-m", "stt-m", "llm-m", "tts-m"}))
|
|
})
|
|
|
|
It("skips unset stages and includes sound_detection and voice_recognition when set", func() {
|
|
dir := GinkgoT().TempDir()
|
|
cl := seed(dir, "stt-m", "ced", "spk")
|
|
|
|
stages, err := pipelineStages(cl, &config.Pipeline{
|
|
Transcription: "stt-m",
|
|
SoundDetection: "ced",
|
|
VoiceRecognition: &config.PipelineVoiceRecognition{Model: "spk"},
|
|
}, dir)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
roles := make([]string, len(stages))
|
|
for i, s := range stages {
|
|
roles[i] = s.Role
|
|
}
|
|
Expect(roles).To(ConsistOf("transcription", "sound_detection", "voice_recognition"))
|
|
})
|
|
|
|
It("returns nil for a pipeline with no stages (not a pipeline)", func() {
|
|
dir := GinkgoT().TempDir()
|
|
cl := seed(dir)
|
|
|
|
stages, err := pipelineStages(cl, &config.Pipeline{}, dir)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(stages).To(BeNil())
|
|
})
|
|
})
|
|
|
|
var _ = Describe("PreloadStages", func() {
|
|
var (
|
|
mu sync.Mutex
|
|
seen []string
|
|
)
|
|
|
|
// stubLoader swaps the loadStage seam for a recorder so no real backends
|
|
// are spawned; errFor injects per-model failures.
|
|
stubLoader := func(errFor map[string]error) {
|
|
loadStage = func(_ context.Context, _ *model.ModelLoader, cfg config.ModelConfig, _ *config.ApplicationConfig) error {
|
|
mu.Lock()
|
|
seen = append(seen, cfg.Name)
|
|
mu.Unlock()
|
|
return errFor[cfg.Name]
|
|
}
|
|
}
|
|
|
|
BeforeEach(func() {
|
|
seen = nil
|
|
})
|
|
AfterEach(func() {
|
|
loadStage = PreloadModel
|
|
})
|
|
|
|
mkStage := func(role, name string) PreloadStage {
|
|
return PreloadStage{Role: role, Cfg: &config.ModelConfig{Name: name}}
|
|
}
|
|
|
|
It("loads every present stage, skips absent (nil-config) ones, and returns the loaded names", func() {
|
|
stubLoader(nil)
|
|
|
|
loaded, err := PreloadStages(context.Background(), nil, nil, []PreloadStage{
|
|
mkStage("vad", "vad-m"),
|
|
{Role: "transcription"}, // absent stage
|
|
mkStage("llm", "llm-m"),
|
|
})
|
|
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Expect(loaded).To(ConsistOf("vad-m", "llm-m"))
|
|
// Barrier: every stage has run by the time PreloadStages returns, so
|
|
// reading seen without the lock here is safe.
|
|
Expect(seen).To(ConsistOf("vad-m", "llm-m"))
|
|
})
|
|
|
|
It("reports a joined error naming each failed stage while still loading the rest", func() {
|
|
stubLoader(map[string]error{
|
|
"vad-m": errors.New("vad boom"),
|
|
"tts-m": errors.New("tts boom"),
|
|
})
|
|
|
|
loaded, err := PreloadStages(context.Background(), nil, nil, []PreloadStage{
|
|
mkStage("vad", "vad-m"),
|
|
mkStage("llm", "llm-m"),
|
|
mkStage("tts", "tts-m"),
|
|
})
|
|
|
|
// Every stage ran (a failure does not cancel the others)...
|
|
Expect(seen).To(ConsistOf("vad-m", "llm-m", "tts-m"))
|
|
// ...the stage that loaded fine is reported as loaded...
|
|
Expect(loaded).To(ConsistOf("llm-m"))
|
|
// ...and the joined error names every broken stage and its cause.
|
|
Expect(err).To(HaveOccurred())
|
|
Expect(err.Error()).To(ContainSubstring("vad (vad-m)"))
|
|
Expect(err.Error()).To(ContainSubstring("vad boom"))
|
|
Expect(err.Error()).To(ContainSubstring("tts (tts-m)"))
|
|
Expect(err.Error()).To(ContainSubstring("tts boom"))
|
|
Expect(err.Error()).ToNot(ContainSubstring("llm"))
|
|
})
|
|
})
|