Files
LocalAI/core/backend/preload_internal_test.go
Richard Palethorpe eb32cd9073 feat(realtime): eager blocking pipeline warm-up + /backend/load API (#10662)
Realtime sessions previously lazy-loaded each pipeline sub-model (VAD,
transcription, LLM, TTS) on first use, so every cold session paid a
per-request model-load stall and load errors only surfaced mid-stream.

Warm the whole pipeline eagerly and blockingly at session start
(including the voice-gate speaker-recognition model, which an enforced
gate blocks each utterance on; compaction's summary_model stays lazy
since it only runs off the response path):
- Add backend.PreloadModel / PreloadModelByName as the single load path
  for every modality (no transcription special-case; backend-omitted
  configs are deprecated).
- The realtime session blocks on Model.Warmup and returns a
  model_load_error to the client if any stage fails to load;
  updateSession warms in the background. Opt out per pipeline with
  pipeline.disable_warmup, exposed as a UI toggle via the
  config-metadata registry.

Add a LocalAI-native POST /backend/load (and /v1/backend/load) that
pre-loads a model -- expanding realtime pipelines into their sub-models
-- as the inverse of /backend/shutdown. There is one preload engine
(backend.PreloadStages): the realtime Warmup methods, /backend/load and
the --load-to-memory startup flag all use it, so --load-to-memory now
also expands pipeline models and records load-failure traces. Pipeline
sub-model alias resolution is likewise shared
(ModelConfigLoader.LoadResolvedModelConfig). Surface the endpoint
everywhere an admin manages models:
- MCP admin tool load_model (httpapi + inproc clients, safety/catalog
  prompts, catalog/dispatch tests).
- "Load into memory" action in the React models UI.
- Swagger regenerated; docs moved to the general backend-monitor page
  since it is not realtime-specific.

Fix a Traces UI crash ("json: unsupported value: -Inf"): audio-snippet
RMS/peak now floor at a finite dBFS, and backend-trace data is sanitized
to drop non-finite floats before marshaling. The sanitizer is
copy-on-write -- it runs on every RecordBackendTrace, so containers are
only re-allocated on the paths that actually changed.

Migrate core/http/openresponses_test.go onto the prebuilt mock-backend
the rest of the http suite already uses -- it was the last spec still
pointing at a real HuggingFace model, so it 404'd wherever no vision
backend was built -- and fix its item_reference specs to send the
spec's "id" field instead of "item_id", which the handler never
accepted.

Assisted-by: Claude:claude-opus-4-8 Claude Code

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-07-03 18:00:37 +02:00

147 lines
4.3 KiB
Go

package backend
import (
"context"
"errors"
"os"
"path/filepath"
"sync"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for pipelineStages: given a config loader and a pipeline definition,
// it should yield one stage per populated pipeline field.
var _ = Describe("pipelineStages", func() {
	// seedLoader writes one minimal YAML model config per name into root and
	// returns a config loader that has read that directory back in.
	seedLoader := func(root string, modelNames ...string) *config.ModelConfigLoader {
		for _, modelName := range modelNames {
			target := filepath.Join(root, modelName+".yaml")
			contents := []byte("name: " + modelName + "\nbackend: fake-backend\n")
			Expect(os.WriteFile(target, contents, 0o644)).To(Succeed())
		}

		loader := config.NewModelConfigLoader(root)
		Expect(loader.LoadModelConfigsFromPath(root)).To(Succeed())
		return loader
	}

	It("resolves only the populated stages, in load order", func() {
		root := GinkgoT().TempDir()
		loader := seedLoader(root, "vad-m", "stt-m", "llm-m", "tts-m")
		pipeline := &config.Pipeline{
			VAD:           "vad-m",
			Transcription: "stt-m",
			LLM:           "llm-m",
			TTS:           "tts-m",
		}

		stages, err := pipelineStages(loader, pipeline, root)
		Expect(err).ToNot(HaveOccurred())

		// Collect roles and model names in the order the stages came back;
		// Equal (not ConsistOf) below makes the ordering part of the contract.
		gotRoles := make([]string, 0, len(stages))
		gotNames := make([]string, 0, len(stages))
		for idx := range stages {
			gotRoles = append(gotRoles, stages[idx].Role)
			gotNames = append(gotNames, stages[idx].Cfg.Name)
		}
		Expect(gotRoles).To(Equal([]string{"vad", "transcription", "llm", "tts"}))
		Expect(gotNames).To(Equal([]string{"vad-m", "stt-m", "llm-m", "tts-m"}))
	})

	It("skips unset stages and includes sound_detection and voice_recognition when set", func() {
		root := GinkgoT().TempDir()
		loader := seedLoader(root, "stt-m", "ced", "spk")
		pipeline := &config.Pipeline{
			Transcription:    "stt-m",
			SoundDetection:   "ced",
			VoiceRecognition: &config.PipelineVoiceRecognition{Model: "spk"},
		}

		stages, err := pipelineStages(loader, pipeline, root)
		Expect(err).ToNot(HaveOccurred())

		gotRoles := make([]string, 0, len(stages))
		for idx := range stages {
			gotRoles = append(gotRoles, stages[idx].Role)
		}
		// Order is deliberately not asserted here, only membership.
		Expect(gotRoles).To(ConsistOf("transcription", "sound_detection", "voice_recognition"))
	})

	It("returns nil for a pipeline with no stages (not a pipeline)", func() {
		root := GinkgoT().TempDir()
		loader := seedLoader(root)
		emptyPipeline := &config.Pipeline{}

		stages, err := pipelineStages(loader, emptyPipeline, root)
		Expect(err).ToNot(HaveOccurred())
		Expect(stages).To(BeNil())
	})
})
// Specs for PreloadStages. The package-level loadStage seam is replaced with a
// recording stub so that no real backend is spawned; each spec then checks
// which stage configs reached the loader and what PreloadStages reported.
var _ = Describe("PreloadStages", func() {
	var (
		// mu guards seen: the stub appends from wherever PreloadStages invokes
		// it. NOTE(review): presumably stages are loaded concurrently; confirm
		// against the PreloadStages implementation.
		mu   sync.Mutex
		seen []string
	)

	// origLoadStage remembers the loader that was installed before each spec
	// so teardown restores exactly that value instead of assuming a default.
	// It is initialised here only to obtain the seam's type; the live value is
	// re-captured in BeforeEach.
	origLoadStage := loadStage

	// stubLoader swaps the loadStage seam for a recorder so no real backends
	// are spawned; errFor injects per-model failures. A nil errFor is valid:
	// indexing a nil map yields a nil error for every model.
	stubLoader := func(errFor map[string]error) {
		loadStage = func(_ context.Context, _ *model.ModelLoader, cfg config.ModelConfig, _ *config.ApplicationConfig) error {
			mu.Lock()
			seen = append(seen, cfg.Name)
			mu.Unlock()
			return errFor[cfg.Name]
		}
	}

	BeforeEach(func() {
		seen = nil
		origLoadStage = loadStage
	})

	AfterEach(func() {
		loadStage = origLoadStage
	})

	// mkStage builds a stage whose config carries only the model name.
	mkStage := func(role, name string) PreloadStage {
		return PreloadStage{Role: role, Cfg: &config.ModelConfig{Name: name}}
	}

	It("loads every present stage, skips absent (nil-config) ones, and returns the loaded names", func() {
		stubLoader(nil)

		loaded, err := PreloadStages(context.Background(), nil, nil, []PreloadStage{
			mkStage("vad", "vad-m"),
			{Role: "transcription"}, // absent stage
			mkStage("llm", "llm-m"),
		})

		Expect(err).ToNot(HaveOccurred())
		Expect(loaded).To(ConsistOf("vad-m", "llm-m"))
		// Barrier: every stage has run by the time PreloadStages returns, so
		// reading seen without the lock here is safe.
		Expect(seen).To(ConsistOf("vad-m", "llm-m"))
	})

	It("reports a joined error naming each failed stage while still loading the rest", func() {
		stubLoader(map[string]error{
			"vad-m": errors.New("vad boom"),
			"tts-m": errors.New("tts boom"),
		})

		loaded, err := PreloadStages(context.Background(), nil, nil, []PreloadStage{
			mkStage("vad", "vad-m"),
			mkStage("llm", "llm-m"),
			mkStage("tts", "tts-m"),
		})

		// Every stage ran (a failure does not cancel the others)...
		Expect(seen).To(ConsistOf("vad-m", "llm-m", "tts-m"))
		// ...the stage that loaded fine is reported as loaded...
		Expect(loaded).To(ConsistOf("llm-m"))
		// ...and the joined error names every broken stage and its cause.
		Expect(err).To(HaveOccurred())
		Expect(err.Error()).To(ContainSubstring("vad (vad-m)"))
		Expect(err.Error()).To(ContainSubstring("vad boom"))
		Expect(err.Error()).To(ContainSubstring("tts (tts-m)"))
		Expect(err.Error()).To(ContainSubstring("tts boom"))
		// The healthy stage must not be mentioned in the failure report.
		Expect(err.Error()).ToNot(ContainSubstring("llm"))
	})
})