LocalAI/core/backend/preload.go

package backend

import (
	"context"
	"errors"
	"fmt"
	"sync"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/xlog"
)

// PreloadModelByName warms the named model ahead of its first request so that
// request does not pay the cold-start load cost (the inverse of shutting a
// model down).
//
// The model's config is looked up by name and its `pipeline:` block is
// resolved into stages:
//
//   - When at least one stage is configured (VAD, transcription, LLM, TTS,
//     sound_detection, voice_recognition), the model is a realtime pipeline.
//     The pipeline stub has no backend of its own, so the sub-models are
//     loaded via PreloadStages instead.
//   - Otherwise the model's own backend is loaded directly via PreloadModel.
//
// It returns the names of the models that were loaded together with an error
// describing what failed (nil on full success). For a pipeline, a partial load
// yields both the loaded names and a non-nil error, so the caller can report
// exactly what is and is not resident. A failure to read the config, to
// resolve a stage, or to load a non-pipeline model returns a nil name list.
//
// Compaction's summary_model is deliberately left cold: it is only invoked off
// the response path, so it can stay lazy.
func PreloadModelByName(ctx context.Context, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, name string) ([]string, error) {
	modelCfg, err := cl.LoadModelConfigFileByNameDefaultOptions(name, appConfig)
	if err != nil {
		return nil, err
	}

	subModels, err := pipelineStages(cl, &modelCfg.Pipeline, ml.ModelPath)
	switch {
	case err != nil:
		return nil, err
	case len(subModels) > 0:
		// Realtime pipeline: warm the sub-models rather than the stub.
		return PreloadStages(ctx, ml, appConfig, subModels)
	}

	// Plain model: load its own backend.
	loadErr := PreloadModel(ctx, ml, *modelCfg, appConfig)
	if loadErr != nil {
		return nil, loadErr
	}
	return []string{modelCfg.Name}, nil
}

// PreloadStage describes one pipeline sub-model to preload.
//
// Role is the label of the pipeline slot the sub-model fills (for example
// "vad", "llm" or "tts"); it is only used to identify the stage in error
// messages and logs. Cfg is the resolved model config to load the stage from;
// a nil Cfg means the stage is absent, and PreloadStages skips it.
type PreloadStage struct {
	Role string
	Cfg  *config.ModelConfig
}

// loadStage is the function PreloadStages calls to load a single stage. It
// defaults to PreloadModel and is kept as a package-level variable to provide
// a seam: unit tests can substitute a fake and exercise PreloadStages without
// spawning real backends.
var loadStage = PreloadModel

// pipelineStages turns the populated slots of a pipeline config into
// PreloadStage values, one per configured sub-model, in the fixed order
// vad, transcription, llm, tts, sound_detection, voice_recognition.
//
// Each slot's model name is resolved to a concrete config through
// cl.LoadResolvedModelConfig, which is expected to follow a single alias hop,
// matching how the realtime pipeline resolves its own stages. Slots with an
// empty model name are skipped. A slot that fails to resolve is treated as a
// misconfiguration: resolution stops at once and the error is returned,
// prefixed with the slot's role and model name, rather than being deferred to
// load time.
//
// When no slot is populated the result is a nil slice, which callers treat as
// "not a pipeline".
func pipelineStages(cl *config.ModelConfigLoader, p *config.Pipeline, modelPath string) ([]PreloadStage, error) {
	type slot struct{ role, model string }

	slots := []slot{
		{role: "vad", model: p.VAD},
		{role: "transcription", model: p.Transcription},
		{role: "llm", model: p.LLM},
		{role: "tts", model: p.TTS},
		{role: "sound_detection", model: p.SoundDetection},
	}
	// Voice recognition is an optional nested block; it only contributes a
	// slot when the block itself is present.
	if vr := p.VoiceRecognition; vr != nil {
		slots = append(slots, slot{role: "voice_recognition", model: vr.Model})
	}

	var resolved []PreloadStage
	for _, sl := range slots {
		if sl.model == "" {
			continue
		}
		stageCfg, err := cl.LoadResolvedModelConfig(sl.model, modelPath)
		if err != nil {
			return nil, fmt.Errorf("%s (%s): %w", sl.role, sl.model, err)
		}
		resolved = append(resolved, PreloadStage{Role: sl.role, Cfg: stageCfg})
	}
	return resolved, nil
}

// PreloadStages loads every present stage at once and waits for all of them, so
// a pipeline warms in the time of its slowest stage rather than the sum. Absent
// (nil-config) stages are skipped. A failed stage does not cancel the others —
// they all run to completion so the joined error names every broken stage at
// once, alongside the names that did load.
//
// Both the returned names and the failures inside the joined error follow the
// order of stages, not the order in which the concurrent loads happened to
// finish, so the same input always yields the same output. The returned slice
// is nil when no stage loaded; the error is nil when no stage failed.
func PreloadStages(ctx context.Context, ml *model.ModelLoader, appConfig *config.ApplicationConfig, stages []PreloadStage) ([]string, error) {
	// One result slot per stage. Each goroutine writes only to its own index,
	// so no mutex is required, and wg.Wait orders those writes before the
	// reads below.
	var (
		wg        sync.WaitGroup
		succeeded = make([]bool, len(stages))
		failures  = make([]error, len(stages))
	)
	for i, s := range stages {
		if s.Cfg == nil {
			continue
		}
		wg.Add(1)
		go func(i int, s PreloadStage) {
			defer wg.Done()
			if err := loadStage(ctx, ml, *s.Cfg, appConfig); err != nil {
				xlog.Warn("preload: failed to load pipeline sub-model", "stage", s.Role, "model", s.Cfg.Name, "error", err)
				failures[i] = fmt.Errorf("%s (%s): %w", s.Role, s.Cfg.Name, err)
				return
			}
			xlog.Debug("preload: loaded pipeline sub-model", "stage", s.Role, "model", s.Cfg.Name)
			succeeded[i] = true
		}(i, s)
	}
	wg.Wait()

	// Collect in stage order so the result is deterministic.
	var loaded []string
	for i, s := range stages {
		if succeeded[i] {
			loaded = append(loaded, s.Cfg.Name)
		}
	}
	// errors.Join discards nil entries and returns nil when all are nil.
	return loaded, errors.Join(failures...)
}