diff --git a/pkg/model/autoload.go b/pkg/model/autoload.go
new file mode 100644
index 000000000..d6af3e095
--- /dev/null
+++ b/pkg/model/autoload.go
@@ -0,0 +1,101 @@
+package model
+
+import (
+	"slices"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
+)
+
+// preferredGGUFBackend is tried first when auto-detecting the backend for a
+// GGUF model, since GGUF is overwhelmingly llama.cpp's native format.
+const preferredGGUFBackend = "llama-cpp"
+
+// llmCapableUsecases are the BackendCapabilities usecases that signal a backend
+// can serve a text/LLM GGUF model. A GGUF model that declares no explicit
+// backend must only be auto-tried against backends carrying one of these
+// usecases - never against audio/codec/image backends (e.g. opus) that happen
+// to be installed alongside it (see issue #9287).
+var llmCapableUsecases = []string{
+	config.UsecaseChat,
+	config.UsecaseCompletion,
+	config.UsecaseEdit,
+	config.UsecaseEmbeddings,
+}
+
+// SelectAutoLoadBackends returns the ordered, deterministic list of backend
+// names to try when loading a model that declares no explicit backend.
+//
+// available is the set of installed backend names (unordered, as it comes from a
+// Go map). modelFile is the model file name/path (may be empty).
+//
+// The trial loop in (*ModelLoader).Load picks the first backend whose gRPC
+// LoadModel succeeds, so the order and membership of this list directly decide
+// which backend wins. The previous implementation ranged a Go map (random
+// order) with no filtering, so an unrelated installed backend such as the
+// "opus" audio codec could win a GGUF/LLM model load (#9287).
+//
+// Behaviour:
+//   - The result is always deterministically ordered, so auto-detect no longer
+//     depends on map iteration order.
+//   - available is never modified; the result is a sorted, de-duplicated copy,
+//     so no backend is tried twice.
+//   - For a GGUF model file (".gguf" suffix, matched case-insensitively) the
+//     list is filtered to LLM-capable backends and llama-cpp is placed first,
+//     so an incompatible audio/codec/image backend can never win the trial
+//     loop.
+//   - If filtering would leave no candidate, the full sorted set is returned
+//     instead, so a model that previously loaded never becomes unloadable.
+func SelectAutoLoadBackends(available []string, modelFile string) []string {
+	// Clone before sorting so the caller's slice keeps its order.
+	sorted := slices.Clone(available)
+	slices.Sort(sorted)
+	// Sorting makes duplicates adjacent, so Compact removes all of them.
+	sorted = slices.Compact(sorted)
+
+	if !isGGUFModelFile(modelFile) {
+		return sorted
+	}
+
+	candidates := make([]string, 0, len(sorted))
+	// llama-cpp goes first whenever it is installed.
+	if _, found := slices.BinarySearch(sorted, preferredGGUFBackend); found {
+		candidates = append(candidates, preferredGGUFBackend)
+	}
+	for _, b := range sorted {
+		if b != preferredGGUFBackend && isLLMCapableBackend(b) {
+			candidates = append(candidates, b)
+		}
+	}
+
+	if len(candidates) == 0 {
+		// Conservative fallback: no known LLM-capable backend is installed, so
+		// rather than refuse to load, fall back to the previous behaviour of
+		// trying every installed backend (now at least in a deterministic order).
+		return sorted
+	}
+	return candidates
+}
+
+// isGGUFModelFile reports whether modelFile ends in ".gguf", ignoring case.
+func isGGUFModelFile(modelFile string) bool {
+	return strings.HasSuffix(strings.ToLower(modelFile), ".gguf")
+}
+
+// isLLMCapableBackend reports whether a backend is known to serve text/LLM
+// models. Backends absent from the capability map (unknown) are treated as
+// not LLM-capable here: for GGUF auto-detection we only want backends we can
+// positively confirm handle LLMs, and the zero-candidate fallback keeps unknown
+// setups working.
+func isLLMCapableBackend(name string) bool {
+	capability := config.GetBackendCapability(name)
+	if capability == nil {
+		return false
+	}
+	for _, u := range capability.PossibleUsecases {
+		if slices.Contains(llmCapableUsecases, u) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/model/autoload_test.go b/pkg/model/autoload_test.go
new file mode 100644
index 000000000..f191969b4
--- /dev/null
+++ b/pkg/model/autoload_test.go
@@ -0,0 +1,68 @@
+package model_test
+
+import (
+	"github.com/mudler/LocalAI/pkg/model"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("SelectAutoLoadBackends (#9287)", func() {
+	Describe("GGUF model auto-detection", func() {
+		It("excludes incompatible audio/codec backends (e.g. opus) for a .gguf model", func() {
+			// Regression for #9287: installing an unrelated audio backend like
+			// "opus" must never win the GGUF auto-detect trial loop.
+			got := model.SelectAutoLoadBackends([]string{"opus", "llama-cpp"}, "Qwen3.5-9b.gguf")
+			Expect(got).NotTo(ContainElement("opus"))
+			Expect(got).To(ContainElement("llama-cpp"))
+		})
+
+		It("matches the .gguf extension case-insensitively", func() {
+			got := model.SelectAutoLoadBackends([]string{"opus", "llama-cpp"}, "MODEL.GGUF")
+			Expect(got).To(Equal([]string{"llama-cpp"}))
+		})
+
+		It("places llama-cpp first for a .gguf model", func() {
+			got := model.SelectAutoLoadBackends([]string{"vllm", "opus", "llama-cpp"}, "model.gguf")
+			Expect(got).NotTo(BeEmpty())
+			Expect(got[0]).To(Equal("llama-cpp"))
+		})
+
+		It("is deterministic regardless of input ordering", func() {
+			a := model.SelectAutoLoadBackends([]string{"opus", "vllm", "llama-cpp", "whisper"}, "m.gguf")
+			b := model.SelectAutoLoadBackends([]string{"whisper", "llama-cpp", "vllm", "opus"}, "m.gguf")
+			Expect(a).To(Equal(b))
+		})
+
+		It("falls back to the full sorted set when filtering leaves no candidate", func() {
+			// No LLM-capable backend installed: never make a previously-loadable
+			// model unloadable, return the original set (sorted).
+			got := model.SelectAutoLoadBackends([]string{"opus"}, "model.gguf")
+			Expect(got).To(Equal([]string{"opus"}))
+		})
+	})
+
+	Describe("non-GGUF model auto-detection", func() {
+		It("returns a deterministic (sorted) set without filtering", func() {
+			got := model.SelectAutoLoadBackends([]string{"opus", "llama-cpp", "diffusers"}, "model-dir")
+			Expect(got).To(Equal([]string{"diffusers", "llama-cpp", "opus"}))
+		})
+	})
+
+	Describe("input handling", func() {
+		It("does not reorder the caller's slice", func() {
+			in := []string{"opus", "llama-cpp", "diffusers"}
+			_ = model.SelectAutoLoadBackends(in, "model-dir")
+			Expect(in).To(Equal([]string{"opus", "llama-cpp", "diffusers"}))
+		})
+
+		It("drops duplicate backend names", func() {
+			got := model.SelectAutoLoadBackends([]string{"opus", "diffusers", "opus"}, "model-dir")
+			Expect(got).To(Equal([]string{"diffusers", "opus"}))
+		})
+
+		It("returns no candidates when nothing is installed", func() {
+			Expect(model.SelectAutoLoadBackends(nil, "model.gguf")).To(BeEmpty())
+		})
+	})
+})
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index d7719ca13..4dc01b9a4 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -350,14 +350,17 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
 	// Otherwise scan for backends in the asset directory
 	var err error
 
-	// get backends embedded in the binary
-	autoLoadBackends := []string{}
-
-	// append externalBackends supplied by the user via the CLI
+	// Collect the installed/external backends (the map is unordered).
+	available := []string{}
 	for b := range ml.GetAllExternalBackends(o) {
-		autoLoadBackends = append(autoLoadBackends, b)
+		available = append(available, b)
 	}
 
+	// Build a deterministic, file-type-filtered candidate list so an
+	// incompatible backend (e.g. an audio codec like opus) can never win the
+	// trial loop for a GGUF/LLM model. See SelectAutoLoadBackends / #9287.
+	autoLoadBackends := SelectAutoLoadBackends(available, o.model)
+
 	if len(autoLoadBackends) == 0 {
 		xlog.Error("No backends found")
 		return nil, fmt.Errorf("no backends found")