Compare commits

...

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
69e482b0a8 fix(model): deterministic, file-type-filtered backend auto-detect (#9287)
When a model config declares no explicit `backend:`, Load() fell into a
trial loop built by ranging the external-backends Go map (random order)
with no filtering, returning the first backend whose gRPC LoadModel
succeeded. An unrelated installed backend - e.g. the "opus" audio codec -
could therefore win a GGUF/LLM model load, so a model that should run on
llama.cpp wrongly tried to use opus.

Extract the candidate selection into a pure, testable function
SelectAutoLoadBackends that:

  - sorts the candidate list deterministically (no more map-order
    nondeterminism), and
  - for a `.gguf` model, filters to LLM-capable backends (via
    core/config.BackendCapabilities) and puts llama-cpp first, so an
    incompatible audio/codec/image backend can never win the trial loop.

If filtering would leave zero candidates, the full sorted set is returned
unchanged, so a previously-loadable model is never made unloadable.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-12 21:46:25 +00:00
3 changed files with 153 additions and 5 deletions

99
pkg/model/autoload.go Normal file
View File

@@ -0,0 +1,99 @@
package model
import (
"slices"
"sort"
"strings"
"github.com/mudler/LocalAI/core/config"
)
// preferredGGUFBackend is the backend name that SelectAutoLoadBackends moves to
// the front of the candidate list when auto-detecting the backend for a GGUF
// model, since GGUF is overwhelmingly llama.cpp's native format.
const preferredGGUFBackend = "llama-cpp"
// llmCapableUsecases lists the BackendCapabilities usecases that signal a
// backend can serve a text/LLM GGUF model. isLLMCapableBackend treats a backend
// as LLM-capable when at least one of its possible usecases appears here.
//
// A GGUF model that declares no explicit backend must only be auto-tried
// against backends carrying one of these usecases - never against
// audio/codec/image backends (e.g. opus) that happen to be installed alongside
// it (see issue #9287).
var llmCapableUsecases = []string{
config.UsecaseChat,
config.UsecaseCompletion,
config.UsecaseEdit,
config.UsecaseEmbeddings,
}
// SelectAutoLoadBackends builds the ordered candidate list of backend names to
// try for a model whose configuration names no explicit backend.
//
// Parameters:
//   - available: names of the installed backends, in any order (the caller
//     collects them by ranging a Go map). The slice is copied, never mutated.
//   - modelFile: the model file name or path; may be empty.
//
// Returns:
//   - For a non-GGUF modelFile: every name in available, sorted ascending.
//   - For a GGUF modelFile: llama-cpp first (when installed), followed by the
//     remaining LLM-capable backends in sorted order. Backends that are not
//     known to be LLM-capable (e.g. the "opus" audio codec, #9287) are dropped.
//   - For a GGUF modelFile with no llama-cpp and no LLM-capable backend
//     installed: the full sorted list, so a setup that could load a model
//     before does not become unloadable.
//
// The result depends only on the set of names passed in, not on their order.
func SelectAutoLoadBackends(available []string, modelFile string) []string {
	// Work on a private, sorted copy so the outcome is independent of the
	// caller's (map-derived) ordering.
	candidates := append([]string(nil), available...)
	sort.Strings(candidates)

	if !isGGUFModelFile(modelFile) {
		return candidates
	}

	llamaInstalled := false
	llmBackends := make([]string, 0, len(candidates))
	for _, name := range candidates {
		switch {
		case name == preferredGGUFBackend:
			// Remembered here, prepended once after the scan.
			llamaInstalled = true
		case isLLMCapableBackend(name):
			llmBackends = append(llmBackends, name)
		}
	}

	ordered := llmBackends
	if llamaInstalled {
		ordered = append([]string{preferredGGUFBackend}, llmBackends...)
	}

	if len(ordered) > 0 {
		return ordered
	}

	// Conservative fallback: nothing known to be LLM-capable is installed, so
	// try every installed backend (in deterministic order) rather than refuse
	// to load.
	return candidates
}
// isGGUFModelFile reports whether modelFile ends with the ".gguf" extension,
// compared case-insensitively. Names shorter than the extension (including the
// empty string) are never GGUF files.
func isGGUFModelFile(modelFile string) bool {
	const ggufExt = ".gguf"

	tailStart := len(modelFile) - len(ggufExt)
	if tailStart < 0 {
		return false
	}
	// Only the trailing bytes matter, so fold just the tail instead of
	// lower-casing the whole path.
	return strings.EqualFold(modelFile[tailStart:], ggufExt)
}
// isLLMCapableBackend reports whether the named backend advertises at least
// one usecase listed in llmCapableUsecases.
//
// A backend for which config.GetBackendCapability returns nil (i.e. one with
// no capability entry) is reported as not LLM-capable: GGUF auto-detection
// only wants backends that can be positively confirmed, and the
// zero-candidate fallback in SelectAutoLoadBackends keeps unknown setups
// working.
func isLLMCapableBackend(name string) bool {
	if backendCap := config.GetBackendCapability(name); backendCap != nil {
		return slices.ContainsFunc(backendCap.PossibleUsecases, func(usecase string) bool {
			return slices.Contains(llmCapableUsecases, usecase)
		})
	}
	return false
}

View File

@@ -0,0 +1,46 @@
package model_test
import (
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for model.SelectAutoLoadBackends: GGUF models must only be offered to
// LLM-capable backends (llama-cpp first), the result must not depend on input
// order, and non-GGUF models get the full sorted set.
var _ = Describe("SelectAutoLoadBackends (#9287)", func() {
	Context("GGUF model auto-detection", func() {
		It("excludes incompatible audio/codec backends (e.g. opus) for a .gguf model", func() {
			// Regression for #9287: an unrelated audio backend such as "opus"
			// being installed must never let it win the GGUF trial loop.
			installed := []string{"opus", "llama-cpp"}

			candidates := model.SelectAutoLoadBackends(installed, "Qwen3.5-9b.gguf")

			Expect(candidates).ToNot(ContainElement("opus"))
			Expect(candidates).To(ContainElement("llama-cpp"))
		})

		It("places llama-cpp first for a .gguf model", func() {
			installed := []string{"vllm", "opus", "llama-cpp"}

			candidates := model.SelectAutoLoadBackends(installed, "model.gguf")

			Expect(candidates).ToNot(BeEmpty())
			Expect(candidates[0]).To(Equal("llama-cpp"))
		})

		It("is deterministic regardless of input ordering", func() {
			// Same backend set, two different orderings.
			forward := []string{"opus", "vllm", "llama-cpp", "whisper"}
			shuffled := []string{"whisper", "llama-cpp", "vllm", "opus"}

			first := model.SelectAutoLoadBackends(forward, "m.gguf")
			second := model.SelectAutoLoadBackends(shuffled, "m.gguf")

			Expect(first).To(Equal(second))
		})

		It("falls back to the full sorted set when filtering leaves no candidate", func() {
			// With no LLM-capable backend installed the original set (sorted)
			// comes back, so a previously-loadable model stays loadable.
			installed := []string{"opus"}

			candidates := model.SelectAutoLoadBackends(installed, "model.gguf")

			Expect(candidates).To(Equal([]string{"opus"}))
		})
	})

	Context("non-GGUF model auto-detection", func() {
		It("returns a deterministic (sorted) set without filtering", func() {
			installed := []string{"opus", "llama-cpp", "diffusers"}
			expected := []string{"diffusers", "llama-cpp", "opus"}

			candidates := model.SelectAutoLoadBackends(installed, "model-dir")

			Expect(candidates).To(Equal(expected))
		})
	})
})

View File

@@ -350,14 +350,17 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
// Otherwise scan for backends in the asset directory
var err error
// get backends embedded in the binary
autoLoadBackends := []string{}
// append externalBackends supplied by the user via the CLI
// Collect the installed/external backends (the map is unordered).
available := []string{}
for b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
available = append(available, b)
}
// Build a deterministic, file-type-filtered candidate list so an
// incompatible backend (e.g. an audio codec like opus) can never win the
// trial loop for a GGUF/LLM model. See SelectAutoLoadBackends / #9287.
autoLoadBackends := SelectAutoLoadBackends(available, o.model)
if len(autoLoadBackends) == 0 {
xlog.Error("No backends found")
return nil, fmt.Errorf("no backends found")