Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
e1c06465bd fix(realtime): resolve platform opus codec backend by alias (#9813)
The realtime WebRTC transport hardcoded loading the literal "opus" backend.
On darwin/arm64 the only installable opus codec is "metal-opus" (it shares
the gallery alias "opus"). Backend names resolve through the model loader's
external-backends map, and the "opus" alias key is only registered when a
user-path variant's alias was collected; a system-path metal-opus registers
only under its concrete name. As a result, with metal-opus installed the
realtime path still failed with "opus backend not available".

Resolve the opus codec from the set of currently loadable backends instead
of the hardcoded literal: an exact "opus" match wins (covers the plain
backend and the alias key), otherwise fall back to a platform-appropriate
"*opus*" codec, preferring the metal build on darwin/arm64. Behavior is
unchanged when a plain "opus" backend is present, and the same error is
surfaced when no opus codec is installed at all.

The selection logic is extracted into resolveOpusBackend and unit-tested.

Assisted-by: claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-12 22:13:48 +00:00
5 changed files with 101 additions and 159 deletions

View File

@@ -2,6 +2,8 @@ package openai
import (
"net/http"
"runtime"
"strings"
"time"
"github.com/labstack/echo/v4"
@@ -11,6 +13,52 @@ import (
"github.com/pion/webrtc/v4"
)
// opusBackendName is the canonical gallery name/alias of the opus audio codec
// backend that the realtime WebRTC transport needs.
const opusBackendName = "opus"

// resolveOpusBackend picks which installed opus-codec backend the realtime
// WebRTC transport should load.
//
// installed is the set of currently loadable backend names; goos and goarch
// identify the running platform (callers pass runtime.GOOS / runtime.GOARCH).
//
// Selection rules, in priority order:
//  1. An exact, case-sensitive "opus" entry wins. This covers the plain
//     "opus" backend as well as the "opus" alias key registered by gallery
//     alias resolution for a user-installed platform variant.
//  2. On darwin/arm64, a name containing both "opus" and "metal"
//     (case-insensitive) is preferred, e.g. "metal-opus".
//  3. Otherwise any name containing "opus" (case-insensitive) is used.
//  4. If nothing matches, the literal "opus" is returned so the caller
//     surfaces the same "opus backend not available" error as before.
//
// The result does not depend on the order of installed: when several names
// qualify under rule 2 or 3, the lexicographically smallest one is chosen.
// Callers typically build installed by ranging over a map, whose iteration
// order is random, so an order-dependent pick would change between calls.
func resolveOpusBackend(installed []string, goos, goarch string) string {
	preferMetal := goos == "darwin" && goarch == "arm64"

	var metal, fallback string
	for _, b := range installed {
		// Rule 1: an exact match short-circuits everything else, wherever it
		// appears in the list.
		if b == opusBackendName {
			return opusBackendName
		}

		lower := strings.ToLower(b)
		if !strings.Contains(lower, opusBackendName) {
			continue
		}

		// Track the smallest matching name so the choice is deterministic.
		if fallback == "" || b < fallback {
			fallback = b
		}
		if preferMetal && strings.Contains(lower, "metal") && (metal == "" || b < metal) {
			metal = b
		}
	}

	switch {
	case metal != "":
		return metal
	case fallback != "":
		return fallback
	default:
		// No opus codec installed at all: keep the historical literal name.
		return opusBackendName
	}
}
// RealtimeCallRequest is the JSON body for POST /v1/realtime/calls.
type RealtimeCallRequest struct {
SDP string `json:"sdp"`
@@ -94,15 +142,25 @@ func RealtimeCalls(application *application.Application) echo.HandlerFunc {
}
}()
// Load the Opus backend
opusBackend, err := application.ModelLoader().Load(
model.WithBackendString("opus"),
// Load the Opus backend. The opus codec ships under different backend
// names per platform (e.g. "metal-opus" on darwin/arm64), so resolve the
// platform-appropriate variant from the installed backends instead of
// hardcoding the literal "opus" name (issue #9813).
ml := application.ModelLoader()
installed := make([]string, 0)
for name := range ml.GetAllExternalBackends(nil) {
installed = append(installed, name)
}
opusName := resolveOpusBackend(installed, runtime.GOOS, runtime.GOARCH)
opusBackend, err := ml.Load(
model.WithBackendString(opusName),
model.WithModelID("__opus_codec__"),
model.WithModel("opus"),
model.WithModel(opusName),
)
if err != nil {
pc.Close()
xlog.Error("failed to load opus backend", "error", err)
xlog.Error("failed to load opus backend", "error", err, "backend", opusName)
return c.JSON(http.StatusInternalServerError, map[string]string{"error": "opus backend not available"})
}

View File

@@ -0,0 +1,32 @@
package openai
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Table-driven specs for resolveOpusBackend: each row is registered as its own
// It node, with the list of loadable backends, the platform, and the backend
// name the resolver is expected to return.
var _ = Describe("resolveOpusBackend", func() {
	type scenario struct {
		title     string
		installed []string
		goos      string
		goarch    string
		want      string
	}

	scenarios := []scenario{
		{
			title:     "prefers the exact opus backend when it is installed",
			installed: []string{"opus", "metal-opus"},
			goos:      "linux",
			goarch:    "amd64",
			want:      "opus",
		},
		{
			title:     "resolves to the opus alias key on linux",
			installed: []string{"opus"},
			goos:      "linux",
			goarch:    "amd64",
			want:      "opus",
		},
		{
			title:     "selects metal-opus on darwin/arm64 when no plain opus is installed",
			installed: []string{"metal-opus"},
			goos:      "darwin",
			goarch:    "arm64",
			want:      "metal-opus",
		},
		{
			title:     "selects metal-opus on darwin/arm64 even when other backends are present",
			installed: []string{"silero-vad", "metal-opus", "whisper"},
			goos:      "darwin",
			goarch:    "arm64",
			want:      "metal-opus",
		},
		{
			title:     "falls back to any opus codec backend when there is no exact match (non-darwin)",
			installed: []string{"metal-opus"},
			goos:      "linux",
			goarch:    "amd64",
			want:      "metal-opus",
		},
		{
			title:     "returns the literal opus name when no opus codec is installed",
			installed: []string{"silero-vad", "whisper"},
			goos:      "darwin",
			goarch:    "arm64",
			want:      "opus",
		},
	}

	for _, sc := range scenarios {
		sc := sc // per-iteration copy so the closure is safe on toolchains before Go 1.22
		It(sc.title, func() {
			Expect(resolveOpusBackend(sc.installed, sc.goos, sc.goarch)).To(Equal(sc.want))
		})
	}
})

View File

@@ -1,99 +0,0 @@
package model
import (
"slices"
"sort"
"strings"
"github.com/mudler/LocalAI/core/config"
)
// preferredGGUFBackend is tried first when auto-detecting the backend for a
// GGUF model, since GGUF is overwhelmingly llama.cpp's native format.
// SelectAutoLoadBackends moves this name to the front of its result for
// ".gguf" model files whenever it is among the available backends.
const preferredGGUFBackend = "llama-cpp"
// llmCapableUsecases are the BackendCapabilities usecases that signal a backend
// can serve a text/LLM GGUF model. A GGUF model that declares no explicit
// backend must only be auto-tried against backends carrying one of these
// usecases - never against audio/codec/image backends (e.g. opus) that happen
// to be installed alongside it (see issue #9287).
//
// isLLMCapableBackend treats a backend as LLM-capable when at least one of its
// possible usecases appears in this list.
var llmCapableUsecases = []string{
config.UsecaseChat,
config.UsecaseCompletion,
config.UsecaseEdit,
config.UsecaseEmbeddings,
}
// SelectAutoLoadBackends computes the ordered candidate list of backend names
// to try for a model that does not declare an explicit backend.
//
// available holds the installed backend names in arbitrary order (it is
// typically collected from a Go map); it is never modified. modelFile is the
// model file name or path and may be empty.
//
// Contract:
//   - The returned slice is always sorted, so the outcome never depends on
//     the order of available.
//   - When modelFile ends in ".gguf" (case-insensitive), only backends that
//     isLLMCapableBackend accepts are kept, and llama-cpp, if installed, is
//     moved to the very front. This keeps unrelated audio/codec/image
//     backends such as "opus" out of the GGUF trial loop (#9287).
//   - When that filtering would leave nothing, the complete sorted list is
//     returned instead, so a model that could be loaded before stays loadable.
func SelectAutoLoadBackends(available []string, modelFile string) []string {
	// Work on a private copy: sorting must not reorder the caller's slice.
	var ordered []string
	ordered = append(ordered, available...)
	sort.Strings(ordered)

	if !isGGUFModelFile(modelFile) {
		return ordered
	}

	llamaInstalled := false
	candidates := make([]string, 0, len(ordered))
	for _, name := range ordered {
		switch {
		case name == preferredGGUFBackend:
			// Remembered here and prepended once after the loop.
			llamaInstalled = true
		case isLLMCapableBackend(name):
			candidates = append(candidates, name)
		}
	}
	if llamaInstalled {
		candidates = append([]string{preferredGGUFBackend}, candidates...)
	}

	if len(candidates) > 0 {
		return candidates
	}
	// Conservative fallback: no known LLM-capable backend is installed, so
	// instead of refusing to load, try every installed backend as before,
	// only now in a deterministic order.
	return ordered
}
// isGGUFModelFile reports whether modelFile names a GGUF model, i.e. whether
// it ends with the ".gguf" extension. The comparison ignores letter case.
func isGGUFModelFile(modelFile string) bool {
	const ggufExt = ".gguf"
	lowered := strings.ToLower(modelFile)
	return strings.HasSuffix(lowered, ggufExt)
}
// isLLMCapableBackend reports whether the named backend is known to serve
// text/LLM models, meaning at least one of its possible usecases is listed in
// llmCapableUsecases.
//
// A backend with no entry in the capability map yields false: GGUF
// auto-detection should only consider backends that are positively known to
// handle LLMs, and the zero-candidate fallback in SelectAutoLoadBackends keeps
// setups with unknown backends working.
func isLLMCapableBackend(name string) bool {
	capability := config.GetBackendCapability(name)
	if capability == nil {
		return false
	}
	servesLLM := func(usecase string) bool {
		return slices.Contains(llmCapableUsecases, usecase)
	}
	return slices.ContainsFunc(capability.PossibleUsecases, servesLLM)
}

View File

@@ -1,46 +0,0 @@
package model_test
import (
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for model.SelectAutoLoadBackends covering the GGUF filtering and
// ordering rules introduced for #9287, plus the unfiltered non-GGUF path.
var _ = Describe("SelectAutoLoadBackends (#9287)", func() {
	Describe("GGUF model auto-detection", func() {
		It("excludes incompatible audio/codec backends (e.g. opus) for a .gguf model", func() {
			// Regression guard for #9287: an unrelated audio backend such as
			// "opus" being installed must never be a GGUF auto-detect candidate.
			installed := []string{"opus", "llama-cpp"}
			candidates := model.SelectAutoLoadBackends(installed, "Qwen3.5-9b.gguf")
			Expect(candidates).NotTo(ContainElement("opus"))
			Expect(candidates).To(ContainElement("llama-cpp"))
		})

		It("places llama-cpp first for a .gguf model", func() {
			installed := []string{"vllm", "opus", "llama-cpp"}
			candidates := model.SelectAutoLoadBackends(installed, "model.gguf")
			Expect(candidates).NotTo(BeEmpty())
			Expect(candidates[0]).To(Equal("llama-cpp"))
		})

		It("is deterministic regardless of input ordering", func() {
			forward := []string{"opus", "vllm", "llama-cpp", "whisper"}
			shuffled := []string{"whisper", "llama-cpp", "vllm", "opus"}
			first := model.SelectAutoLoadBackends(forward, "m.gguf")
			second := model.SelectAutoLoadBackends(shuffled, "m.gguf")
			Expect(first).To(Equal(second))
		})

		It("falls back to the full sorted set when filtering leaves no candidate", func() {
			// With no LLM-capable backend installed, the original set must come
			// back (sorted) so a previously loadable model stays loadable.
			installed := []string{"opus"}
			candidates := model.SelectAutoLoadBackends(installed, "model.gguf")
			Expect(candidates).To(Equal([]string{"opus"}))
		})
	})

	Describe("non-GGUF model auto-detection", func() {
		It("returns a deterministic (sorted) set without filtering", func() {
			installed := []string{"opus", "llama-cpp", "diffusers"}
			candidates := model.SelectAutoLoadBackends(installed, "model-dir")
			Expect(candidates).To(Equal([]string{"diffusers", "llama-cpp", "opus"}))
		})
	})
})

View File

@@ -350,16 +350,13 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
// Otherwise scan for backends in the asset directory
var err error
// Collect the installed/external backends (the map is unordered).
available := []string{}
for b := range ml.GetAllExternalBackends(o) {
available = append(available, b)
}
// get backends embedded in the binary
autoLoadBackends := []string{}
// Build a deterministic, file-type-filtered candidate list so an
// incompatible backend (e.g. an audio codec like opus) can never win the
// trial loop for a GGUF/LLM model. See SelectAutoLoadBackends / #9287.
autoLoadBackends := SelectAutoLoadBackends(available, o.model)
// append externalBackends supplied by the user via the CLI
for b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
}
if len(autoLoadBackends) == 0 {
xlog.Error("No backends found")