mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-12 18:58:49 -04:00
Compare commits
1 Commits
fix/9287-b
...
fix/9813-o
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e1c06465bd |
@@ -2,6 +2,8 @@ package openai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -11,6 +13,52 @@ import (
|
||||
"github.com/pion/webrtc/v4"
|
||||
)
|
||||
|
||||
// opusBackendName is the canonical gallery name/alias of the opus audio codec
// backend that the realtime WebRTC transport needs.
const opusBackendName = "opus"

// resolveOpusBackend picks which installed opus-codec backend the realtime
// WebRTC transport should load.
//
// The transport historically hardcoded the literal "opus" backend name, but on
// darwin/arm64 the only installable opus codec is "metal-opus" (it shares the
// gallery alias "opus"). When that platform-specific variant is registered
// under its concrete directory name rather than the "opus" alias key, loading
// the literal "opus" fails with "opus backend not available" (issue #9813).
//
// installed is the set of currently loadable backend names; goos and goarch
// identify the running platform (callers pass runtime.GOOS / runtime.GOARCH).
//
// Selection rules, in priority order:
//  1. An entry exactly equal to "opus" always wins. This covers the plain
//     "opus" backend as well as the "opus" alias key registered by gallery
//     alias resolution for a user-installed platform variant.
//  2. On darwin/arm64, a name containing both "opus" and "metal"
//     (case-insensitive) is preferred.
//  3. Otherwise any name containing "opus" (case-insensitive) is used.
//  4. With no opus codec installed, the literal "opus" is returned so the
//     caller surfaces the same error as before.
//
// Within rules 2 and 3 the lexicographically smallest name is chosen. The
// caller builds installed by ranging over a map, so picking "the first match"
// would make the loaded backend vary between requests whenever more than one
// opus variant is installed; ordering by name keeps the result independent of
// the order of installed.
func resolveOpusBackend(installed []string, goos, goarch string) string {
	preferMetal := goos == "darwin" && goarch == "arm64"

	// Smallest matching name seen so far in each tier. The empty string is a
	// safe "none yet" sentinel: an empty name never contains "opus", so it can
	// never be a candidate.
	var metal, fallback string
	for _, b := range installed {
		if b == opusBackendName {
			// Rule 1: nothing outranks the exact name, wherever it appears.
			return opusBackendName
		}

		lower := strings.ToLower(b)
		if !strings.Contains(lower, opusBackendName) {
			continue
		}

		if preferMetal && strings.Contains(lower, "metal") {
			if metal == "" || b < metal {
				metal = b
			}
			continue
		}
		if fallback == "" || b < fallback {
			fallback = b
		}
	}

	switch {
	case metal != "":
		return metal
	case fallback != "":
		return fallback
	default:
		return opusBackendName
	}
}
|
||||
|
||||
// RealtimeCallRequest is the JSON body for POST /v1/realtime/calls.
|
||||
type RealtimeCallRequest struct {
|
||||
SDP string `json:"sdp"`
|
||||
@@ -94,15 +142,25 @@ func RealtimeCalls(application *application.Application) echo.HandlerFunc {
|
||||
}
|
||||
}()
|
||||
|
||||
// Load the Opus backend
|
||||
opusBackend, err := application.ModelLoader().Load(
|
||||
model.WithBackendString("opus"),
|
||||
// Load the Opus backend. The opus codec ships under different backend
|
||||
// names per platform (e.g. "metal-opus" on darwin/arm64), so resolve the
|
||||
// platform-appropriate variant from the installed backends instead of
|
||||
// hardcoding the literal "opus" name (issue #9813).
|
||||
ml := application.ModelLoader()
|
||||
installed := make([]string, 0)
|
||||
for name := range ml.GetAllExternalBackends(nil) {
|
||||
installed = append(installed, name)
|
||||
}
|
||||
opusName := resolveOpusBackend(installed, runtime.GOOS, runtime.GOARCH)
|
||||
|
||||
opusBackend, err := ml.Load(
|
||||
model.WithBackendString(opusName),
|
||||
model.WithModelID("__opus_codec__"),
|
||||
model.WithModel("opus"),
|
||||
model.WithModel(opusName),
|
||||
)
|
||||
if err != nil {
|
||||
pc.Close()
|
||||
xlog.Error("failed to load opus backend", "error", err)
|
||||
xlog.Error("failed to load opus backend", "error", err, "backend", opusName)
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": "opus backend not available"})
|
||||
}
|
||||
|
||||
|
||||
32
core/http/endpoints/openai/realtime_webrtc_opus_test.go
Normal file
32
core/http/endpoints/openai/realtime_webrtc_opus_test.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("resolveOpusBackend", func() {
|
||||
It("prefers the exact opus backend when it is installed", func() {
|
||||
Expect(resolveOpusBackend([]string{"opus", "metal-opus"}, "linux", "amd64")).To(Equal("opus"))
|
||||
})
|
||||
|
||||
It("resolves to the opus alias key on linux", func() {
|
||||
Expect(resolveOpusBackend([]string{"opus"}, "linux", "amd64")).To(Equal("opus"))
|
||||
})
|
||||
|
||||
It("selects metal-opus on darwin/arm64 when no plain opus is installed", func() {
|
||||
Expect(resolveOpusBackend([]string{"metal-opus"}, "darwin", "arm64")).To(Equal("metal-opus"))
|
||||
})
|
||||
|
||||
It("selects metal-opus on darwin/arm64 even when other backends are present", func() {
|
||||
Expect(resolveOpusBackend([]string{"silero-vad", "metal-opus", "whisper"}, "darwin", "arm64")).To(Equal("metal-opus"))
|
||||
})
|
||||
|
||||
It("falls back to any opus codec backend when there is no exact match (non-darwin)", func() {
|
||||
Expect(resolveOpusBackend([]string{"metal-opus"}, "linux", "amd64")).To(Equal("metal-opus"))
|
||||
})
|
||||
|
||||
It("returns the literal opus name when no opus codec is installed", func() {
|
||||
Expect(resolveOpusBackend([]string{"silero-vad", "whisper"}, "darwin", "arm64")).To(Equal("opus"))
|
||||
})
|
||||
})
|
||||
@@ -1,99 +0,0 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// preferredGGUFBackend is tried first when auto-detecting the backend for a
|
||||
// GGUF model, since GGUF is overwhelmingly llama.cpp's native format.
|
||||
const preferredGGUFBackend = "llama-cpp"
|
||||
|
||||
// llmCapableUsecases are the BackendCapabilities usecases that signal a backend
|
||||
// can serve a text/LLM GGUF model. A GGUF model that declares no explicit
|
||||
// backend must only be auto-tried against backends carrying one of these
|
||||
// usecases - never against audio/codec/image backends (e.g. opus) that happen
|
||||
// to be installed alongside it (see issue #9287).
|
||||
var llmCapableUsecases = []string{
|
||||
config.UsecaseChat,
|
||||
config.UsecaseCompletion,
|
||||
config.UsecaseEdit,
|
||||
config.UsecaseEmbeddings,
|
||||
}
|
||||
|
||||
// SelectAutoLoadBackends returns the ordered, deterministic list of backend
|
||||
// names to try when loading a model that declares no explicit backend.
|
||||
//
|
||||
// available is the set of installed backend names (unordered, as it comes from a
|
||||
// Go map). modelFile is the model file name/path (may be empty).
|
||||
//
|
||||
// The trial loop in (*ModelLoader).Load picks the first backend whose gRPC
|
||||
// LoadModel succeeds, so the order and membership of this list directly decide
|
||||
// which backend wins. The previous implementation ranged a Go map (random
|
||||
// order) with no filtering, so an unrelated installed backend such as the
|
||||
// "opus" audio codec could win a GGUF/LLM model load (#9287).
|
||||
//
|
||||
// Behaviour:
|
||||
// - The result is always deterministically ordered, so auto-detect no longer
|
||||
// depends on map iteration order.
|
||||
// - For a GGUF model file the list is filtered to LLM-capable backends and
|
||||
// llama-cpp is placed first, so an incompatible audio/codec/image backend
|
||||
// can never win the trial loop.
|
||||
// - If filtering would leave no candidate, the full sorted set is returned
|
||||
// instead, so a model that previously loaded never becomes unloadable.
|
||||
func SelectAutoLoadBackends(available []string, modelFile string) []string {
|
||||
sorted := append([]string(nil), available...)
|
||||
sort.Strings(sorted)
|
||||
|
||||
if !isGGUFModelFile(modelFile) {
|
||||
return sorted
|
||||
}
|
||||
|
||||
filtered := make([]string, 0, len(sorted))
|
||||
hasLlama := false
|
||||
for _, b := range sorted {
|
||||
if b == preferredGGUFBackend {
|
||||
hasLlama = true
|
||||
continue // added explicitly first below
|
||||
}
|
||||
if isLLMCapableBackend(b) {
|
||||
filtered = append(filtered, b)
|
||||
}
|
||||
}
|
||||
if hasLlama {
|
||||
filtered = append([]string{preferredGGUFBackend}, filtered...)
|
||||
}
|
||||
|
||||
if len(filtered) == 0 {
|
||||
// Conservative fallback: no known LLM-capable backend is installed, so
|
||||
// rather than refuse to load, fall back to the previous behaviour of
|
||||
// trying every installed backend (now at least in a deterministic order).
|
||||
return sorted
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// isGGUFModelFile reports whether modelFile ends in the ".gguf" extension,
// compared case-insensitively. An empty name is not a GGUF file.
func isGGUFModelFile(modelFile string) bool {
	const ggufSuffix = ".gguf"

	lowered := strings.ToLower(modelFile)
	return strings.HasSuffix(lowered, ggufSuffix)
}
|
||||
|
||||
// isLLMCapableBackend reports whether a backend is known to serve text/LLM
|
||||
// models. Backends absent from the capability map (unknown) are treated as
|
||||
// not LLM-capable here: for GGUF auto-detection we only want backends we can
|
||||
// positively confirm handle LLMs, and the zero-candidate fallback keeps unknown
|
||||
// setups working.
|
||||
func isLLMCapableBackend(name string) bool {
|
||||
capability := config.GetBackendCapability(name)
|
||||
if capability == nil {
|
||||
return false
|
||||
}
|
||||
for _, u := range capability.PossibleUsecases {
|
||||
if slices.Contains(llmCapableUsecases, u) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
package model_test
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("SelectAutoLoadBackends (#9287)", func() {
|
||||
Describe("GGUF model auto-detection", func() {
|
||||
It("excludes incompatible audio/codec backends (e.g. opus) for a .gguf model", func() {
|
||||
// Regression for #9287: installing an unrelated audio backend like
|
||||
// "opus" must never win the GGUF auto-detect trial loop.
|
||||
got := model.SelectAutoLoadBackends([]string{"opus", "llama-cpp"}, "Qwen3.5-9b.gguf")
|
||||
Expect(got).NotTo(ContainElement("opus"))
|
||||
Expect(got).To(ContainElement("llama-cpp"))
|
||||
})
|
||||
|
||||
It("places llama-cpp first for a .gguf model", func() {
|
||||
got := model.SelectAutoLoadBackends([]string{"vllm", "opus", "llama-cpp"}, "model.gguf")
|
||||
Expect(got).NotTo(BeEmpty())
|
||||
Expect(got[0]).To(Equal("llama-cpp"))
|
||||
})
|
||||
|
||||
It("is deterministic regardless of input ordering", func() {
|
||||
a := model.SelectAutoLoadBackends([]string{"opus", "vllm", "llama-cpp", "whisper"}, "m.gguf")
|
||||
b := model.SelectAutoLoadBackends([]string{"whisper", "llama-cpp", "vllm", "opus"}, "m.gguf")
|
||||
Expect(a).To(Equal(b))
|
||||
})
|
||||
|
||||
It("falls back to the full sorted set when filtering leaves no candidate", func() {
|
||||
// No LLM-capable backend installed: never make a previously-loadable
|
||||
// model unloadable, return the original set (sorted).
|
||||
got := model.SelectAutoLoadBackends([]string{"opus"}, "model.gguf")
|
||||
Expect(got).To(Equal([]string{"opus"}))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("non-GGUF model auto-detection", func() {
|
||||
It("returns a deterministic (sorted) set without filtering", func() {
|
||||
got := model.SelectAutoLoadBackends([]string{"opus", "llama-cpp", "diffusers"}, "model-dir")
|
||||
Expect(got).To(Equal([]string{"diffusers", "llama-cpp", "opus"}))
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -350,16 +350,13 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
|
||||
// Otherwise scan for backends in the asset directory
|
||||
var err error
|
||||
|
||||
// Collect the installed/external backends (the map is unordered).
|
||||
available := []string{}
|
||||
for b := range ml.GetAllExternalBackends(o) {
|
||||
available = append(available, b)
|
||||
}
|
||||
// get backends embedded in the binary
|
||||
autoLoadBackends := []string{}
|
||||
|
||||
// Build a deterministic, file-type-filtered candidate list so an
|
||||
// incompatible backend (e.g. an audio codec like opus) can never win the
|
||||
// trial loop for a GGUF/LLM model. See SelectAutoLoadBackends / #9287.
|
||||
autoLoadBackends := SelectAutoLoadBackends(available, o.model)
|
||||
// append externalBackends supplied by the user via the CLI
|
||||
for b := range ml.GetAllExternalBackends(o) {
|
||||
autoLoadBackends = append(autoLoadBackends, b)
|
||||
}
|
||||
|
||||
if len(autoLoadBackends) == 0 {
|
||||
xlog.Error("No backends found")
|
||||
|
||||
Reference in New Issue
Block a user