Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
20ed0bc735 fix(gallery): match mmproj/model quant as a whole token so F16 no longer selects BF16 (#10559)
pickPreferredGroup matched a quant preference against the shard base
filename with strings.Contains. Because `f16` is a substring of `bf16`,
asking for the `F16` mmproj quant would wrongly satisfy a `BF16` file and
select it when its group came first.

Match the preference as a whole token instead: it must be delimited by a
non-alphanumeric character (or the string start/end) on both outer edges.
Separators inside the preference itself (e.g. `ud-q4_k_xl`) are left
untouched, and all occurrences are scanned before rejecting.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
2026-06-27 21:57:23 +00:00
5 changed files with 137 additions and 121 deletions

View File

@@ -25,8 +25,8 @@ var (
type LlamaCPPImporter struct{}
func (i *LlamaCPPImporter) Name() string { return "llama-cpp" }
func (i *LlamaCPPImporter) Modality() string { return "text" }
func (i *LlamaCPPImporter) Name() string { return "llama-cpp" }
func (i *LlamaCPPImporter) Modality() string { return "text" }
func (i *LlamaCPPImporter) AutoDetects() bool { return true }
// AdditionalBackends advertises drop-in replacements that share the
@@ -293,7 +293,7 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
for _, pref := range prefs {
lower := strings.ToLower(pref)
for i := range groups {
if strings.Contains(strings.ToLower(groups[i].Base), lower) {
if quantTokenMatches(strings.ToLower(groups[i].Base), lower) {
return &groups[i]
}
}
@@ -301,6 +301,39 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
return &groups[len(groups)-1]
}
// quantTokenMatches tells whether pref occurs in base as a standalone token,
// i.e. not embedded inside a longer run of letters/digits. Callers must pass
// both strings already lowercased.
//
// Substring matching (strings.Contains) is not good enough: `bf16` contains
// `f16`, so a request for the `F16` quant used to pick up a `BF16` file
// (#10559). A hit counts only when the bytes immediately outside the match
// are non-alphanumeric, or the match touches the start/end of base. Bytes
// inside pref itself, separators included (e.g. `ud-q4_k_xl`), are never
// inspected. Every occurrence is tried before giving up, and an empty pref
// never matches.
func quantTokenMatches(base, pref string) bool {
	if len(pref) == 0 {
		return false
	}
	// offset is where the next search begins; it advances one byte past the
	// previous rejected hit so overlapping occurrences are still considered.
	offset := 0
	for {
		rel := strings.Index(base[offset:], pref)
		if rel < 0 {
			return false
		}
		begin := offset + rel
		finish := begin + len(pref)
		boundedLeft := begin == 0 || !isAlphaNum(base[begin-1])
		boundedRight := finish == len(base) || !isAlphaNum(base[finish])
		if boundedLeft && boundedRight {
			return true
		}
		offset = begin + 1
	}
}

// isAlphaNum reports whether b is an ASCII lowercase letter or an ASCII
// digit. Uppercase letters are not recognised; quantTokenMatches requires
// lowercased input.
func isAlphaNum(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case '0' <= b && b <= '9':
		return true
	default:
		return false
	}
}
// maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over
// HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction
// head, appends the auto-MTP option keys to modelConfig.Options. Failures

View File

@@ -374,6 +374,104 @@ var _ = Describe("LlamaCPPImporter", func() {
})
})
// End-to-end specs for quant preference matching: each case feeds a fake
// HuggingFace file listing through importer.Import and inspects the rendered
// ConfigFile to see which GGUF file was picked.
Context("quant token boundary matching", func() {
// Regression for #10559: the quant preference must match as a whole
// token, not as a substring. Asking for `F16` used to select a
// `BF16` mmproj because strings.Contains("...bf16.gguf", "f16") is
// true — the leading `b` was ignored.
const repoBase = "https://huggingface.co/acme/example-GGUF/resolve/main/"
// hfFile builds a ModelFile entry for path whose URL is repoBase + path;
// sha is stored verbatim as the file's SHA256.
hfFile := func(path, sha string) hfapi.ModelFile {
return hfapi.ModelFile{
Path: path,
SHA256: sha,
URL: repoBase + path,
}
}
// withHF assembles import Details for the acme/example-GGUF repo with the
// given file listing. preferences is raw JSON and is only attached when
// non-empty.
withHF := func(preferences string, files ...hfapi.ModelFile) Details {
d := Details{
URI: "https://huggingface.co/acme/example-GGUF",
HuggingFace: &hfapi.ModelDetails{
ModelID: "acme/example-GGUF",
Files: files,
},
}
if preferences != "" {
d.Preferences = json.RawMessage(preferences)
}
return d
}
// BF16 appears before F16 in the listing: the case the old substring match
// got wrong.
It("selects the F16 mmproj over BF16 (BF16 listed first)", func() {
details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
hfFile("model-Q4_K_M.gguf", "model"),
hfFile("mmproj-x-BF16.gguf", "bf16"),
hfFile("mmproj-x-F16.gguf", "f16"),
)
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
})
// Same files in the opposite order: the result must not depend on listing
// order.
It("selects the F16 mmproj over BF16 (F16 listed first)", func() {
details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
hfFile("model-Q4_K_M.gguf", "model"),
hfFile("mmproj-x-F16.gguf", "f16"),
hfFile("mmproj-x-BF16.gguf", "bf16"),
)
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
})
// The stricter matching must still let an explicit BF16 request find the
// BF16 file.
It("selects BF16 when BF16 is the requested mmproj quant", func() {
details := withHF(`{"name":"VL","mmproj_quantizations":"BF16"}`,
hfFile("model-Q4_K_M.gguf", "model"),
hfFile("mmproj-x-F16.gguf", "f16"),
hfFile("mmproj-x-BF16.gguf", "bf16"),
)
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-BF16.gguf"), fmt.Sprintf("%+v", modelConfig))
})
It("still matches a normal model quant with internal separators", func() {
// ud-q4_k_xl contains `-`/`_` internally; only the outer edges
// must hit a token boundary.
details := withHF(`{"name":"M","quantizations":"ud-q4_k_xl"}`,
hfFile("model-UD-Q4_K_XL.gguf", "xl"),
hfFile("model-Q3_K_M.gguf", "q3"),
)
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-UD-Q4_K_XL.gguf"), fmt.Sprintf("%+v", modelConfig))
})
// Q2_K matches neither file, so the expected pick is the one listed last
// (Q3_K_M).
It("falls back to the last group when no preference matches", func() {
details := withHF(`{"name":"M","quantizations":"Q2_K"}`,
hfFile("model-Q8_0.gguf", "q8"),
hfFile("model-Q3_K_M.gguf", "q3"),
)
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-Q3_K_M.gguf"), fmt.Sprintf("%+v", modelConfig))
})
})
Context("AdditionalBackends", func() {
It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() {
entries := importer.AdditionalBackends()

View File

@@ -25,7 +25,6 @@ import (
"github.com/mudler/LocalAI/core/http/auth"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/services/messaging"
"github.com/mudler/LocalAI/core/services/nodes"
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
"github.com/mudler/LocalAI/pkg/httpclient"
@@ -551,23 +550,12 @@ func DeleteBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerF
}
// ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS.
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender, registry *nodes.NodeRegistry) echo.HandlerFunc {
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerFunc {
return func(c echo.Context) error {
nodeID := c.Param("id")
// Agent-type workers don't run backends and never subscribe to the
// nodes.<id>.backend.list NATS subject, so the request would hang
// until timeout with "no responders". Their backend list is simply
// empty. Mirror the aggregate-list guard in managers_distributed.go
// (skip nodes whose NodeType is set and not "backend") so the
// single-node and cluster-wide views stay consistent.
if node, err := registry.Get(c.Request().Context(), nodeID); err == nil {
if node.NodeType != "" && node.NodeType != nodes.NodeTypeBackend {
return c.JSON(http.StatusOK, []messaging.NodeBackendInfo{})
}
}
if unloader == nil {
return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured"))
}
nodeID := c.Param("id")
reply, err := unloader.ListBackends(nodeID)
if err != nil {
xlog.Error("Failed to list backends on node", "node", nodeID, "error", err)

View File

@@ -1,103 +0,0 @@
package localai
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/services/messaging"
"github.com/mudler/LocalAI/core/services/nodes"
"github.com/mudler/LocalAI/core/services/testutil"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// stubNodeCommandSender records whether ListBackends was invoked so the test can
// assert the endpoint short-circuits (no NATS request) for agent-type nodes.
// It is passed to the endpoint as a nodes.NodeCommandSender; every method other
// than ListBackends ignores its arguments and returns a canned success value.
type stubNodeCommandSender struct {
// listBackendsCalled is set to true by ListBackends and never reset.
listBackendsCalled bool
}
// InstallBackend is a no-op stub that returns an empty reply and no error.
func (s *stubNodeCommandSender) InstallBackend(_, _, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendInstallReply, error) {
return &messaging.BackendInstallReply{}, nil
}
// UpgradeBackend is a no-op stub that returns an empty reply and no error.
func (s *stubNodeCommandSender) UpgradeBackend(_, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendUpgradeReply, error) {
return &messaging.BackendUpgradeReply{}, nil
}
// DeleteBackend is a no-op stub that always reports Success: true.
func (s *stubNodeCommandSender) DeleteBackend(_, _ string) (*messaging.BackendDeleteReply, error) {
return &messaging.BackendDeleteReply{Success: true}, nil
}
// ListBackends marks the stub as called and returns a fixed single-entry list
// containing a backend named "llama-cpp".
func (s *stubNodeCommandSender) ListBackends(_ string) (*messaging.BackendListReply, error) {
s.listBackendsCalled = true
return &messaging.BackendListReply{Backends: []messaging.NodeBackendInfo{{Name: "llama-cpp"}}}, nil
}
// StopBackend is a no-op stub that always succeeds.
func (s *stubNodeCommandSender) StopBackend(_, _ string) error { return nil }
// UnloadModelOnNode is a no-op stub that always succeeds.
func (s *stubNodeCommandSender) UnloadModelOnNode(_, _ string) error { return nil }
// Specs for ListBackendsOnNodeEndpoint: an agent-type node must get an empty
// list without the command sender being consulted, while a backend-type node
// must be queried through it.
var _ = Describe("ListBackendsOnNodeEndpoint", func() {
var registry *nodes.NodeRegistry
// Each spec gets a registry built on a database from testutil.SetupTestDB.
BeforeEach(func() {
db := testutil.SetupTestDB()
var err error
registry, err = nodes.NewNodeRegistry(db)
Expect(err).ToNot(HaveOccurred())
})
// callEndpoint invokes the handler with the ":id" path parameter set to
// nodeID, asserts the handler itself returned no error, and hands back the
// recorder so the spec can inspect status code and body.
callEndpoint := func(unloader nodes.NodeCommandSender, nodeID string) *httptest.ResponseRecorder {
e := echo.New()
req := httptest.NewRequest(http.MethodGet, "/", nil)
rec := httptest.NewRecorder()
c := e.NewContext(req, rec)
c.SetParamNames("id")
c.SetParamValues(nodeID)
handler := ListBackendsOnNodeEndpoint(unloader, registry)
Expect(handler(c)).To(Succeed())
return rec
}
It("returns an empty list for an agent node without issuing a NATS request", func() {
ctx := context.Background()
node := &nodes.BackendNode{Name: "agent-1", NodeType: nodes.NodeTypeAgent}
Expect(registry.Register(ctx, node, true)).To(Succeed())
stub := &stubNodeCommandSender{}
rec := callEndpoint(stub, node.ID)
Expect(rec.Code).To(Equal(http.StatusOK))
Expect(stub.listBackendsCalled).To(BeFalse(),
"agent workers don't subscribe to backend.list; the endpoint must not issue the doomed NATS request")
var list []messaging.NodeBackendInfo
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
Expect(list).To(BeEmpty())
// Must be `[]`, not `null`, so the UI can render it.
Expect(rec.Body.String()).To(ContainSubstring("[]"))
})
It("consults the unloader (NATS) for a backend node", func() {
ctx := context.Background()
node := &nodes.BackendNode{Name: "backend-1", NodeType: nodes.NodeTypeBackend, Address: "10.0.0.1:50051"}
Expect(registry.Register(ctx, node, true)).To(Succeed())
stub := &stubNodeCommandSender{}
rec := callEndpoint(stub, node.ID)
Expect(rec.Code).To(Equal(http.StatusOK))
Expect(stub.listBackendsCalled).To(BeTrue(),
"backend nodes must still be queried over NATS")
// The single "llama-cpp" entry is the canned reply from the stub's
// ListBackends.
var list []messaging.NodeBackendInfo
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
Expect(list).To(HaveLen(1))
Expect(list[0].Name).To(Equal("llama-cpp"))
})
})

View File

@@ -88,7 +88,7 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
// Backend management on workers
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader, registry))
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader))
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))