From d7be691823c198e3d1ea65a112ba2ea37d33dc69 Mon Sep 17 00:00:00 2001 From: mudler Date: Thu, 11 Jun 2026 22:51:03 +0000 Subject: [PATCH] feat(gallery): locate-anything gallery entry + model importer Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto --- backend/index.yaml | 29 +++ core/gallery/importers/importers.go | 5 + core/gallery/importers/locate-anything.go | 137 +++++++++++ .../gallery/importers/locate-anything_test.go | 218 ++++++++++++++++++ 4 files changed, 389 insertions(+) create mode 100644 core/gallery/importers/locate-anything.go create mode 100644 core/gallery/importers/locate-anything_test.go diff --git a/backend/index.yaml b/backend/index.yaml index 37e689071..e641c9355 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -337,6 +337,35 @@ nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp" intel: "intel-sycl-f32-rfdetr-cpp" vulkan: "vulkan-rfdetr-cpp" +- &locateanything + name: "locate-anything" + alias: "locate-anything" + license: apache-2.0 + description: | + Open-vocabulary object detection and visual grounding (NVIDIA + LocateAnything-3B) in C/C++ using GGML. Loads pre-built GGUF weights + and, given an image and a free-form text prompt, returns bounding + boxes, class labels, and confidence scores for the referred objects. 
+ urls: + - https://github.com/mudler/locate-anything.cpp + - https://huggingface.co/nvidia/LocateAnything-3B + tags: + - object-detection + - visual-grounding + - open-vocabulary + - locate-anything + - gpu + - cpu + capabilities: + default: "cpu-locate-anything-cpp" + nvidia: "cuda12-locate-anything-cpp" + nvidia-cuda-12: "cuda12-locate-anything-cpp" + nvidia-cuda-13: "cuda13-locate-anything-cpp" + nvidia-l4t: "nvidia-l4t-arm64-locate-anything-cpp" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-locate-anything-cpp" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-locate-anything-cpp" + intel: "intel-sycl-f32-locate-anything-cpp" + vulkan: "vulkan-locate-anything-cpp" - &vllm name: "vllm" license: apache-2.0 diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go index dddcb1cff..a897e037f 100644 --- a/core/gallery/importers/importers.go +++ b/core/gallery/importers/importers.go @@ -158,6 +158,11 @@ var defaultImporters = []Importer{ // RFDetrImporter must run before TransformersImporter — RF-DETR // checkpoints may carry tokenizer-adjacent artefacts. &RFDetrImporter{}, + // LocateAnythingImporter (NVIDIA LocateAnything open-vocab detection, + // native C++/ggml port) must run before LlamaCPPImporter so its GGUF + // bundles aren't claimed by the generic .gguf importer; kept next to + // RFDetrImporter as both are detection models. 
+ &LocateAnythingImporter{}, // Existing // DS4Importer must precede LlamaCPPImporter - ds4 weights are GGUFs and // would otherwise be claimed by the generic .gguf-handling llama-cpp diff --git a/core/gallery/importers/locate-anything.go b/core/gallery/importers/locate-anything.go new file mode 100644 index 000000000..5dc87aa88 --- /dev/null +++ b/core/gallery/importers/locate-anything.go @@ -0,0 +1,137 @@ +package importers + +import ( + "encoding/json" + "path/filepath" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/schema" + "go.yaml.in/yaml/v2" +) + +var _ Importer = &LocateAnythingImporter{} + +// LocateAnythingImporter routes NVIDIA LocateAnything open-vocabulary +// object-detection / visual-grounding repositories to the +// "locate-anything-cpp" backend (a native C++/ggml port). It must be +// registered BEFORE the generic GGUF matchers (LlamaCPPImporter) so its +// GGUF bundles aren't swallowed by the generic .gguf-handling importer, +// and alongside RFDetrImporter since both are detection models that may +// carry tokenizer-adjacent artefacts. +// +// Detection signals: +// - preferences.backend="locate-anything-cpp" (explicit override); +// - repo name contains "locate-anything" or "locateanything" +// (case-insensitive). 
+type LocateAnythingImporter struct{}
+
+// Name, Modality and AutoDetects expose the importer's static metadata.
+func (i *LocateAnythingImporter) Name() string      { return "locate-anything-cpp" }
+func (i *LocateAnythingImporter) Modality() string  { return "detection" }
+func (i *LocateAnythingImporter) AutoDetects() bool { return true }
+
+// repoLooksLikeLocateAnything reports whether repo contains "locate-anything"
+// or "locateanything" (case-insensitive); ".cpp"/"-cpp" variants are covered.
+func repoLooksLikeLocateAnything(repo string) bool {
+	lower := strings.ToLower(repo)
+	return strings.Contains(lower, "locate-anything") ||
+		strings.Contains(lower, "locateanything")
+}
+
+// Match reports whether details belongs to this importer: an explicit
+// "locate-anything-cpp" backend preference, or a LocateAnything-like repo name.
+func (i *LocateAnythingImporter) Match(details Details) bool {
+	preferences, err := details.Preferences.MarshalJSON()
+	if err != nil {
+		return false
+	}
+	preferencesMap := make(map[string]any)
+	if len(preferences) > 0 {
+		if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
+			return false
+		}
+	}
+
+	if b, ok := preferencesMap["backend"].(string); ok && b == "locate-anything-cpp" {
+		return true
+	}
+
+	if details.HuggingFace != nil {
+		repoName := details.HuggingFace.ModelID
+		if _, after, found := strings.Cut(repoName, "/"); found {
+			repoName = after
+		}
+		if repoLooksLikeLocateAnything(repoName) {
+			return true
+		}
+	}
+
+	// Fallback: hfapi recursion bug may leave HuggingFace nil — decide
+	// from the URI owner/repo.
+	_, repo, ok := HFOwnerRepoFromURI(details.URI)
+	return ok && repoLooksLikeLocateAnything(repo)
+}
+
+// Import builds the gallery entry for details. A missing or empty
+// preferences.name falls back to the last path element of the URI.
+func (i *LocateAnythingImporter) Import(details Details) (gallery.ModelConfig, error) {
+	preferences, err := details.Preferences.MarshalJSON()
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+	preferencesMap := make(map[string]any)
+	if len(preferences) > 0 {
+		if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
+			return gallery.ModelConfig{}, err
+		}
+	}
+
+	name, ok := preferencesMap["name"].(string)
+	if !ok || name == "" {
+		name = filepath.Base(details.URI)
+	}
+
+	description, ok := preferencesMap["description"].(string)
+	if !ok {
+		description = "Imported from " + details.URI
+	}
+
+	// Prefer the canonical HF "owner/repo" identifier so the emitted
+	// YAML mirrors gallery locate-anything entries.
+	model := details.URI
+	if details.HuggingFace != nil && details.HuggingFace.ModelID != "" {
+		model = details.HuggingFace.ModelID
+	} else if owner, repo, ok := HFOwnerRepoFromURI(details.URI); ok {
+		model = owner + "/" + repo
+	}
+
+	// Default to the native C++/ggml backend; a non-empty
+	// preferences.backend overrides it.
+	backend := "locate-anything-cpp"
+	if b, ok := preferencesMap["backend"].(string); ok && b != "" {
+		backend = b
+	}
+
+	modelConfig := config.ModelConfig{
+		Name:                name,
+		Description:         description,
+		Backend:             backend,
+		KnownUsecaseStrings: []string{"detection"},
+		PredictionOptions: schema.PredictionOptions{
+			BasicModelRequest: schema.BasicModelRequest{Model: model},
+		},
+	}
+
+	data, err := yaml.Marshal(modelConfig)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	return gallery.ModelConfig{
+		Name:        name,
+		Description: description,
+		ConfigFile:  string(data),
+	}, nil
+}
diff --git a/core/gallery/importers/locate-anything_test.go b/core/gallery/importers/locate-anything_test.go
new file mode 100644
index 000000000..1e4b60f7c
--- /dev/null
+++ b/core/gallery/importers/locate-anything_test.go
@@ -0,0 +1,218 @@
+package importers_test
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LocateAnythingImporter", func() {
+	Context("Importer interface metadata", func() {
+		It("exposes name/modality/autodetect", func() {
+			imp := &importers.LocateAnythingImporter{}
+			Expect(imp.Name()).To(Equal("locate-anything-cpp"))
+			Expect(imp.Modality()).To(Equal("detection"))
+			Expect(imp.AutoDetects()).To(BeTrue())
+		})
+	})
+
+	Context("Match", func() {
+		It("matches when backend preference is locate-anything-cpp", func() {
+			imp := &importers.LocateAnythingImporter{}
+			preferences := json.RawMessage(`{"backend": "locate-anything-cpp"}`)
+			details := importers.Details{
+				URI:         "https://example.com/some-model",
+				Preferences: preferences,
+			}
+
+			Expect(imp.Match(details)).To(BeTrue())
+		})
+
+		It("matches when the repo name contains 'locate-anything' (case-insensitive)", func() {
+			imp := &importers.LocateAnythingImporter{}
+			details := importers.Details{
+				URI: "https://huggingface.co/mudler/locate-anything-cpp-3b",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "mudler/Locate-Anything-CPP-3B",
+					Author:  "mudler",
+				},
+			}
+
+			Expect(imp.Match(details)).To(BeTrue())
+		})
+
+		It("matches when the repo name contains 'locateanything' (case-insensitive)", func() {
+			imp := &importers.LocateAnythingImporter{}
+			details := importers.Details{
+				URI: "https://huggingface.co/nvidia/LocateAnything-3B",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "nvidia/LocateAnything-3B",
+					Author:  "nvidia",
+				},
+			}
+
+			Expect(imp.Match(details)).To(BeTrue())
+		})
+
+		It("matches via URI fallback when HuggingFace details are missing", func() {
+			imp := &importers.LocateAnythingImporter{}
+			details := importers.Details{
+				URI: "https://huggingface.co/nvidia/LocateAnything-3B",
+			}
+
+			Expect(imp.Match(details)).To(BeTrue())
+		})
+
+		It("does not match unrelated repos without locate-anything signals", func() {
+			imp := &importers.LocateAnythingImporter{}
+			details := importers.Details{
+				URI: "https://huggingface.co/meta-llama/Llama-3-8B",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "meta-llama/Llama-3-8B",
+					Author:  "meta-llama",
+				},
+			}
+
+			Expect(imp.Match(details)).To(BeFalse())
+		})
+
+		It("does not match an rfdetr repo", func() {
+			imp := &importers.LocateAnythingImporter{}
+			details := importers.Details{
+				URI: "https://huggingface.co/mudler/rfdetr-cpp-nano",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "mudler/rfdetr-cpp-nano",
+					Author:  "mudler",
+				},
+			}
+
+			Expect(imp.Match(details)).To(BeFalse())
+		})
+
+		It("returns false for invalid preferences JSON", func() {
+			imp := &importers.LocateAnythingImporter{}
+			preferences := json.RawMessage(`not valid json`)
+			details := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: preferences,
+			}
+
+			Expect(imp.Match(details)).To(BeFalse())
+		})
+	})
+
+	Context("Import", func() {
+		It("produces a YAML with backend locate-anything-cpp and the repo as the model", func() {
+			imp := &importers.LocateAnythingImporter{}
+			details := importers.Details{
+				URI: "https://huggingface.co/nvidia/LocateAnything-3B",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "nvidia/LocateAnything-3B",
+					Author:  "nvidia",
+				},
+			}
+
+			modelConfig, err := imp.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: locate-anything-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("nvidia/LocateAnything-3B"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("detection"), fmt.Sprintf("Model config: %+v", modelConfig))
+		})
+
+		It("respects custom name and description from preferences", func() {
+			imp := &importers.LocateAnythingImporter{}
+			preferences := json.RawMessage(`{"name": "my-locate", "description": "Custom"}`)
+			details := importers.Details{
+				URI:         "https://huggingface.co/nvidia/LocateAnything-3B",
+				Preferences: preferences,
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "nvidia/LocateAnything-3B",
+					Author:  "nvidia",
+				},
+			}
+
+			modelConfig, err := imp.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("my-locate"))
+			Expect(modelConfig.Description).To(Equal("Custom"))
+		})
+	})
+
+	// Table-driven coverage of the backend routing: locate-anything repos
+	// route to the native locate-anything-cpp backend by default, and an
+	// explicit preferences.backend replaces that default in the output.
+	//
+	// Cases are kept offline-deterministic by injecting Details directly
+	// rather than going through DiscoverModelConfig (which would hit live HF).
+	Context("backend routing (offline)", func() {
+		hfFile := func(path string) hfapi.ModelFile {
+			return hfapi.ModelFile{Path: path}
+		}
+
+		type tc struct {
+			name          string
+			uri           string
+			modelID       string
+			files         []hfapi.ModelFile
+			prefs         string
+			expectBackend string // expected `backend:` line content
+		}
+
+		entries := []tc{
+			{
+				name:          "canonical NVIDIA repo routes to locate-anything-cpp",
+				uri:           "https://huggingface.co/nvidia/LocateAnything-3B",
+				modelID:       "nvidia/LocateAnything-3B",
+				files:         []hfapi.ModelFile{hfFile("locate-anything-3b-q8_0.gguf"), hfFile("README.md")},
+				prefs:         "",
+				expectBackend: "backend: locate-anything-cpp",
+			},
+			{
+				name:          "GGUF bundle with locate-anything name routes to locate-anything-cpp",
+				uri:           "https://huggingface.co/mudler/locate-anything.cpp-3b",
+				modelID:       "mudler/locate-anything.cpp-3b",
+				files:         []hfapi.ModelFile{hfFile("model-f16.gguf")},
+				prefs:         "",
+				expectBackend: "backend: locate-anything-cpp",
+			},
+			{
+				name:          "explicit preferences.backend override is honoured",
+				uri:           "https://huggingface.co/nvidia/LocateAnything-3B",
+				modelID:       "nvidia/LocateAnything-3B",
+				files:         nil,
+				prefs:         `{"backend": "custom-backend"}`,
+				expectBackend: "backend: custom-backend",
+			},
+		}
+
+		for _, e := range entries {
+			e := e // capture for closure
+			It(e.name, func() {
+				imp := &importers.LocateAnythingImporter{}
+				details := importers.Details{
+					URI: e.uri,
+					HuggingFace: &hfapi.ModelDetails{
+						ModelID: e.modelID,
+						Files:   e.files,
+					},
+				}
+				if e.prefs != "" {
+					details.Preferences = json.RawMessage(e.prefs)
+				}
+
+				Expect(imp.Match(details)).To(BeTrue(), fmt.Sprintf("Match should fire for %+v", details))
+
+				modelConfig, err := imp.Import(details)
+				Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Import error: %v", err))
+				Expect(modelConfig.ConfigFile).To(ContainSubstring(e.expectBackend),
+					fmt.Sprintf("Model config: %+v", modelConfig))
+			})
+		}
+	})
+})