From 4c6750fe6b9315c45d82ffb9d9d9b4bfb8d3df7a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Jun 2026 22:03:58 +0200 Subject: [PATCH] feat(depth): metric-large + nested metric model gallery entries (#10363) * feat(depth): add depth-anything-3-metric-large gallery entry DA3METRIC-LARGE (ViT-L) single-file metric-scale depth + sky, served by the existing depth-anything backend (same single-GGUF path as mono-large). GGUF published at mudler/depth-anything.cpp-gguf. Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Ettore Di Giacinto * feat(depth): serve nested metric model (two-file load) The DA3 nested model needs both branches (anyview GIANT + metric ViT-L) loaded together. Wire it through the backend: - Load reads a 'metric_model:' entry from ModelOptions.Options and, when present, calls da_capi_load_nested(anyview, metric) instead of da_capi_load (registers the new abi-4 symbol; helper optionValue + unit test). - gallery: depth-anything-3-nested (model=anyview, options=metric branch, both GGUFs fetched) for metric-scale depth + pose. - bump depth-anything.cpp pin to cce5edc (abi 4 / da_capi_load_nested). Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- backend/go/depth-anything-cpp/Makefile | 6 +- .../depth-anything-cpp/godepthanythingcpp.go | 68 +++++++++++++++---- backend/go/depth-anything-cpp/main.go | 1 + .../go/depth-anything-cpp/nested_e2e_test.go | 64 +++++++++++++++++ backend/go/depth-anything-cpp/options_test.go | 20 ++++++ gallery/index.yaml | 48 +++++++++++++ 6 files changed, 193 insertions(+), 14 deletions(-) create mode 100644 backend/go/depth-anything-cpp/nested_e2e_test.go create mode 100644 backend/go/depth-anything-cpp/options_test.go diff --git a/backend/go/depth-anything-cpp/Makefile b/backend/go/depth-anything-cpp/Makefile index b6f5fe83a..815d2b0db 100644 --- a/backend/go/depth-anything-cpp/Makefile +++ b/backend/go/depth-anything-cpp/Makefile @@ -8,9 +8,11 @@ JOBS?=$(shell nproc --ignore=1) # depth-anything.cpp. Pin to a specific commit for a stable build; a squash # merge upstream can orphan a branch, so the native version is pinned by SHA. -# The SHA is kept alive by the v0.1.2 tag on the upstream repo. +# This SHA adds the nested two-file metric C-API (abi_version 4, +# da_capi_load_nested) required by the depth-anything-3-nested gallery model; +# tag it (e.g. v0.1.3) upstream to keep the SHA alive. DEPTHANYTHING_REPO?=https://github.com/mudler/depth-anything.cpp.git -DEPTHANYTHING_VERSION?=442eea4f73e83ca9d9bc8e026b966cffa678ffc4 +DEPTHANYTHING_VERSION?=cce5edc395fd1843806093d7ccc0c8b0d0b97b72 ifeq ($(NATIVE),false) CMAKE_ARGS+=-DGGML_NATIVE=OFF diff --git a/backend/go/depth-anything-cpp/godepthanythingcpp.go b/backend/go/depth-anything-cpp/godepthanythingcpp.go index cbff7cdf7..7c0cc34f0 100644 --- a/backend/go/depth-anything-cpp/godepthanythingcpp.go +++ b/backend/go/depth-anything-cpp/godepthanythingcpp.go @@ -24,6 +24,7 @@ import ( "math" "os" "path/filepath" + "strings" "unsafe" "github.com/mudler/LocalAI/pkg/grpc/base" @@ -36,6 +37,10 @@ import ( var ( // da_capi_load(const char* gguf_path, int n_threads) -> da_ctx* (0 = fail) CapiLoad func(gguf string, nThreads int32) uintptr + // da_capi_load_nested(const char* anyview_gguf, const char* metric_gguf, + // int n_threads) -> da_ctx* (0 = fail). The returned ctx serves the nested + // metric model: depth/pose calls produce final metric-scale depth + scaled pose. + CapiLoadNested func(anyview string, metric string, nThreads int32) uintptr // da_capi_free(da_ctx* ctx) — safe on a 0 handle. CapiFree func(handle uintptr) // da_capi_last_error(da_ctx* ctx) -> const char* (owned by ctx, "" if none). @@ -87,17 +92,24 @@ func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error { return fmt.Errorf("depth-anything-cpp: ModelFile is empty") } - var modelPath string - if filepath.IsAbs(modelFile) { - modelPath = modelFile - } else { - modelPath = filepath.Join(opts.ModelPath, modelFile) + resolve := func(name string) string { + if filepath.IsAbs(name) { + return name + } + return filepath.Join(opts.ModelPath, name) } + modelPath := resolve(modelFile) if _, err := os.Stat(modelPath); err != nil { return fmt.Errorf("depth-anything-cpp: model file not found: %s: %w", modelPath, err) } + // Nested metric models are a two-file pair: the main model is the anyview + // (GIANT) branch and the metric (ViT-L + DPT/sky) branch is named via a + // "metric_model:" entry in opts.Options. When present we load both + // branches so the engine runs the nested metric alignment. + metricFile := optionValue(opts.Options, "metric_model") + threads := opts.Threads if threads <= 0 { threads = 4 @@ -109,19 +121,47 @@ func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error { r.handle = 0 } - h := CapiLoad(modelPath, threads) - if h == 0 { - // da_capi_last_error needs a ctx; on a failed load we have none (it - // returns "" for a null ctx), so the text is best-effort. - if msg := CapiLastError(0); msg != "" { - return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", modelPath, msg) + var h uintptr + if metricFile != "" { + metricPath := resolve(metricFile) + if _, err := os.Stat(metricPath); err != nil { + return fmt.Errorf("depth-anything-cpp: metric_model file not found: %s: %w", metricPath, err) + } + h = CapiLoadNested(modelPath, metricPath, threads) + if h == 0 { + if msg := CapiLastError(0); msg != "" { + return fmt.Errorf("depth-anything-cpp: da_capi_load_nested failed for %s + %s: %s", modelPath, metricPath, msg) + } + return fmt.Errorf("depth-anything-cpp: da_capi_load_nested failed for %s + %s", modelPath, metricPath) + } + } else { + h = CapiLoad(modelPath, threads) + if h == 0 { + // da_capi_last_error needs a ctx; on a failed load we have none (it + // returns "" for a null ctx), so the text is best-effort. + if msg := CapiLastError(0); msg != "" { + return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", modelPath, msg) + } + return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", modelPath) } - return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", modelPath) } r.handle = h return nil } +// optionValue returns the value of the first "key:value" entry in opts whose key +// matches (case-sensitive), or "" if absent. Mirrors how other LocalAI backends +// read ModelOptions.Options. +func optionValue(opts []string, key string) string { + prefix := key + ":" + for _, o := range opts { + if strings.HasPrefix(o, prefix) { + return strings.TrimSpace(o[len(prefix):]) + } + } + return "" +} + // depthResult is the JSON payload returned by Predict. type depthResult struct { DepthW int `json:"depth_w"` @@ -373,6 +413,10 @@ func copyBytes(p *byte, n int) []byte { // runDepthPose runs depth estimation then pose recovery on an image file. It // returns the row-major depth map (length h*w), its dimensions, the 3x4 // extrinsics (12 floats) and 3x3 intrinsics (9 floats). +// runDepthPose returns depth + camera pose via two C-API calls (depth then pose). +// For a nested metric model both calls run the full two-branch pipeline, so this +// path infers twice; the typed Depth RPC (single da_capi_depth_dense call) is the +// efficient path for nested models. func (r *DepthAnythingCpp) runDepthPose(imagePath string) (depth []float32, h, w int, ext [12]float32, intr [9]float32, err error) { if r.handle == 0 { err = fmt.Errorf("depth-anything-cpp: model not loaded") diff --git a/backend/go/depth-anything-cpp/main.go b/backend/go/depth-anything-cpp/main.go index 6ba43fbcc..4c4546797 100644 --- a/backend/go/depth-anything-cpp/main.go +++ b/backend/go/depth-anything-cpp/main.go @@ -37,6 +37,7 @@ func main() { libFuncs := []LibFuncs{ {&CapiLoad, "da_capi_load"}, + {&CapiLoadNested, "da_capi_load_nested"}, {&CapiFree, "da_capi_free"}, {&CapiLastError, "da_capi_last_error"}, {&CapiDepthPath, "da_capi_depth_path"}, diff --git a/backend/go/depth-anything-cpp/nested_e2e_test.go b/backend/go/depth-anything-cpp/nested_e2e_test.go new file mode 100644 index 000000000..53d2fe858 --- /dev/null +++ b/backend/go/depth-anything-cpp/nested_e2e_test.go @@ -0,0 +1,64 @@ +package main + +// nested_e2e_test.go - e2e smoke for the nested two-file metric model. Loads the +// anyview branch as the main model and points the metric branch via the +// "metric_model:" option (exactly as the depth-anything-3-nested gallery +// entry does), then exercises the typed Depth RPC and asserts a metric depth map. +// +// Skips cleanly unless both nested GGUFs are present under ./test-models/ and the +// backend binary + fallback .so are built. + +import ( + "context" + "fmt" + "path/filepath" + "time" + + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("depth-anything-cpp nested metric model", func() { + It("loads the two-file pair via the metric_model option and returns metric depth", func() { + anyviewPath := modelPathOrSkip("depth-anything-nested-anyview.gguf") + _ = modelPathOrSkip("depth-anything-nested-metric.gguf") + imgB64 := loadTestImage() + + port := freePort() + cleanup := startBackend(port) + defer cleanup() + + client, closeConn := dialBackend(port) + defer closeConn() + + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) + defer cancel() + + loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{ + Model: "depth-anything-nested-anyview.gguf", + ModelFile: anyviewPath, + ModelPath: filepath.Dir(anyviewPath), + Options: []string{"metric_model:depth-anything-nested-metric.gguf"}, + Threads: 8, + }) + Expect(err).ToNot(HaveOccurred(), "LoadModel(nested)") + Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage()) + + resp, err := client.Depth(ctx, &pb.DepthRequest{ + Src: imgB64, + IncludeDepth: true, + IncludePose: true, + }) + Expect(err).ToNot(HaveOccurred(), "Depth(nested)") + Expect(resp.GetWidth()).To(BeNumerically(">", 0), "depth width") + Expect(resp.GetHeight()).To(BeNumerically(">", 0), "depth height") + Expect(resp.GetIsMetric()).To(BeTrue(), "nested output must be metric") + Expect(len(resp.GetDepth())).To(Equal(int(resp.GetWidth())*int(resp.GetHeight())), "dense depth length") + Expect(len(resp.GetExtrinsics())).To(Equal(12), "extrinsics 3x4") + Expect(resp.GetIntrinsics()[0]).To(BeNumerically(">", 0), "fx > 0") + + _, _ = fmt.Fprintf(GinkgoWriter, "nested depth OK: %dx%d is_metric=%v fx=%.2f\n", + resp.GetWidth(), resp.GetHeight(), resp.GetIsMetric(), resp.GetIntrinsics()[0]) + }) +}) diff --git a/backend/go/depth-anything-cpp/options_test.go b/backend/go/depth-anything-cpp/options_test.go new file mode 100644 index 000000000..2b6eea142 --- /dev/null +++ b/backend/go/depth-anything-cpp/options_test.go @@ -0,0 +1,20 @@ +package main + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = DescribeTable("optionValue", + func(opts []string, key, want string) { + Expect(optionValue(opts, key)).To(Equal(want)) + }, + Entry("present", []string{"foo:bar", "metric_model:m.gguf"}, "metric_model", "m.gguf"), + Entry("absent", []string{"foo:bar"}, "metric_model", ""), + Entry("nil", []string(nil), "metric_model", ""), + Entry("trims space", []string{"metric_model: m.gguf "}, "metric_model", "m.gguf"), + Entry("value with colon", []string{"metric_model:a:b.gguf"}, "metric_model", "a:b.gguf"), + Entry("first wins", []string{"metric_model:first.gguf", "metric_model:second.gguf"}, "metric_model", "first.gguf"), + Entry("empty value", []string{"metric_model:"}, "metric_model", ""), + Entry("prefix not key", []string{"metric_model_extra:x"}, "metric_model", ""), +) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0dae729d5..286af7487 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -8162,6 +8162,54 @@ - filename: depth-anything-mono-large-f32.gguf uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-mono-large-f32.gguf sha256: "291b1a554af907c3f79986ee225da8933be5f7a31d73c81d06784cda284535de" + +- !!merge <<: *depth-anything-3-base + name: depth-anything-3-metric-large + description: | + Depth Anything 3 (metric large / vitl), f32 (~1.3 GB) — single-image + metric-scale depth (meters) + a sky mask. DPT single-head metric variant; use + GenerateImage (src -> normalized depth PNG) or Predict (JSON metric depth + stats, is_metric=true). + overrides: + backend: depth-anything + parameters: + model: depth-anything-metric-large-f32.gguf + files: + - filename: depth-anything-metric-large-f32.gguf + uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-metric-large-f32.gguf + sha256: "d10b7450c2238244b2d72e2749537a1876255180149cd630a18bc1619c9286be" + +- !!merge <<: *depth-anything-3-base + name: depth-anything-3-nested + description: | + Depth Anything 3 (nested giant+large), f32 — the recommended metric model. A + two-branch pipeline: the anyview GIANT (vitg) branch and a metric ViT-L branch + are run and aligned to recover true metric-scale depth (meters) + scaled camera + pose from a single image. Downloads both branches (~6 GB total); GPU strongly + recommended. Predict returns metric depth stats + pose (is_metric=true). + tags: + - depth-estimation + - camera-pose + - metric-depth + - depth-anything + - native + - cpp + - gpu + overrides: + backend: depth-anything + # The metric (ViT-L) branch is loaded alongside the anyview model via the + # metric_model option; both files are fetched below. + options: + - "metric_model:depth-anything-nested-metric.gguf" + parameters: + model: depth-anything-nested-anyview.gguf + files: + - filename: depth-anything-nested-anyview.gguf + uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-nested-anyview.gguf + sha256: "2a4cb4382aa8c4159fff10dfffa121f3c7a574551c4ff4ad130f235d5442f9ce" + - filename: depth-anything-nested-metric.gguf + uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-nested-metric.gguf + sha256: "b54ed50cbc0b0c14fae1f8edd0fea8bd1cac0850485fd6e7eb2422c7a19e570e" - name: rfdetr-cpp-base url: github:mudler/LocalAI/gallery/virtual.yaml@master urls: