mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-18 13:49:09 -04:00
feat(depth): metric-large + nested metric model gallery entries (#10363)
* feat(depth): add depth-anything-3-metric-large gallery entry

  DA3METRIC-LARGE (ViT-L) single-file metric-scale depth + sky, served by the existing depth-anything backend (same single-GGUF path as mono-large). GGUF published at mudler/depth-anything.cpp-gguf.

  Assisted-by: Claude:claude-opus-4-8
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(depth): serve nested metric model (two-file load)

  The DA3 nested model needs both branches (anyview GIANT + metric ViT-L) loaded together. Wire it through the backend:

  - Load reads a 'metric_model:<file>' entry from ModelOptions.Options and, when present, calls da_capi_load_nested(anyview, metric) instead of da_capi_load (registers the new abi-4 symbol; helper optionValue + unit test).
  - gallery: depth-anything-3-nested (model=anyview, options=metric branch, both GGUFs fetched) for metric-scale depth + pose.
  - bump depth-anything.cpp pin to cce5edc (abi 4 / da_capi_load_nested).

  Assisted-by: Claude:claude-opus-4-8
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -8,9 +8,11 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# depth-anything.cpp. Pin to a specific commit for a stable build; a squash
|
||||
# merge upstream can orphan a branch, so the native version is pinned by SHA.
|
||||
# The SHA is kept alive by the v0.1.2 tag on the upstream repo.
|
||||
# This SHA adds the nested two-file metric C-API (abi_version 4,
|
||||
# da_capi_load_nested) required by the depth-anything-3-nested gallery model;
|
||||
# tag it (e.g. v0.1.3) upstream to keep the SHA alive.
|
||||
DEPTHANYTHING_REPO?=https://github.com/mudler/depth-anything.cpp.git
|
||||
DEPTHANYTHING_VERSION?=442eea4f73e83ca9d9bc8e026b966cffa678ffc4
|
||||
DEPTHANYTHING_VERSION?=cce5edc395fd1843806093d7ccc0c8b0d0b97b72
|
||||
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
@@ -36,6 +37,10 @@ import (
|
||||
var (
|
||||
// da_capi_load(const char* gguf_path, int n_threads) -> da_ctx* (0 = fail)
|
||||
CapiLoad func(gguf string, nThreads int32) uintptr
|
||||
// da_capi_load_nested(const char* anyview_gguf, const char* metric_gguf,
|
||||
// int n_threads) -> da_ctx* (0 = fail). The returned ctx serves the nested
|
||||
// metric model: depth/pose calls produce final metric-scale depth + scaled pose.
|
||||
CapiLoadNested func(anyview string, metric string, nThreads int32) uintptr
|
||||
// da_capi_free(da_ctx* ctx) — safe on a 0 handle.
|
||||
CapiFree func(handle uintptr)
|
||||
// da_capi_last_error(da_ctx* ctx) -> const char* (owned by ctx, "" if none).
|
||||
@@ -87,17 +92,24 @@ func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error {
|
||||
return fmt.Errorf("depth-anything-cpp: ModelFile is empty")
|
||||
}
|
||||
|
||||
var modelPath string
|
||||
if filepath.IsAbs(modelFile) {
|
||||
modelPath = modelFile
|
||||
} else {
|
||||
modelPath = filepath.Join(opts.ModelPath, modelFile)
|
||||
resolve := func(name string) string {
|
||||
if filepath.IsAbs(name) {
|
||||
return name
|
||||
}
|
||||
return filepath.Join(opts.ModelPath, name)
|
||||
}
|
||||
modelPath := resolve(modelFile)
|
||||
|
||||
if _, err := os.Stat(modelPath); err != nil {
|
||||
return fmt.Errorf("depth-anything-cpp: model file not found: %s: %w", modelPath, err)
|
||||
}
|
||||
|
||||
// Nested metric models are a two-file pair: the main model is the anyview
|
||||
// (GIANT) branch and the metric (ViT-L + DPT/sky) branch is named via a
|
||||
// "metric_model:<filename>" entry in opts.Options. When present we load both
|
||||
// branches so the engine runs the nested metric alignment.
|
||||
metricFile := optionValue(opts.Options, "metric_model")
|
||||
|
||||
threads := opts.Threads
|
||||
if threads <= 0 {
|
||||
threads = 4
|
||||
@@ -109,19 +121,47 @@ func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error {
|
||||
r.handle = 0
|
||||
}
|
||||
|
||||
h := CapiLoad(modelPath, threads)
|
||||
if h == 0 {
|
||||
// da_capi_last_error needs a ctx; on a failed load we have none (it
|
||||
// returns "" for a null ctx), so the text is best-effort.
|
||||
if msg := CapiLastError(0); msg != "" {
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", modelPath, msg)
|
||||
var h uintptr
|
||||
if metricFile != "" {
|
||||
metricPath := resolve(metricFile)
|
||||
if _, err := os.Stat(metricPath); err != nil {
|
||||
return fmt.Errorf("depth-anything-cpp: metric_model file not found: %s: %w", metricPath, err)
|
||||
}
|
||||
h = CapiLoadNested(modelPath, metricPath, threads)
|
||||
if h == 0 {
|
||||
if msg := CapiLastError(0); msg != "" {
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load_nested failed for %s + %s: %s", modelPath, metricPath, msg)
|
||||
}
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load_nested failed for %s + %s", modelPath, metricPath)
|
||||
}
|
||||
} else {
|
||||
h = CapiLoad(modelPath, threads)
|
||||
if h == 0 {
|
||||
// da_capi_last_error needs a ctx; on a failed load we have none (it
|
||||
// returns "" for a null ctx), so the text is best-effort.
|
||||
if msg := CapiLastError(0); msg != "" {
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", modelPath, msg)
|
||||
}
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", modelPath)
|
||||
}
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", modelPath)
|
||||
}
|
||||
r.handle = h
|
||||
return nil
|
||||
}
|
||||
|
||||
// optionValue looks up key in a list of "key:value" option strings and returns
// the whitespace-trimmed value of the first matching entry.
//
// Matching is case-sensitive and requires the entry to start with exactly
// key followed by a colon, so "metric_model_extra:x" does not match the key
// "metric_model". Everything after that first separator is the value, which
// means values may themselves contain colons. When no entry matches, or the
// first matching entry has an empty value, the result is "".
func optionValue(opts []string, key string) string {
	marker := key + ":"
	for _, entry := range opts {
		if !strings.HasPrefix(entry, marker) {
			continue
		}
		// First match wins; later duplicates of the same key are ignored.
		return strings.TrimSpace(strings.TrimPrefix(entry, marker))
	}
	return ""
}
|
||||
|
||||
// depthResult is the JSON payload returned by Predict.
|
||||
type depthResult struct {
|
||||
DepthW int `json:"depth_w"`
|
||||
@@ -373,6 +413,10 @@ func copyBytes(p *byte, n int) []byte {
|
||||
// runDepthPose runs depth estimation then pose recovery on an image file. It
|
||||
// returns the row-major depth map (length h*w), its dimensions, the 3x4
|
||||
// extrinsics (12 floats) and 3x3 intrinsics (9 floats).
|
||||
// runDepthPose returns depth + camera pose via two C-API calls (depth then pose).
|
||||
// For a nested metric model both calls run the full two-branch pipeline, so this
|
||||
// path infers twice; the typed Depth RPC (single da_capi_depth_dense call) is the
|
||||
// efficient path for nested models.
|
||||
func (r *DepthAnythingCpp) runDepthPose(imagePath string) (depth []float32, h, w int, ext [12]float32, intr [9]float32, err error) {
|
||||
if r.handle == 0 {
|
||||
err = fmt.Errorf("depth-anything-cpp: model not loaded")
|
||||
|
||||
@@ -37,6 +37,7 @@ func main() {
|
||||
|
||||
libFuncs := []LibFuncs{
|
||||
{&CapiLoad, "da_capi_load"},
|
||||
{&CapiLoadNested, "da_capi_load_nested"},
|
||||
{&CapiFree, "da_capi_free"},
|
||||
{&CapiLastError, "da_capi_last_error"},
|
||||
{&CapiDepthPath, "da_capi_depth_path"},
|
||||
|
||||
64
backend/go/depth-anything-cpp/nested_e2e_test.go
Normal file
64
backend/go/depth-anything-cpp/nested_e2e_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package main
|
||||
|
||||
// nested_e2e_test.go - e2e smoke for the nested two-file metric model. Loads the
|
||||
// anyview branch as the main model and points the metric branch via the
|
||||
// "metric_model:<file>" option (exactly as the depth-anything-3-nested gallery
|
||||
// entry does), then exercises the typed Depth RPC and asserts a metric depth map.
|
||||
//
|
||||
// Skips cleanly unless both nested GGUFs are present under ./test-models/ and the
|
||||
// backend binary + fallback .so are built.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("depth-anything-cpp nested metric model", func() {
|
||||
It("loads the two-file pair via the metric_model option and returns metric depth", func() {
|
||||
anyviewPath := modelPathOrSkip("depth-anything-nested-anyview.gguf")
|
||||
_ = modelPathOrSkip("depth-anything-nested-metric.gguf")
|
||||
imgB64 := loadTestImage()
|
||||
|
||||
port := freePort()
|
||||
cleanup := startBackend(port)
|
||||
defer cleanup()
|
||||
|
||||
client, closeConn := dialBackend(port)
|
||||
defer closeConn()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{
|
||||
Model: "depth-anything-nested-anyview.gguf",
|
||||
ModelFile: anyviewPath,
|
||||
ModelPath: filepath.Dir(anyviewPath),
|
||||
Options: []string{"metric_model:depth-anything-nested-metric.gguf"},
|
||||
Threads: 8,
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred(), "LoadModel(nested)")
|
||||
Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage())
|
||||
|
||||
resp, err := client.Depth(ctx, &pb.DepthRequest{
|
||||
Src: imgB64,
|
||||
IncludeDepth: true,
|
||||
IncludePose: true,
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred(), "Depth(nested)")
|
||||
Expect(resp.GetWidth()).To(BeNumerically(">", 0), "depth width")
|
||||
Expect(resp.GetHeight()).To(BeNumerically(">", 0), "depth height")
|
||||
Expect(resp.GetIsMetric()).To(BeTrue(), "nested output must be metric")
|
||||
Expect(len(resp.GetDepth())).To(Equal(int(resp.GetWidth())*int(resp.GetHeight())), "dense depth length")
|
||||
Expect(len(resp.GetExtrinsics())).To(Equal(12), "extrinsics 3x4")
|
||||
Expect(resp.GetIntrinsics()[0]).To(BeNumerically(">", 0), "fx > 0")
|
||||
|
||||
_, _ = fmt.Fprintf(GinkgoWriter, "nested depth OK: %dx%d is_metric=%v fx=%.2f\n",
|
||||
resp.GetWidth(), resp.GetHeight(), resp.GetIsMetric(), resp.GetIntrinsics()[0])
|
||||
})
|
||||
})
|
||||
20
backend/go/depth-anything-cpp/options_test.go
Normal file
20
backend/go/depth-anything-cpp/options_test.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = DescribeTable("optionValue",
|
||||
func(opts []string, key, want string) {
|
||||
Expect(optionValue(opts, key)).To(Equal(want))
|
||||
},
|
||||
Entry("present", []string{"foo:bar", "metric_model:m.gguf"}, "metric_model", "m.gguf"),
|
||||
Entry("absent", []string{"foo:bar"}, "metric_model", ""),
|
||||
Entry("nil", []string(nil), "metric_model", ""),
|
||||
Entry("trims space", []string{"metric_model: m.gguf "}, "metric_model", "m.gguf"),
|
||||
Entry("value with colon", []string{"metric_model:a:b.gguf"}, "metric_model", "a:b.gguf"),
|
||||
Entry("first wins", []string{"metric_model:first.gguf", "metric_model:second.gguf"}, "metric_model", "first.gguf"),
|
||||
Entry("empty value", []string{"metric_model:"}, "metric_model", ""),
|
||||
Entry("prefix not key", []string{"metric_model_extra:x"}, "metric_model", ""),
|
||||
)
|
||||
@@ -8162,6 +8162,54 @@
|
||||
- filename: depth-anything-mono-large-f32.gguf
|
||||
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-mono-large-f32.gguf
|
||||
sha256: "291b1a554af907c3f79986ee225da8933be5f7a31d73c81d06784cda284535de"
|
||||
|
||||
- !!merge <<: *depth-anything-3-base
|
||||
name: depth-anything-3-metric-large
|
||||
description: |
|
||||
Depth Anything 3 (metric large / vitl), f32 (~1.3 GB) — single-image
|
||||
metric-scale depth (meters) + a sky mask. DPT single-head metric variant; use
|
||||
GenerateImage (src -> normalized depth PNG) or Predict (JSON metric depth
|
||||
stats, is_metric=true).
|
||||
overrides:
|
||||
backend: depth-anything
|
||||
parameters:
|
||||
model: depth-anything-metric-large-f32.gguf
|
||||
files:
|
||||
- filename: depth-anything-metric-large-f32.gguf
|
||||
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-metric-large-f32.gguf
|
||||
sha256: "d10b7450c2238244b2d72e2749537a1876255180149cd630a18bc1619c9286be"
|
||||
|
||||
- !!merge <<: *depth-anything-3-base
|
||||
name: depth-anything-3-nested
|
||||
description: |
|
||||
Depth Anything 3 (nested giant+large), f32 — the recommended metric model. A
|
||||
two-branch pipeline: the anyview GIANT (vitg) branch and a metric ViT-L branch
|
||||
are run and aligned to recover true metric-scale depth (meters) + scaled camera
|
||||
pose from a single image. Downloads both branches (~6 GB total); GPU strongly
|
||||
recommended. Predict returns metric depth stats + pose (is_metric=true).
|
||||
tags:
|
||||
- depth-estimation
|
||||
- camera-pose
|
||||
- metric-depth
|
||||
- depth-anything
|
||||
- native
|
||||
- cpp
|
||||
- gpu
|
||||
overrides:
|
||||
backend: depth-anything
|
||||
# The metric (ViT-L) branch is loaded alongside the anyview model via the
|
||||
# metric_model option; both files are fetched below.
|
||||
options:
|
||||
- "metric_model:depth-anything-nested-metric.gguf"
|
||||
parameters:
|
||||
model: depth-anything-nested-anyview.gguf
|
||||
files:
|
||||
- filename: depth-anything-nested-anyview.gguf
|
||||
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-nested-anyview.gguf
|
||||
sha256: "2a4cb4382aa8c4159fff10dfffa121f3c7a574551c4ff4ad130f235d5442f9ce"
|
||||
- filename: depth-anything-nested-metric.gguf
|
||||
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-nested-metric.gguf
|
||||
sha256: "b54ed50cbc0b0c14fae1f8edd0fea8bd1cac0850485fd6e7eb2422c7a19e570e"
|
||||
- name: rfdetr-cpp-base
|
||||
url: github:mudler/LocalAI/gallery/virtual.yaml@master
|
||||
urls:
|
||||
|
||||
Reference in New Issue
Block a user