Compare commits

...

3 Commits

Author SHA1 Message Date
localai-bot
58581c6c34 fix(depth): pin depth-anything.cpp to e0b6814 (ABI 3 dense C-API)
The Depth RPC handler calls da_capi_depth_dense / da_capi_points (C-API ABI 3);
pin the native build to the commit that exports them.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-16 00:11:22 +00:00
Ettore Di Giacinto
fa443d16a2 feat(depth): typed Depth RPC + REST endpoint exposing full DA3 data
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 23:50:16 +00:00
Ettore Di Giacinto
a4550b0bd2 feat(backend): add depth-anything (Depth Anything 3) C++/ggml backend + gallery
Mirrors the locate-anything-cpp backend to register a new depth-anything
backend that wraps the Depth Anything 3 ggml port (depth-anything.cpp) via
purego (cgo-less, no Python at inference).

- backend/go/depth-anything-cpp/: gRPC backend (Load + Predict + GenerateImage),
  purego binding to the da_capi_* C ABI, CMake/Makefile/run/package/test scripts
  building depth-anything.cpp's DA_SHARED static .so per CPU variant.
- backend/index.yaml: depth-anything backend meta + all hardware-variant
  capability entries (cpu/cuda12/cuda13/intel-sycl-f32+f16/vulkan/nvidia-l4t).
- gallery/index.yaml: 8 Depth Anything 3 GGUF models (base q4_k/q8_0/f16/f32,
  small, large, giant, mono-large).
- .github/backend-matrix.yml: one build entry per hardware variant.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 23:12:55 +00:00
31 changed files with 1765 additions and 0 deletions

View File

@@ -716,6 +716,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -1582,6 +1595,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -1621,6 +1647,19 @@ include:
backend: "locate-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-depth-anything-cpp'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -2898,6 +2937,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
@@ -2911,6 +2963,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
@@ -2924,6 +2989,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
@@ -2938,6 +3016,20 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
@@ -2952,6 +3044,20 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-depth-anything-cpp'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
@@ -3058,6 +3164,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-depth-anything-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
# whisper
- build-type: ''
cuda-major-version: ""

View File

@@ -24,6 +24,7 @@ service Backend {
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
rpc Detect(DetectOptions) returns (DetectResponse) {}
rpc Depth(DepthRequest) returns (DepthResponse) {}
rpc FaceVerify(FaceVerifyRequest) returns (FaceVerifyResponse) {}
rpc FaceAnalyze(FaceAnalyzeRequest) returns (FaceAnalyzeResponse) {}
rpc VoiceVerify(VoiceVerifyRequest) returns (VoiceVerifyResponse) {}
@@ -670,6 +671,35 @@ message DetectResponse {
repeated Detection Detections = 1;
}
// --- Depth estimation messages (Depth Anything 3) ---
message DepthRequest {
string src = 1; // input image (filesystem path or base64-encoded payload)
string dst = 2; // optional output directory for exports (glb/colmap)
bool include_depth = 3; // return the per-pixel metric depth map
bool include_confidence = 4; // return the per-pixel confidence map (DualDPT)
bool include_pose = 5; // return camera extrinsics/intrinsics (DualDPT)
bool include_sky = 6; // return the per-pixel sky map (mono models)
bool include_points = 7; // back-project to a 3D point cloud (DualDPT)
float points_conf_thresh = 8; // keep points with confidence >= this threshold
repeated string exports = 9; // requested exports: "glb", "colmap"
}
message DepthResponse {
int32 width = 1; // processed depth-map width
int32 height = 2; // processed depth-map height
repeated float depth = 3; // width*height row-major metric depth
repeated float confidence = 4; // width*height row-major confidence (DualDPT)
repeated float sky = 5; // width*height row-major sky map (mono)
repeated float extrinsics = 6; // 12 floats, 3x4 row-major (world-to-camera)
repeated float intrinsics = 7; // 9 floats, 3x3 row-major
int32 num_points = 8; // number of 3D points
repeated float points = 9; // num_points*3 xyz, world space
bytes point_colors = 10; // num_points*3 uint8 rgb
repeated string export_paths = 11; // paths written for the requested exports
bool is_metric = 12; // depth is in metric units
}
// --- Face recognition messages ---
message FacialArea {

View File

@@ -0,0 +1,7 @@
sources/
build*/
package/
libdepthanythingcpp*.so
depth-anything-cpp
test-models/
test-data/

View File

@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.18)
project(libdepthanythingcpp LANGUAGES C CXX)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Static-link ggml into the depth-anything shared library so the resulting .so
# has no runtime dependency on an external libggml — only on
# libc/libstdc++/libgomp, which the LocalAI package step bundles into the
# docker image.
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE)
# depth-anything.cpp build switches: skip CLI/tests, but build libdepthanything
# itself as a SHARED library (DA_SHARED) while ggml stays static
# (BUILD_SHARED_LIBS OFF above). The da_capi_* C ABI is compiled into
# src/da_capi.cpp and re-exported by that shared library, so no extra MODULE
# wrapper is needed (unlike locate-anything.cpp).
set(DA_BUILD_CLI OFF CACHE BOOL "Disable depth-anything CLI" FORCE)
set(DA_BUILD_TESTS OFF CACHE BOOL "Disable depth-anything tests" FORCE)
set(DA_SHARED ON CACHE BOOL "Build libdepthanything as a shared lib" FORCE)
add_subdirectory(./sources/depth-anything.cpp)
# Emit libdepthanything.so into the top-level build dir so the Makefile can
# rename it to the per-variant libdepthanythingcpp-<variant>.so.
set_target_properties(depthanything PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

View File

@@ -0,0 +1,136 @@
CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=false
GOCMD?=go
GO_TAGS?=
JOBS?=$(shell nproc --ignore=1)
# depth-anything.cpp. Pin to a specific commit for a stable build; a squash
# merge upstream can orphan a branch, so the native version is pinned by SHA.
DEPTHANYTHING_REPO?=https://github.com/mudler/depth-anything.cpp.git
DEPTHANYTHING_VERSION?=e0b6814d2f58261216da69d63326f1f2d75d4435
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Forward LocalAI's BUILD_TYPE to the matching ggml backend switch. depth-anything.cpp
# force-sets GGML_CUDA/GGML_VULKAN/GGML_METAL from its own DA_GGML_* options, so
# those must be toggled via the DA_GGML_* names (a bare -DGGML_CUDA=ON would be
# overridden); the remaining ggml switches pass straight through.
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON -DDA_GGML_CUDA=ON
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON
else ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=ON -DDA_GGML_VULKAN=ON
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DDA_GGML_METAL=ON
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx
endif
sources/depth-anything.cpp:
mkdir -p sources && \
git clone --recursive $(DEPTHANYTHING_REPO) sources/depth-anything.cpp && \
cd sources/depth-anything.cpp && \
git checkout $(DEPTHANYTHING_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
# Detect OS
UNAME_S := $(shell uname -s)
# Only build CPU variants on Linux
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = libdepthanythingcpp-fallback.so
endif
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o depth-anything-cpp ./
package: depth-anything-cpp
bash package.sh
build: package
clean: purge
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
purge:
rm -rf build*
# Build all variants (Linux only)
ifeq ($(UNAME_S),Linux)
libdepthanythingcpp-avx.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:avx${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
libdepthanythingcpp-avx2.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:avx2${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:avx512${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
endif
# Build fallback variant (all platforms)
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
libdepthanythingcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
all: depth-anything-cpp package
# `test` is invoked by the top-level Makefile's `test-extra` target. It builds
# the backend binary + the fallback shared library (needed for dlopen at
# runtime), then runs test.sh which downloads a small GGUF + a test image and
# exercises the gRPC Load/Predict wire path via the Go smoke test in
# main_test.go.
test: depth-anything-cpp libdepthanythingcpp-fallback.so
bash test.sh

View File

@@ -0,0 +1,509 @@
package main
// godepthanythingcpp.go - gRPC handlers (Load, Predict, GenerateImage) for the
// depth-anything-cpp backend, wrapping the Depth Anything 3 ggml C-API
// (libdepthanythingcpp-<variant>.so) via purego.
//
// Embeds base.SingleThread to default the unimplemented RPCs to "not supported"
// and to serialize calls — the C side shares a ggml graph allocator and is NOT
// reentrant, so all inference must run one-at-a-time.
//
// Depth has no native OpenAI endpoint, so the model is exposed two ways:
//
// - GenerateImage(src, dst): run depth on the src image and write a
// min-max-normalised grayscale depth PNG to dst.
// - Predict(images[0]): run depth+pose and return a JSON blob with the depth
// dimensions, depth stats and the camera extrinsics (3x4) / intrinsics (3x3).
import (
"encoding/base64"
"encoding/json"
"fmt"
"image"
"image/png"
"math"
"os"
"path/filepath"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
// C-API function pointers, registered in main.go via purego. The da_capi_*
// symbols live inside libdepthanything (src/da_capi.cpp) and are re-exported by
// the DA_SHARED build.
var (
// da_capi_load(const char* gguf_path, int n_threads) -> da_ctx* (0 = fail)
CapiLoad func(gguf string, nThreads int32) uintptr
// da_capi_free(da_ctx* ctx) — safe on a 0 handle.
CapiFree func(handle uintptr)
// da_capi_last_error(da_ctx* ctx) -> const char* (owned by ctx, "" if none).
// purego marshals the returned C string into a Go string (a copy), so we
// never free it.
CapiLastError func(handle uintptr) string
// da_capi_depth_path(ctx, image_path, out_h*, out_w*) -> float* depth map
// (row-major H*W); nil on error. Caller frees via da_capi_free_floats.
CapiDepthPath func(handle uintptr, imagePath string, outH *int32, outW *int32) *float32
// da_capi_free_floats(float* p)
CapiFreeFloats func(p *float32)
// da_capi_pose_path(ctx, image_path, out_ext[12], out_intr[9]) -> 0 ok, -1 err
CapiPosePath func(handle uintptr, imagePath string, outExt *float32, outIntr *float32) int32
// da_capi_depth_dense(ctx, image_path, out_h*, out_w*, out_depth**, out_conf**,
// out_sky**, out_ext[12], out_intr[9], out_is_metric*) -> 0 ok, -1 err.
// Each non-NULL out_depth/out_conf/out_sky receives a malloc'd float[H*W] (free
// via da_capi_free_floats); buffers the model doesn't produce are set NULL.
CapiDepthDense func(handle uintptr, imagePath string,
outH, outW *int32,
outDepth, outConf, outSky **float32,
outExt, outIntr *float32,
outIsMetric *int32) int32
// da_capi_points(ctx, image_path, conf_thresh, out_n*, out_xyz**, out_rgb**) ->
// 0 ok, -1 err. *out_xyz = malloc'd float[3*N] (free via da_capi_free_floats),
// *out_rgb = malloc'd uint8[3*N] (free via da_capi_free_bytes).
CapiPoints func(handle uintptr, imagePath string, confThresh float32,
outN *int32, outXyz **float32, outRgb **byte) int32
// da_capi_free_bytes(unsigned char* p)
CapiFreeBytes func(p *byte)
// da_capi_export_glb(ctx, image_path, out_glb) -> 0 ok, -1 err
CapiExportGlb func(handle uintptr, imagePath string, outGlb string) int32
// da_capi_export_colmap(ctx, image_path, out_dir, binary) -> 0 ok, -1 err
CapiExportColmap func(handle uintptr, imagePath string, outDir string, binary int32) int32
)
type DepthAnythingCpp struct {
base.SingleThread
handle uintptr
}
// Load loads the GGUF model at opts.ModelFile (joined with opts.ModelPath if
// relative) and stores the da_ctx handle for later inference calls.
func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error {
modelFile := opts.ModelFile
if modelFile == "" {
modelFile = opts.Model
}
if modelFile == "" {
return fmt.Errorf("depth-anything-cpp: ModelFile is empty")
}
var modelPath string
if filepath.IsAbs(modelFile) {
modelPath = modelFile
} else {
modelPath = filepath.Join(opts.ModelPath, modelFile)
}
if _, err := os.Stat(modelPath); err != nil {
return fmt.Errorf("depth-anything-cpp: model file not found: %s: %w", modelPath, err)
}
threads := opts.Threads
if threads <= 0 {
threads = 4
}
// Release previous model if any (re-Load).
if r.handle != 0 {
CapiFree(r.handle)
r.handle = 0
}
h := CapiLoad(modelPath, threads)
if h == 0 {
// da_capi_last_error needs a ctx; on a failed load we have none (it
// returns "" for a null ctx), so the text is best-effort.
if msg := CapiLastError(0); msg != "" {
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", modelPath, msg)
}
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", modelPath)
}
r.handle = h
return nil
}
// depthResult is the JSON payload returned by Predict.
type depthResult struct {
DepthW int `json:"depth_w"`
DepthH int `json:"depth_h"`
DepthMin float32 `json:"depth_min"`
DepthMax float32 `json:"depth_max"`
Extrinsics [12]float32 `json:"extrinsics"` // 3x4 row-major
Intrinsics [9]float32 `json:"intrinsics"` // 3x3 row-major
}
// Predict runs depth+pose on the first supplied image and returns depth
// statistics + camera pose as a JSON string. LocalAI wraps the string into the
// Reply.Message of the gRPC response. The image in Images[0] may be a
// filesystem path or a base64-encoded payload.
func (r *DepthAnythingCpp) Predict(opts *pb.PredictOptions) (string, error) {
imgs := opts.GetImages()
if len(imgs) == 0 {
return "", fmt.Errorf("depth-anything-cpp: Predict requires an image in Images[]")
}
imgPath, cleanup, err := materializeImage(imgs[0])
if err != nil {
return "", fmt.Errorf("depth-anything-cpp: %w", err)
}
defer cleanup()
depth, h, w, ext, intr, err := r.runDepthPose(imgPath)
if err != nil {
return "", err
}
dmin, dmax := minMax(depth)
payload, err := json.Marshal(depthResult{
DepthW: w, DepthH: h,
DepthMin: dmin, DepthMax: dmax,
Extrinsics: ext, Intrinsics: intr,
})
if err != nil {
return "", fmt.Errorf("depth-anything-cpp: marshal: %w", err)
}
return string(payload), nil
}
// GenerateImage runs depth on req.Src and writes a normalised grayscale depth
// PNG to req.Dst.
func (r *DepthAnythingCpp) GenerateImage(req *pb.GenerateImageRequest) error {
if req.GetSrc() == "" {
return fmt.Errorf("depth-anything-cpp: GenerateImage requires src")
}
if req.GetDst() == "" {
return fmt.Errorf("depth-anything-cpp: GenerateImage requires dst")
}
imgPath, cleanup, err := materializeImage(req.GetSrc())
if err != nil {
return fmt.Errorf("depth-anything-cpp: %w", err)
}
defer cleanup()
depth, h, w, _, _, err := r.runDepthPose(imgPath)
if err != nil {
return err
}
return writeDepthPNG(req.GetDst(), depth, h, w)
}
// Depth is the typed Depth RPC. It runs the Depth Anything 3 pipeline on the
// request's src image and fills a DepthResponse honoring the include_* flags and
// exports: per-pixel metric depth + confidence (DualDPT) or depth + sky (mono),
// camera extrinsics/intrinsics, an optional back-projected 3D point cloud and
// glb/COLMAP exports. The src may be a filesystem path or a base64 payload.
func (r *DepthAnythingCpp) Depth(in *pb.DepthRequest) (pb.DepthResponse, error) {
// Accumulate into locals and return a single composite literal at the end:
// returning a named pb.DepthResponse value would copy its embedded mutex
// (go vet copylocks).
if r.handle == 0 {
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: model not loaded")
}
if in.GetSrc() == "" {
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: Depth requires src")
}
imgPath, cleanup, err := materializeImage(in.GetSrc())
if err != nil {
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: %w", err)
}
defer cleanup()
// Dense per-pixel output + pose. Pass buffer pointers only for the
// requested maps so the native side can skip unrequested work; ext/intr
// must always point at 12/9 floats per the C ABI.
var (
h, w, isMetric int32
depthPtr, confPtr *float32
skyPtr *float32
ext [12]float32
intr [9]float32
pDepth, pConf, pSky **float32
)
if in.GetIncludeDepth() {
pDepth = &depthPtr
}
if in.GetIncludeConfidence() {
pConf = &confPtr
}
if in.GetIncludeSky() {
pSky = &skyPtr
}
rc := CapiDepthDense(r.handle, imgPath, &h, &w, pDepth, pConf, pSky, &ext[0], &intr[0], &isMetric)
if rc != 0 {
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: da_capi_depth_dense failed (rc=%d): %s", rc, r.lastError())
}
n := int(h) * int(w)
var (
depth, conf, sky []float32
extrinsics, intrinsic []float32
numPoints int32
points []float32
pointColors []byte
exportPaths []string
)
if depthPtr != nil {
depth = copyFloats(depthPtr, n)
CapiFreeFloats(depthPtr)
}
if confPtr != nil {
conf = copyFloats(confPtr, n)
CapiFreeFloats(confPtr)
}
if skyPtr != nil {
sky = copyFloats(skyPtr, n)
CapiFreeFloats(skyPtr)
}
if in.GetIncludePose() {
extrinsics = append([]float32(nil), ext[:]...)
intrinsic = append([]float32(nil), intr[:]...)
}
// 3D point cloud (DualDPT / pose-capable models only).
if in.GetIncludePoints() {
var (
np int32
xyzPtr *float32
rgbPtr *byte
)
if rc := CapiPoints(r.handle, imgPath, in.GetPointsConfThresh(), &np, &xyzPtr, &rgbPtr); rc != 0 {
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: da_capi_points failed (rc=%d): %s", rc, r.lastError())
}
numPoints = np
if xyzPtr != nil {
points = copyFloats(xyzPtr, int(np)*3)
CapiFreeFloats(xyzPtr)
}
if rgbPtr != nil {
pointColors = copyBytes(rgbPtr, int(np)*3)
CapiFreeBytes(rgbPtr)
}
}
// Exports (glb / colmap). They are written under in.Dst (a directory); a
// temp dir is used when Dst is empty.
if len(in.GetExports()) > 0 {
exportPaths, err = r.runExports(imgPath, in.GetDst(), in.GetExports())
if err != nil {
return pb.DepthResponse{}, err
}
}
return pb.DepthResponse{
Width: w,
Height: h,
Depth: depth,
Confidence: conf,
Sky: sky,
Extrinsics: extrinsics,
Intrinsics: intrinsic,
NumPoints: numPoints,
Points: points,
PointColors: pointColors,
ExportPaths: exportPaths,
IsMetric: isMetric != 0,
}, nil
}
// runExports writes the requested exports for imgPath into dstDir and returns
// the written paths. Supported exports: "glb", "colmap".
func (r *DepthAnythingCpp) runExports(imgPath, dstDir string, exports []string) ([]string, error) {
if dstDir == "" {
tmp, err := os.MkdirTemp("", "depth-anything-export-*")
if err != nil {
return nil, fmt.Errorf("depth-anything-cpp: mkdir export dir: %w", err)
}
dstDir = tmp
} else if err := os.MkdirAll(dstDir, 0o755); err != nil {
return nil, fmt.Errorf("depth-anything-cpp: mkdir %s: %w", dstDir, err)
}
var paths []string
for _, exp := range exports {
switch exp {
case "glb":
out := filepath.Join(dstDir, "pointcloud.glb")
if rc := CapiExportGlb(r.handle, imgPath, out); rc != 0 {
return nil, fmt.Errorf("depth-anything-cpp: da_capi_export_glb failed (rc=%d): %s", rc, r.lastError())
}
paths = append(paths, out)
case "colmap":
out := filepath.Join(dstDir, "colmap")
if err := os.MkdirAll(out, 0o755); err != nil {
return nil, fmt.Errorf("depth-anything-cpp: mkdir %s: %w", out, err)
}
if rc := CapiExportColmap(r.handle, imgPath, out, 1); rc != 0 {
return nil, fmt.Errorf("depth-anything-cpp: da_capi_export_colmap failed (rc=%d): %s", rc, r.lastError())
}
paths = append(paths, out)
default:
return nil, fmt.Errorf("depth-anything-cpp: unknown export %q (want glb|colmap)", exp)
}
}
return paths, nil
}
// copyFloats copies n float32 values from a C heap pointer into a fresh Go
// slice so the C buffer can be freed afterwards.
func copyFloats(p *float32, n int) []float32 {
if p == nil || n <= 0 {
return nil
}
src := unsafe.Slice(p, n)
out := make([]float32, n)
copy(out, src)
return out
}
// copyBytes copies n bytes from a C heap pointer into a fresh Go slice.
func copyBytes(p *byte, n int) []byte {
if p == nil || n <= 0 {
return nil
}
src := unsafe.Slice(p, n)
out := make([]byte, n)
copy(out, src)
return out
}
// runDepthPose runs depth estimation then pose recovery on an image file. It
// returns the row-major depth map (length h*w), its dimensions, the 3x4
// extrinsics (12 floats) and 3x3 intrinsics (9 floats).
func (r *DepthAnythingCpp) runDepthPose(imagePath string) (depth []float32, h, w int, ext [12]float32, intr [9]float32, err error) {
if r.handle == 0 {
err = fmt.Errorf("depth-anything-cpp: model not loaded")
return
}
var ch, cw int32
ptr := CapiDepthPath(r.handle, imagePath, &ch, &cw)
if ptr == nil {
err = fmt.Errorf("depth-anything-cpp: da_capi_depth_path failed: %s", r.lastError())
return
}
h, w = int(ch), int(cw)
n := h * w
if n > 0 {
src := unsafe.Slice(ptr, n)
depth = make([]float32, n)
copy(depth, src)
}
CapiFreeFloats(ptr)
if rc := CapiPosePath(r.handle, imagePath, &ext[0], &intr[0]); rc != 0 {
err = fmt.Errorf("depth-anything-cpp: da_capi_pose_path failed (rc=%d): %s", rc, r.lastError())
return
}
return
}
// lastError returns the context's last error string, or "" if none.
func (r *DepthAnythingCpp) lastError() string {
if CapiLastError == nil || r.handle == 0 {
return ""
}
return CapiLastError(r.handle)
}
// materializeImage returns a filesystem path for an image argument that may be
// either an existing path or a base64-encoded payload. When the input is
// base64 it is decoded into a temp file; cleanup removes it (no-op for a path).
func materializeImage(arg string) (path string, cleanup func(), err error) {
cleanup = func() {}
if _, statErr := os.Stat(arg); statErr == nil {
return arg, cleanup, nil
}
// Strip an optional data URL prefix (data:image/...;base64,<payload>).
b64 := arg
if i := indexComma(b64); i >= 0 && hasDataPrefix(b64) {
b64 = b64[i+1:]
}
data, decErr := base64.StdEncoding.DecodeString(b64)
if decErr != nil {
return "", cleanup, fmt.Errorf("image is neither an existing path nor valid base64: %v", decErr)
}
f, tErr := os.CreateTemp("", "depth-anything-*.img")
if tErr != nil {
return "", cleanup, tErr
}
if _, wErr := f.Write(data); wErr != nil {
_ = f.Close()
_ = os.Remove(f.Name())
return "", cleanup, wErr
}
_ = f.Close()
name := f.Name()
return name, func() { _ = os.Remove(name) }, nil
}
func hasDataPrefix(s string) bool {
return len(s) >= 5 && s[:5] == "data:"
}
func indexComma(s string) int {
for i := 0; i < len(s); i++ {
if s[i] == ',' {
return i
}
}
return -1
}
// writeDepthPNG min-max normalises a depth map and writes it as an 8-bit
// grayscale PNG. Near = bright (255), far = dark (0), matching the usual
// depth-map convention for inverse-depth-like outputs.
func writeDepthPNG(dst string, depth []float32, h, w int) error {
if h <= 0 || w <= 0 || len(depth) < h*w {
return fmt.Errorf("depth-anything-cpp: writeDepthPNG: bad dims h=%d w=%d len=%d", h, w, len(depth))
}
dmin, dmax := minMax(depth)
span := dmax - dmin
if span <= 0 || math.IsNaN(float64(span)) {
span = 1
}
img := image.NewGray(image.Rect(0, 0, w, h))
for y := 0; y < h; y++ {
for x := 0; x < w; x++ {
v := depth[y*w+x]
n := (v - dmin) / span // 0..1
if math.IsNaN(float64(n)) {
n = 0
}
if n < 0 {
n = 0
} else if n > 1 {
n = 1
}
img.Pix[y*img.Stride+x] = uint8(n * 255)
}
}
f, err := os.Create(dst)
if err != nil {
return err
}
defer func() { _ = f.Close() }()
return png.Encode(f, img)
}
func minMax(v []float32) (mn, mx float32) {
if len(v) == 0 {
return 0, 0
}
mn, mx = v[0], v[0]
for _, x := range v {
if math.IsNaN(float64(x)) || math.IsInf(float64(x), 0) {
continue
}
if x < mn {
mn = x
}
if x > mx {
mx = x
}
}
return mn, mx
}

View File

@@ -0,0 +1,61 @@
package main
// main.go - entry point for the depth-anything-cpp gRPC backend.
//
// Dlopens libdepthanythingcpp-<variant>.so via purego at the path in
// DEPTHANYTHING_LIBRARY (set by run.sh based on /proc/cpuinfo), registers the
// da_capi_* C ABI symbols, then starts the gRPC server.
import (
"flag"
"os"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
type LibFuncs struct {
FuncPtr any
Name string
}
func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
if libName == "" {
libName = "./libdepthanythingcpp-fallback.so"
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
panic(err)
}
libFuncs := []LibFuncs{
{&CapiLoad, "da_capi_load"},
{&CapiFree, "da_capi_free"},
{&CapiLastError, "da_capi_last_error"},
{&CapiDepthPath, "da_capi_depth_path"},
{&CapiFreeFloats, "da_capi_free_floats"},
{&CapiPosePath, "da_capi_pose_path"},
{&CapiDepthDense, "da_capi_depth_dense"},
{&CapiPoints, "da_capi_points"},
{&CapiFreeBytes, "da_capi_free_bytes"},
{&CapiExportGlb, "da_capi_export_glb"},
{&CapiExportColmap, "da_capi_export_colmap"},
}
for _, lf := range libFuncs {
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
}
flag.Parse()
if err := grpc.StartServer(*addr, &DepthAnythingCpp{}); err != nil {
panic(err)
}
}

View File

@@ -0,0 +1,167 @@
package main
// main_test.go - end-to-end smoke test for the depth-anything-cpp gRPC backend.
//
// Spawns the compiled depth-anything-cpp binary on a free local port, dials it
// via gRPC, and exercises LoadModel + Predict against the test fixtures
// downloaded by test.sh: the small (vits) f32 GGUF of Depth Anything 3 and a
// real photo. Asserts that Predict returns a JSON payload with a positive
// depth-map width/height.
//
// The spec Skip()s cleanly if its fixtures (the model, the test image, the
// built binary, or the fallback .so) are missing, so the test target stays
// usable on a fresh checkout / on CI runners where the model hasn't been
// downloaded.
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"testing"
"time"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
func TestDepth(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "depth-anything-cpp backend smoke suite")
}
// freePort grabs an ephemeral TCP port and immediately releases it so the
// spawned backend can bind to it. There is a tiny TOCTOU window here but in
// practice it's adequate for a smoke test on a quiet runner.
func freePort() int {
l, err := net.Listen("tcp", "127.0.0.1:0")
Expect(err).ToNot(HaveOccurred(), "freePort listen")
port := l.Addr().(*net.TCPAddr).Port
Expect(l.Close()).To(Succeed())
return port
}
// startBackend spawns the depth-anything-cpp binary on the given port and waits
// until it accepts TCP connections (up to 10s). It mirrors how main.go resolves
// the purego library: the DEPTHANYTHING_LIBRARY env var points the dlopen at the
// freshly built fallback .so. The returned cleanup func kills the process.
func startBackend(port int) func() {
binary, err := filepath.Abs("./depth-anything-cpp")
Expect(err).ToNot(HaveOccurred())
if _, err := os.Stat(binary); err != nil {
Skip(fmt.Sprintf("backend binary not built: %s (run `make depth-anything-cpp` first)", binary))
}
libPath, err := filepath.Abs("./libdepthanythingcpp-fallback.so")
Expect(err).ToNot(HaveOccurred())
if _, err := os.Stat(libPath); err != nil {
Skip(fmt.Sprintf("fallback library not built: %s (run `make libdepthanythingcpp-fallback.so` first)", libPath))
}
addr := fmt.Sprintf("127.0.0.1:%d", port)
cmd := exec.Command(binary, "--addr", addr)
cmd.Env = append(os.Environ(), "DEPTHANYTHING_LIBRARY="+libPath)
cmd.Stdout = os.Stderr
cmd.Stderr = os.Stderr
Expect(cmd.Start()).To(Succeed())
cleanup := func() {
if cmd.Process != nil {
_ = cmd.Process.Kill()
_, _ = cmd.Process.Wait()
}
}
deadline := time.Now().Add(10 * time.Second)
for time.Now().Before(deadline) {
c, err := net.DialTimeout("tcp", addr, 200*time.Millisecond)
if err == nil {
_ = c.Close()
return cleanup
}
time.Sleep(200 * time.Millisecond)
}
cleanup()
Fail(fmt.Sprintf("backend did not become ready on %s within 10s", addr))
return func() {}
}
// loadTestImage reads the test image downloaded by test.sh and returns its
// base64-encoded content (one of the wire formats accepted by Predict).
func loadTestImage() string {
imgPath, err := filepath.Abs("test-data/test.jpg")
Expect(err).ToNot(HaveOccurred())
imgBytes, err := os.ReadFile(imgPath)
if err != nil {
Skip(fmt.Sprintf("test image not present: %s (run test.sh first)", imgPath))
}
return base64.StdEncoding.EncodeToString(imgBytes)
}
// dialBackend opens a gRPC client connection to the spawned backend.
func dialBackend(port int) (pb.BackendClient, func()) {
addr := fmt.Sprintf("127.0.0.1:%d", port)
conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials()))
Expect(err).ToNot(HaveOccurred())
return pb.NewBackendClient(conn), func() { _ = conn.Close() }
}
// modelPathOrSkip resolves the model file under ./test-models/ and Skip()s the
// current spec if it's missing (not present on a fresh checkout / on CI runners
// without the download).
func modelPathOrSkip(name string) string {
modelDir, err := filepath.Abs("test-models")
Expect(err).ToNot(HaveOccurred())
modelPath := filepath.Join(modelDir, name)
if _, err := os.Stat(modelPath); err != nil {
Skip(fmt.Sprintf("model not present: %s (run test.sh first)", modelPath))
}
return modelPath
}
var _ = Describe("depth-anything-cpp backend", func() {
It("runs depth+pose against a known-good image", func() {
modelPath := modelPathOrSkip("depth-anything-small-f32.gguf")
imgB64 := loadTestImage()
port := freePort()
cleanup := startBackend(port)
defer cleanup()
client, closeConn := dialBackend(port)
defer closeConn()
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)
defer cancel()
loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{
Model: "depth-anything-small-f32.gguf",
ModelFile: modelPath,
Threads: 4,
})
Expect(err).ToNot(HaveOccurred(), "LoadModel")
Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage())
// Predict runs depth+pose and returns the JSON depthResult in Reply.Message.
reply, err := client.Predict(ctx, &pb.PredictOptions{
Images: []string{imgB64},
})
Expect(err).ToNot(HaveOccurred(), "Predict")
var res depthResult
Expect(json.Unmarshal(reply.GetMessage(), &res)).To(Succeed(), "Predict returned non-JSON: %q", string(reply.GetMessage()))
Expect(res.DepthW).To(BeNumerically(">", 0), "depth width should be positive")
Expect(res.DepthH).To(BeNumerically(">", 0), "depth height should be positive")
_, _ = fmt.Fprintf(GinkgoWriter, "depth OK: %dx%d min=%.3f max=%.3f\n",
res.DepthW, res.DepthH, res.DepthMin, res.DepthMax)
})
})

View File

@@ -0,0 +1,59 @@
#!/bin/bash
# Script to copy the appropriate libraries based on architecture
set -e
CURDIR=$(dirname "$(realpath $0)")
REPO_ROOT="${CURDIR}/../../.."
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
# x86_64 architecture
echo "Detected x86_64 architecture, copying x86_64 libraries..."
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
# ARM64 architecture
echo "Detected ARM64 architecture, copying ARM64 libraries..."
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ $(uname -s) = "Darwin" ]; then
echo "Detected Darwin"
else
echo "Error: Could not detect architecture"
exit 1
fi
# Package GPU libraries based on BUILD_TYPE
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
package_gpu_libs
fi
echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/

View File

@@ -0,0 +1,52 @@
#!/bin/bash
set -ex
# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath $0)")
cd /
echo "CPU info:"
if [ "$(uname)" != "Darwin" ]; then
grep -e "model\sname" /proc/cpuinfo | head -1
grep -e "flags" /proc/cpuinfo | head -1
fi
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
LIBRARY="$CURDIR/libdepthanythingcpp-avx.so"
fi
fi
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
echo "CPU: AVX2 found OK"
if [ -e $CURDIR/libdepthanythingcpp-avx2.so ]; then
LIBRARY="$CURDIR/libdepthanythingcpp-avx2.so"
fi
fi
# Check avx 512
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
echo "CPU: AVX512F found OK"
if [ -e $CURDIR/libdepthanythingcpp-avx512.so ]; then
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
fi
fi
fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export DEPTHANYTHING_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so"
echo "Using library: $LIBRARY"
exec $CURDIR/lib/ld.so $CURDIR/depth-anything-cpp "$@"
fi
echo "Using library: $LIBRARY"
exec $CURDIR/depth-anything-cpp "$@"

View File

@@ -0,0 +1,45 @@
#!/bin/bash
set -e
CURDIR=$(dirname "$(realpath $0)")
echo "Running depth-anything-cpp backend tests..."
# Test model from the mudler/depth-anything.cpp-gguf HuggingFace repo. The small
# (vits) f32 GGUF is the lightest backbone (~131 MB), so it keeps the download
# cheap. It is resumed with `curl -C -` and skipped entirely if already present.
DEPTHANYTHING_MODEL_DIR="${DEPTHANYTHING_MODEL_DIR:-$CURDIR/test-models}"
DEPTHANYTHING_MODEL_FILE="${DEPTHANYTHING_MODEL_FILE:-depth-anything-small-f32.gguf}"
DEPTHANYTHING_MODEL_URL="${DEPTHANYTHING_MODEL_URL:-https://huggingface.co/mudler/depth-anything.cpp-gguf/resolve/main/depth-anything-small-f32.gguf}"
mkdir -p "$DEPTHANYTHING_MODEL_DIR"
if [ ! -f "$DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE" ]; then
echo "Downloading depth-anything small f32 model (~131 MB)..."
# -C - resumes a partial download so an interrupted run doesn't restart from 0.
curl -L -C - -o "$DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE" "$DEPTHANYTHING_MODEL_URL" --progress-bar
fi
# Use a real photo (people + cars) from the upstream rf-detr.cpp repo (~46 KB).
# Depth estimation needs real content; a synthetic image would be degenerate.
TEST_IMAGE_DIR="$CURDIR/test-data"
TEST_IMAGE_FILE="$TEST_IMAGE_DIR/test.jpg"
TEST_IMAGE_URL="${TEST_IMAGE_URL:-https://raw.githubusercontent.com/mudler/rf-detr.cpp/main/tests/fixtures/ci/test_image.jpg}"
mkdir -p "$TEST_IMAGE_DIR"
if [ ! -f "$TEST_IMAGE_FILE" ]; then
echo "Downloading test image..."
curl -L -o "$TEST_IMAGE_FILE" "$TEST_IMAGE_URL" --progress-bar
fi
echo "depth-anything-cpp test setup complete."
echo " model: $DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE"
echo " test image: $TEST_IMAGE_FILE"
# Run the Go smoke test: spawns the backend binary on a free port, calls
# LoadModel + Predict via gRPC against the downloaded GGUF + image.
echo ""
echo "Running Go smoke test..."
cd "$CURDIR"
go test -v -timeout 30m ./...

View File

@@ -458,6 +458,126 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-locate-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-locate-anything-cpp
- &depthanything
name: "depth-anything"
alias: "depth-anything"
license: apache-2.0
description: |
Depth Anything 3 monocular metric depth + camera pose estimation in C/C++
using GGML. Loads pre-built GGUF weights and, given an image, returns a
dense depth map plus the recovered camera extrinsics (3x4) and intrinsics
(3x3). No Python at inference (purego, cgo-less).
urls:
- https://github.com/mudler/depth-anything.cpp
- https://huggingface.co/depth-anything/Depth-Anything-V3
tags:
- depth-estimation
- camera-pose
- depth-anything
- gpu
- cpu
capabilities:
default: "cpu-depth-anything-cpp"
nvidia: "cuda12-depth-anything-cpp"
nvidia-cuda-12: "cuda12-depth-anything-cpp"
nvidia-cuda-13: "cuda13-depth-anything-cpp"
nvidia-l4t: "nvidia-l4t-arm64-depth-anything-cpp"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-depth-anything-cpp"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
intel: "intel-sycl-f32-depth-anything-cpp"
vulkan: "vulkan-depth-anything-cpp"
- !!merge <<: *depthanything
name: "depth-anything-development"
capabilities:
default: "cpu-depth-anything-cpp-development"
nvidia: "cuda12-depth-anything-cpp-development"
nvidia-cuda-12: "cuda12-depth-anything-cpp-development"
nvidia-cuda-13: "cuda13-depth-anything-cpp-development"
nvidia-l4t: "nvidia-l4t-arm64-depth-anything-cpp-development"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-depth-anything-cpp-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
intel: "intel-sycl-f32-depth-anything-cpp-development"
vulkan: "vulkan-depth-anything-cpp-development"
- !!merge <<: *depthanything
name: "cpu-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-cpu-depth-anything-cpp
- !!merge <<: *depthanything
name: "cpu-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-cpu-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda12-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda12-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-depth-anything-cpp
- !!merge <<: *depthanything
name: "nvidia-l4t-arm64-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "nvidia-l4t-arm64-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f32-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f32-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f32-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f16-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f16-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-depth-anything-cpp
- !!merge <<: *depthanything
name: "vulkan-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-depth-anything-cpp
- !!merge <<: *depthanything
name: "vulkan-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-depth-anything-cpp
- &vllm
name: "vllm"
license: apache-2.0

66
core/backend/depth.go Normal file
View File

@@ -0,0 +1,66 @@
package backend
import (
"context"
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)
// Depth runs depth estimation (Depth Anything 3) on the supplied image and
// returns the full DepthResponse: per-pixel metric depth + confidence + sky,
// camera pose (extrinsics/intrinsics), an optional 3D point cloud and any
// requested exports (glb/colmap). The include_* flags and exports mirror the
// DepthRequest proto so callers can ask for less work.
func Depth(
ctx context.Context,
in *proto.DepthRequest,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
modelConfig config.ModelConfig,
) (*proto.DepthResponse, error) {
opts := ModelOptions(modelConfig, appConfig)
depthModel, err := loader.Load(opts...)
if err != nil {
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
return nil, err
}
if depthModel == nil {
return nil, fmt.Errorf("could not load depth model")
}
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
startTime = time.Now()
}
res, err := depthModel.Depth(ctx, in)
if appConfig.EnableTracing {
errStr := ""
if err != nil {
errStr = err.Error()
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceDepth,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(in.GetSrc(), 200),
Error: errStr,
Data: map[string]any{
"exports": in.GetExports(),
},
})
}
return res, err
}

View File

@@ -21,6 +21,7 @@ const (
UsecaseSoundGeneration = "sound_generation"
UsecaseRerank = "rerank"
UsecaseDetection = "detection"
UsecaseDepth = "depth"
UsecaseVAD = "vad"
UsecaseAudioTransform = "audio_transform"
UsecaseDiarization = "diarization"
@@ -44,6 +45,7 @@ const (
MethodSoundGeneration GRPCMethod = "SoundGeneration"
MethodTokenizeString GRPCMethod = "TokenizeString"
MethodDetect GRPCMethod = "Detect"
MethodDepth GRPCMethod = "Depth"
MethodRerank GRPCMethod = "Rerank"
MethodVAD GRPCMethod = "VAD"
MethodAudioTransform GRPCMethod = "AudioTransform"
@@ -141,6 +143,11 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
GRPCMethod: MethodDetect,
Description: "Object detection via the Detect RPC with bounding boxes.",
},
UsecaseDepth: {
Flag: FLAG_DEPTH,
GRPCMethod: MethodDepth,
Description: "Per-pixel metric depth, camera pose and 3D point cloud via the Depth RPC (Depth Anything 3).",
},
UsecaseVAD: {
Flag: FLAG_VAD,
GRPCMethod: MethodVAD,
@@ -488,6 +495,13 @@ var BackendCapabilities = map[string]BackendCapability{
DefaultUsecases: []string{UsecaseDetection},
Description: "RF-DETR C++ object detection",
},
"depth-anything": {
GRPCMethods: []GRPCMethod{MethodDepth, MethodPredict, MethodGenerateImage},
PossibleUsecases: []string{UsecaseDepth},
DefaultUsecases: []string{UsecaseDepth},
AcceptsImages: true,
Description: "Depth Anything 3 C++ — per-pixel metric depth, camera pose and 3D point cloud",
},
// --- Face and speaker recognition backends ---
"insightface": {

View File

@@ -64,6 +64,7 @@ var UsecaseOptions = []FieldOption{
{Value: "image", Label: "Image"},
{Value: "vision", Label: "Vision"},
{Value: "detection", Label: "Detection"},
{Value: "depth", Label: "Depth"},
{Value: "face_recognition", Label: "Face Recognition"},
{Value: "transcript", Label: "Transcript"},
{Value: "diarization", Label: "Diarization"},

View File

@@ -1291,6 +1291,10 @@ const (
// chat/completion/embeddings.
FLAG_SCORE ModelConfigUsecase = 0b10000000000000000000
// Marks a model as wired for the Depth gRPC primitive (per-pixel
// metric depth + camera pose + 3D point cloud via Depth Anything 3).
FLAG_DEPTH ModelConfigUsecase = 0b100000000000000000000
// Common Subsets
FLAG_LLM ModelConfigUsecase = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
@@ -1348,6 +1352,7 @@ func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
"FLAG_DIARIZATION": FLAG_DIARIZATION,
"FLAG_REALTIME_AUDIO": FLAG_REALTIME_AUDIO,
"FLAG_SCORE": FLAG_SCORE,
"FLAG_DEPTH": FLAG_DEPTH,
}
}
@@ -1491,6 +1496,13 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
}
}
if (u & FLAG_DEPTH) == FLAG_DEPTH {
depthBackends := []string{"depth-anything"}
if !slices.Contains(depthBackends, c.Backend) {
return false
}
}
if (u & FLAG_FACE_RECOGNITION) == FLAG_FACE_RECOGNITION {
faceBackends := []string{"insightface"}
if !slices.Contains(faceBackends, c.Backend) {

View File

@@ -0,0 +1,95 @@
package localai
import (
"encoding/base64"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)
// DepthEndpoint is the LocalAI Depth endpoint exposing the full Depth Anything 3
// output (per-pixel metric depth + confidence + sky, camera pose, 3D point cloud
// and optional glb/COLMAP exports).
// @Summary Estimates per-pixel depth (and optionally pose/points) from an image.
// @Tags depth
// @Param request body schema.DepthRequest true "query params"
// @Success 200 {object} schema.DepthResponse "Response"
// @Router /v1/depth [post]
func DepthEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.DepthRequest)
if !ok || input.Model == "" {
return echo.ErrBadRequest
}
cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || cfg == nil {
return echo.ErrBadRequest
}
xlog.Debug("Depth", "image", input.Image, "backend", cfg.Backend)
image, err := decodeImageInput(input.Image)
if err != nil {
return err
}
// Default to returning everything the model can produce when the
// caller hasn't asked for any specific subset, so a bare request is
// still useful.
includeDepth := input.IncludeDepth
includeConfidence := input.IncludeConfidence
includePose := input.IncludePose
includeSky := input.IncludeSky
includePoints := input.IncludePoints
if !includeDepth && !includeConfidence && !includePose && !includeSky && !includePoints {
includeDepth = true
includeConfidence = true
includePose = true
includeSky = true
}
req := &proto.DepthRequest{
Src: image,
Dst: input.Dst,
IncludeDepth: includeDepth,
IncludeConfidence: includeConfidence,
IncludePose: includePose,
IncludeSky: includeSky,
IncludePoints: includePoints,
PointsConfThresh: input.PointsConfThresh,
Exports: input.Exports,
}
res, err := backend.Depth(c.Request().Context(), req, ml, appConfig, *cfg)
if err != nil {
return mapBackendError(err)
}
response := schema.DepthResponse{
Width: res.GetWidth(),
Height: res.GetHeight(),
Depth: res.GetDepth(),
Confidence: res.GetConfidence(),
Sky: res.GetSky(),
Extrinsics: res.GetExtrinsics(),
Intrinsics: res.GetIntrinsics(),
NumPoints: res.GetNumPoints(),
Points: res.GetPoints(),
ExportPaths: res.GetExportPaths(),
IsMetric: res.GetIsMetric(),
}
if len(res.GetPointColors()) > 0 {
response.PointColors = base64.StdEncoding.EncodeToString(res.GetPointColors())
}
return c.JSON(200, response)
}
}

View File

@@ -98,6 +98,12 @@ func RegisterLocalAIRoutes(router *echo.Echo,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DETECTION)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DetectionRequest) }))
depthHandler := localai.DepthEndpoint(cl, ml, appConfig)
router.POST("/v1/depth",
depthHandler,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DEPTH)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DepthRequest) }))
// Face recognition endpoints
faceMw := []echo.MiddlewareFunc{
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_FACE_RECOGNITION)),

View File

@@ -181,6 +181,40 @@ type Detection struct {
Mask string `json:"mask,omitempty"` // base64-encoded PNG segmentation mask
}
// DepthRequest is the request body for the /v1/depth endpoint. It exposes the
// full Depth Anything 3 output surface; the include_* flags and exports let a
// caller ask for less work (e.g. depth only, or depth+pose without the point
// cloud).
type DepthRequest struct {
BasicModelRequest
Image string `json:"image"` // URL or base64-encoded image to analyze
Dst string `json:"dst,omitempty"` // optional output directory for exports (glb/colmap)
IncludeDepth bool `json:"include_depth,omitempty"` // return the per-pixel depth map
IncludeConfidence bool `json:"include_confidence,omitempty"` // return the per-pixel confidence map (DualDPT)
IncludePose bool `json:"include_pose,omitempty"` // return camera extrinsics/intrinsics (DualDPT)
IncludeSky bool `json:"include_sky,omitempty"` // return the per-pixel sky map (mono models)
IncludePoints bool `json:"include_points,omitempty"` // back-project to a 3D point cloud (DualDPT)
PointsConfThresh float32 `json:"points_conf_thresh,omitempty"` // keep points with confidence >= this threshold
Exports []string `json:"exports,omitempty"` // requested exports: "glb", "colmap"
}
// DepthResponse is the JSON response for the /v1/depth endpoint, mirroring the
// DepthResponse proto.
type DepthResponse struct {
Width int32 `json:"width"`
Height int32 `json:"height"`
Depth []float32 `json:"depth,omitempty"` // width*height row-major metric depth
Confidence []float32 `json:"confidence,omitempty"` // width*height row-major confidence (DualDPT)
Sky []float32 `json:"sky,omitempty"` // width*height row-major sky map (mono)
Extrinsics []float32 `json:"extrinsics,omitempty"` // 12 floats, 3x4 row-major (world-to-camera)
Intrinsics []float32 `json:"intrinsics,omitempty"` // 9 floats, 3x3 row-major
NumPoints int32 `json:"num_points,omitempty"` // number of 3D points
Points []float32 `json:"points,omitempty"` // num_points*3 xyz, world space
PointColors string `json:"point_colors,omitempty"` // base64-encoded num_points*3 uint8 rgb
ExportPaths []string `json:"export_paths,omitempty"` // paths written for the requested exports
IsMetric bool `json:"is_metric"` // depth is in metric units
}
// ─── Face recognition ──────────────────────────────────────────────
//
// FacialArea describes a bounding box for a detected face.

View File

@@ -169,6 +169,9 @@ func (c *fakeBackendClient) SoundGeneration(_ context.Context, _ *pb.SoundGenera
func (c *fakeBackendClient) Detect(_ context.Context, _ *pb.DetectOptions, _ ...ggrpc.CallOption) (*pb.DetectResponse, error) {
return nil, nil
}
func (c *fakeBackendClient) Depth(_ context.Context, _ *pb.DepthRequest, _ ...ggrpc.CallOption) (*pb.DepthResponse, error) {
return nil, nil
}
func (c *fakeBackendClient) FaceVerify(_ context.Context, _ *pb.FaceVerifyRequest, _ ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
return nil, nil
}

View File

@@ -152,6 +152,12 @@ func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOption
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
defer c.track(ctx)()
res, err := c.Backend.Depth(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
defer c.track(ctx)()
res, err := c.Backend.Rerank(ctx, in, opts...)

View File

@@ -100,6 +100,10 @@ func (f *fakeGRPCBackend) Detect(_ context.Context, _ *pb.DetectOptions, _ ...gg
return &pb.DetectResponse{}, nil
}
func (f *fakeGRPCBackend) Depth(_ context.Context, _ *pb.DepthRequest, _ ...ggrpc.CallOption) (*pb.DepthResponse, error) {
return &pb.DepthResponse{}, nil
}
func (f *fakeGRPCBackend) FaceVerify(_ context.Context, _ *pb.FaceVerifyRequest, _ ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
return &pb.FaceVerifyResponse{}, nil
}

View File

@@ -25,6 +25,7 @@ const (
BackendTraceRerank BackendTraceType = "rerank"
BackendTraceTokenize BackendTraceType = "tokenize"
BackendTraceDetection BackendTraceType = "detection"
BackendTraceDepth BackendTraceType = "depth"
BackendTraceFaceVerify BackendTraceType = "face_verify"
BackendTraceFaceAnalyze BackendTraceType = "face_analyze"
BackendTraceVoiceVerify BackendTraceType = "voice_verify"

View File

@@ -8018,6 +8018,150 @@
- filename: locate-anything-q8_0.gguf
uri: huggingface://mudler/locate-anything.cpp-gguf/locate-anything-q8_0.gguf
sha256: 0909d8a1aba584b482d501baae032611d1559878be1b7f6606ba516687c5380d
- &depth-anything-3-base
name: depth-anything-3-base
url: github:mudler/LocalAI/gallery/virtual.yaml@master
urls:
- https://github.com/mudler/depth-anything.cpp
- https://huggingface.co/depth-anything/Depth-Anything-V3
- https://huggingface.co/mudler/depth-anything.cpp-gguf
description: |
Depth Anything 3 (base) monocular metric depth + camera pose, served via the
native depth-anything.cpp backend (C++/ggml + purego, no Python at inference).
Given an image it returns a dense depth map plus the recovered camera
extrinsics (3x4) and intrinsics (3x3). Use GenerateImage (src -> normalized
depth PNG at dst) or Predict (JSON depth stats + pose). q4_k is the
recommended CPU default.
license: apache-2.0
icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4
tags:
- depth-estimation
- camera-pose
- depth-anything
- native
- cpp
- cpu
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-q4_k.gguf
files:
- filename: depth-anything-base-q4_k.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-q4_k.gguf
sha256: "43cd45d00f9024f4319f4beabd73155db5132e4b575bc52eff4131262c9d78f1"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-base-q8_0
description: |
Depth Anything 3 (base), q8_0 — near-lossless 8-bit quant (~149 MB). Same
depth + camera pose output as the q4_k default at higher fidelity.
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-q8_0.gguf
files:
- filename: depth-anything-base-q8_0.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-q8_0.gguf
sha256: "71b1c953113657f9a4fbef43ab6a16fe7a6f87b36da113a184f13c4a564968a0"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-base-f16
description: |
Depth Anything 3 (base), f16 — half precision (~233 MB), no measurable
accuracy loss vs f32. Depth + camera pose.
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-f16.gguf
files:
- filename: depth-anything-base-f16.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-f16.gguf
sha256: "2975419c99702ca646d5b7120c53e35c9fee158f0a803577241d16957f75624b"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-base-f32
description: |
Depth Anything 3 (base), f32 — maximum fidelity (~412 MB). Reference-parity
depth + camera pose.
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-f32.gguf
files:
- filename: depth-anything-base-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-f32.gguf
sha256: "1b13b166e8a8b4f2c862f42d36edb2f9aab995a18cc527a52b9f160b99c6b8da"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-giant
description: |
Depth Anything 3 (giant / vitg), f32 — the large backbone (~4.9 GB) for
maximum quality depth + camera pose. GPU recommended.
tags:
- depth-estimation
- camera-pose
- depth-anything
- native
- cpp
- gpu
overrides:
backend: depth-anything
parameters:
model: depth-anything-giant-f32.gguf
files:
- filename: depth-anything-giant-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-giant-f32.gguf
sha256: "392edf64626be6a985487beb39c8d54cdc14f7feb2b53323742c96b71e7e7181"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-small
description: |
Depth Anything 3 (small / vits), f32 — the smallest backbone (~131 MB) for
fast CPU depth + camera pose. Same output as base at lower latency.
overrides:
backend: depth-anything
parameters:
model: depth-anything-small-f32.gguf
files:
- filename: depth-anything-small-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-small-f32.gguf
sha256: "eab5597e01dedde1a20c038590ae8c887b85ec35b882581138c08308e92c41e5"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-large
description: |
Depth Anything 3 (large / vitl), f32 (~1.6 GB) — higher quality depth +
camera pose than base. GPU recommended for interactive use.
tags:
- depth-estimation
- camera-pose
- depth-anything
- native
- cpp
- gpu
overrides:
backend: depth-anything
parameters:
model: depth-anything-large-f32.gguf
files:
- filename: depth-anything-large-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-large-f32.gguf
sha256: "a79eb3e19e8ec49f4daac484fb5fb67e15baac61518d229cf819e40c87080906"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-mono-large
description: |
Depth Anything 3 (monocular large / vitl), f32 (~1.3 GB) — single-image
monocular depth + a sky mask (no camera pose). DPT single-head variant; use
GenerateImage (src -> normalized depth PNG) or Predict (JSON depth stats).
overrides:
backend: depth-anything
parameters:
model: depth-anything-mono-large-f32.gguf
files:
- filename: depth-anything-mono-large-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-mono-large-f32.gguf
sha256: "291b1a554af907c3f79986ee225da8933be5f7a31d73c81d06784cda284535de"
- name: rfdetr-cpp-base
url: github:mudler/LocalAI/gallery/virtual.yaml@master
urls:

View File

@@ -54,6 +54,7 @@ type Backend interface {
TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...grpc.CallOption) (*pb.FaceAnalyzeResponse, error)
VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)

View File

@@ -82,6 +82,10 @@ func (llm *Base) Detect(*pb.DetectOptions) (pb.DetectResponse, error) {
return pb.DetectResponse{}, fmt.Errorf("unimplemented")
}
func (llm *Base) Depth(*pb.DepthRequest) (pb.DepthResponse, error) {
return pb.DepthResponse{}, fmt.Errorf("unimplemented")
}
func (llm *Base) FaceVerify(*pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
return pb.FaceVerifyResponse{}, fmt.Errorf("unimplemented")
}

View File

@@ -634,6 +634,24 @@ func (c *Client) Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.
return client.Detect(ctx, in, opts...)
}
func (c *Client) Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
}
c.setBusy(true)
defer c.setBusy(false)
c.wdMark()
defer c.wdUnMark()
conn, err := c.dial()
if err != nil {
return nil, err
}
defer conn.Close()
client := pb.NewBackendClient(conn)
return client.Depth(ctx, in, opts...)
}
func (c *Client) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error) {
if !c.parallel {
c.opMutex.Lock()

View File

@@ -73,6 +73,10 @@ func (e *embedBackend) Detect(ctx context.Context, in *pb.DetectOptions, opts ..
return e.s.Detect(ctx, in)
}
func (e *embedBackend) Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error) {
return e.s.Depth(ctx, in)
}
func (e *embedBackend) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error) {
return e.s.FaceVerify(ctx, in)
}

View File

@@ -19,6 +19,7 @@ type AIModel interface {
GenerateImage(*pb.GenerateImageRequest) error
GenerateVideo(*pb.GenerateVideoRequest) error
Detect(*pb.DetectOptions) (pb.DetectResponse, error)
Depth(*pb.DepthRequest) (pb.DepthResponse, error)
FaceVerify(*pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error)
FaceAnalyze(*pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error)
VoiceVerify(*pb.VoiceVerifyRequest) (pb.VoiceVerifyResponse, error)

View File

@@ -156,6 +156,18 @@ func (s *server) Detect(ctx context.Context, in *pb.DetectOptions) (*pb.DetectRe
return &res, nil
}
func (s *server) Depth(ctx context.Context, in *pb.DepthRequest) (*pb.DepthResponse, error) {
if s.llm.Locking() {
s.llm.Lock()
defer s.llm.Unlock()
}
res, err := s.llm.Depth(in)
if err != nil {
return nil, err
}
return &res, nil
}
func (s *server) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest) (*pb.FaceVerifyResponse, error) {
if s.llm.Locking() {
s.llm.Lock()

View File

@@ -108,6 +108,12 @@ func (c *ConnectionEvictingClient) Detect(ctx context.Context, in *pb.DetectOpti
return result, err
}
func (c *ConnectionEvictingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
result, err := c.Backend.Depth(ctx, in, opts...)
c.checkErr(err)
return result, err
}
func (c *ConnectionEvictingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
result, err := c.Backend.Rerank(ctx, in, opts...)
c.checkErr(err)