Compare commits

..

11 Commits

Author SHA1 Message Date
Ettore Di Giacinto
ed29a87db5 docs: document custom chat_template_kwargs (model + per-request)
Issue #10329.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 08:07:11 +00:00
Ettore Di Giacinto
826d91ddf4 feat(llama-cpp): generic chat_template_kwargs merge (drop per-key blocks)
Replace the per-key enable_thinking/reasoning_effort handling in both the
streaming and non-streaming chat paths with a single block that parses the
chat_template_kwargs JSON blob resolved by the Go layer and merges every key
into body_json. New jinja template levers (e.g. preserve_thinking) now need
no C++ change. Issue #10329.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 08:03:50 +00:00
Ettore Di Giacinto
5b7318816a feat(http): wire request metadata to config.RequestMetadata
The OpenAI request metadata field was parsed but unused; stamp it onto the
per-request ModelConfig so gRPCPredictOpts forwards it as chat_template_kwargs
overrides. Issue #10329.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 07:56:31 +00:00
Ettore Di Giacinto
acf4f5add3 feat(backend): forward resolved chat_template_kwargs blob to backends
gRPCPredictOpts now merges per-request client metadata over the server-derived
enable_thinking/reasoning_effort (reaching all backends via the standalone keys)
and serialises the resolved chat_template_kwargs map into a JSON blob for
llama.cpp, written last so a client cannot clobber it. Issue #10329.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 07:54:48 +00:00
Ettore Di Giacinto
bc8a1be801 feat(config): add chat_template_kwargs model field + resolver
Adds the ChatTemplateKwargs model-config map and RequestMetadata carrier,
plus ResolveChatTemplateKwargs which layers the config map under coerced
request metadata. Foundation for generic jinja chat-template kwargs (issue #10329).

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 07:49:13 +00:00
LocalAI [bot]
8bd2df8f68 fix(launcher): truncate download status labels to stop progress dialog blowout (#10357)
fix(launcher): truncate download status labels to stop dialog blowout

The download progress windows place a ProgressBar and a status Label in the
same VBox. On failure the status label is set to "Download failed: <error>",
and the error commonly contains a long, unbreakable URL/path. A Fyne label
with default settings reports its MinSize as the full single-line text width,
so a long message stretches the window — and the progress bar sharing the
VBox — arbitrarily wide (fixes #10355).

Set Truncation = fyne.TextTruncateEllipsis on the four affected status labels
(the main-window status label plus the status label in each of the three
showDownloadProgress implementations). Truncation collapses the label's
MinSize to roughly one character plus the ellipsis regardless of content, so
the window keeps its intended size. TextWrapWord is not enough because it
cannot break a spaceless URL. The full error text remains visible via the
dialog.ShowError call already present in each path.


Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 09:42:07 +02:00
neo
6799d802d3 docs: add translated README links (#10353) 2026-06-16 09:06:49 +02:00
LocalAI [bot]
40cc549882 fix(ci): track ServeurpersoCom/qwentts.cpp for QWEN3TTS_CPP_VERSION bumps (#10356)
The qwen3-tts backend migrated from predict-woo/qwen3-tts.cpp to
ServeurpersoCom/qwentts.cpp (the Makefile QWEN3TTS_REPO already points
there), but the bump_deps matrix still tracked the old repo. That made
the nightly bumper open PRs (e.g. #10334) against the wrong upstream.
Point the matrix entry at the new repo and its master branch.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-16 09:04:52 +02:00
LocalAI [bot]
3d295adfa8 chore: ⬆️ Update ikawrakow/ik_llama.cpp to 2f524850a1f67716bc0ba80ffa30ce39c5b8bd5f (#10336)
⬆️ Update ikawrakow/ik_llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-06-16 09:04:35 +02:00
LocalAI [bot]
4fa2064875 chore: ⬆️ Update ggml-org/llama.cpp to 7dad2f1a17d65b5e2034c277125bc9f97573a779 (#10337)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-06-16 08:22:26 +02:00
LocalAI [bot]
cb74399b3a chore: ⬆️ Update ggml-org/whisper.cpp to 0ec0845110dc934911dc48e8c5beb5ad3189b3f3 (#10349)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-06-16 08:22:10 +02:00
46 changed files with 338 additions and 1808 deletions

View File

@@ -716,19 +716,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -1595,19 +1582,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -1647,19 +1621,6 @@ include:
backend: "locate-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-depth-anything-cpp'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -2937,19 +2898,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
@@ -2963,19 +2911,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
@@ -2989,19 +2924,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
@@ -3016,20 +2938,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-depth-anything-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
@@ -3044,20 +2952,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-depth-anything-cpp'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
@@ -3164,19 +3058,6 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-depth-anything-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "depth-anything-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
# whisper
- build-type: ''
cuda-major-version: ""

View File

@@ -66,9 +66,9 @@ jobs:
variable: "LOCATEANYTHING_VERSION"
branch: "master"
file: "backend/go/locate-anything-cpp/Makefile"
- repository: "predict-woo/qwen3-tts.cpp"
- repository: "ServeurpersoCom/qwentts.cpp"
variable: "QWEN3TTS_CPP_VERSION"
branch: "main"
branch: "master"
file: "backend/go/qwen3-tts-cpp/Makefile"
- repository: "ServeurpersoCom/omnivoice.cpp"
variable: "OMNIVOICE_VERSION"

View File

@@ -29,6 +29,18 @@
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
<!-- Keep these links; the translations are synced daily. -->
<p align="center">
<a href="https://zdoc.app/de/mudler/LocalAI">Deutsch</a> |
<a href="https://zdoc.app/es/mudler/LocalAI">Español</a> |
<a href="https://zdoc.app/fr/mudler/LocalAI">français</a> |
<a href="https://zdoc.app/ja/mudler/LocalAI">日本語</a> |
<a href="https://zdoc.app/ko/mudler/LocalAI">한국어</a> |
<a href="https://zdoc.app/pt/mudler/LocalAI">Português</a> |
<a href="https://zdoc.app/ru/mudler/LocalAI">Русский</a> |
<a href="https://zdoc.app/zh/mudler/LocalAI">中文</a>
</p>
**LocalAI** is the open-source AI engine. Run any model - LLMs, vision, voice, image, video - on any hardware. No GPU required.
**A small core, not a bundle.** Each backend wraps a best-in-class engine (llama.cpp, vLLM, whisper.cpp, stable-diffusion, MLX...) in its own image, pulled only when a model needs it. You install nothing you don't use.

View File

@@ -24,7 +24,6 @@ service Backend {
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
rpc Detect(DetectOptions) returns (DetectResponse) {}
rpc Depth(DepthRequest) returns (DepthResponse) {}
rpc FaceVerify(FaceVerifyRequest) returns (FaceVerifyResponse) {}
rpc FaceAnalyze(FaceAnalyzeRequest) returns (FaceAnalyzeResponse) {}
rpc VoiceVerify(VoiceVerifyRequest) returns (VoiceVerifyResponse) {}
@@ -671,35 +670,6 @@ message DetectResponse {
repeated Detection Detections = 1;
}
// --- Depth estimation messages (Depth Anything 3) ---
message DepthRequest {
string src = 1; // input image (filesystem path or base64-encoded payload)
string dst = 2; // optional output directory for exports (glb/colmap)
bool include_depth = 3; // return the per-pixel metric depth map
bool include_confidence = 4; // return the per-pixel confidence map (DualDPT)
bool include_pose = 5; // return camera extrinsics/intrinsics (DualDPT)
bool include_sky = 6; // return the per-pixel sky map (mono models)
bool include_points = 7; // back-project to a 3D point cloud (DualDPT)
float points_conf_thresh = 8; // keep points with confidence >= this threshold
repeated string exports = 9; // requested exports: "glb", "colmap"
}
message DepthResponse {
int32 width = 1; // processed depth-map width
int32 height = 2; // processed depth-map height
repeated float depth = 3; // width*height row-major metric depth
repeated float confidence = 4; // width*height row-major confidence (DualDPT)
repeated float sky = 5; // width*height row-major sky map (mono)
repeated float extrinsics = 6; // 12 floats, 3x4 row-major (world-to-camera)
repeated float intrinsics = 7; // 9 floats, 3x3 row-major
int32 num_points = 8; // number of 3D points
repeated float points = 9; // num_points*3 xyz, world space
bytes point_colors = 10; // num_points*3 uint8 rgb
repeated string export_paths = 11; // paths written for the requested exports
bool is_metric = 12; // depth is in metric units
}
// --- Face recognition messages ---
message FacialArea {

View File

@@ -1,5 +1,5 @@
IK_LLAMA_VERSION?=5f917a64b391b7d31839845153a473a65f630458
IK_LLAMA_VERSION?=2f524850a1f67716bc0ba80ffa30ce39c5b8bd5f
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
CMAKE_ARGS?=

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=4988f6e866057afd130c1515ecef0c9bab9a15f8
LLAMA_VERSION?=7dad2f1a17d65b5e2034c277125bc9f97573a779
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -1922,25 +1922,27 @@ public:
body_json["min_p"] = data["min_p"];
}
// Pass enable_thinking via chat_template_kwargs (where oaicompat_chat_params_parse reads it)
// Forward the chat_template_kwargs the Go layer resolved (model config
// chat_template_kwargs + per-request metadata: enable_thinking,
// reasoning_effort, preserve_thinking, ...). One generic merge replaces
// the previous per-key handling - new template levers need no C++ change.
// oaicompat_chat_params_parse reads these from body_json.
const auto& metadata = request->metadata();
auto et_it = metadata.find("enable_thinking");
if (et_it != metadata.end()) {
if (!body_json.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = json::object();
auto ctk_it = metadata.find("chat_template_kwargs");
if (ctk_it != metadata.end() && !ctk_it->second.empty()) {
try {
json ctk = json::parse(ctk_it->second);
if (ctk.is_object()) {
if (!body_json.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = json::object();
}
for (auto& el : ctk.items()) {
body_json["chat_template_kwargs"][el.key()] = el.value();
}
}
} catch (const std::exception & e) {
SRV_WRN("failed to parse chat_template_kwargs metadata: %s\n", e.what());
}
body_json["chat_template_kwargs"]["enable_thinking"] = (et_it->second == "true");
}
// Pass reasoning_effort via chat_template_kwargs too: the lever
// jinja templates like gpt-oss (Harmony) / LFM2.5 read, distinct
// from enable_thinking which those templates ignore.
auto re_it = metadata.find("reasoning_effort");
if (re_it != metadata.end() && !re_it->second.empty()) {
if (!body_json.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = json::object();
}
body_json["chat_template_kwargs"]["reasoning_effort"] = re_it->second;
}
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
@@ -2756,25 +2758,26 @@ public:
body_json["min_p"] = data["min_p"];
}
// Pass enable_thinking via chat_template_kwargs (where oaicompat_chat_params_parse reads it)
// Forward the chat_template_kwargs the Go layer resolved (model config
// chat_template_kwargs + per-request metadata: enable_thinking,
// reasoning_effort, preserve_thinking, ...). One generic merge replaces
// the previous per-key handling - new template levers need no C++ change.
const auto& predict_metadata = request->metadata();
auto predict_et_it = predict_metadata.find("enable_thinking");
if (predict_et_it != predict_metadata.end()) {
if (!body_json.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = json::object();
auto predict_ctk_it = predict_metadata.find("chat_template_kwargs");
if (predict_ctk_it != predict_metadata.end() && !predict_ctk_it->second.empty()) {
try {
json ctk = json::parse(predict_ctk_it->second);
if (ctk.is_object()) {
if (!body_json.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = json::object();
}
for (auto& el : ctk.items()) {
body_json["chat_template_kwargs"][el.key()] = el.value();
}
}
} catch (const std::exception & e) {
SRV_WRN("failed to parse chat_template_kwargs metadata: %s\n", e.what());
}
body_json["chat_template_kwargs"]["enable_thinking"] = (predict_et_it->second == "true");
}
// Pass reasoning_effort via chat_template_kwargs too: the lever
// jinja templates like gpt-oss (Harmony) / LFM2.5 read, distinct
// from enable_thinking which those templates ignore.
auto predict_re_it = predict_metadata.find("reasoning_effort");
if (predict_re_it != predict_metadata.end() && !predict_re_it->second.empty()) {
if (!body_json.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = json::object();
}
body_json["chat_template_kwargs"]["reasoning_effort"] = predict_re_it->second;
}
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)

View File

@@ -1,7 +0,0 @@
sources/
build*/
package/
libdepthanythingcpp*.so
depth-anything-cpp
test-models/
test-data/

View File

@@ -1,28 +0,0 @@
cmake_minimum_required(VERSION 3.18)
project(libdepthanythingcpp LANGUAGES C CXX)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Static-link ggml into the depth-anything shared library so the resulting .so
# has no runtime dependency on an external libggml — only on
# libc/libstdc++/libgomp, which the LocalAI package step bundles into the
# docker image.
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE)
# depth-anything.cpp build switches: skip CLI/tests, but build libdepthanything
# itself as a SHARED library (DA_SHARED) while ggml stays static
# (BUILD_SHARED_LIBS OFF above). The da_capi_* C ABI is compiled into
# src/da_capi.cpp and re-exported by that shared library, so no extra MODULE
# wrapper is needed (unlike locate-anything.cpp).
set(DA_BUILD_CLI OFF CACHE BOOL "Disable depth-anything CLI" FORCE)
set(DA_BUILD_TESTS OFF CACHE BOOL "Disable depth-anything tests" FORCE)
set(DA_SHARED ON CACHE BOOL "Build libdepthanything as a shared lib" FORCE)
add_subdirectory(./sources/depth-anything.cpp)
# Emit libdepthanything.so into the top-level build dir so the Makefile can
# rename it to the per-variant libdepthanythingcpp-<variant>.so.
set_target_properties(depthanything PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

View File

@@ -1,136 +0,0 @@
CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=false
GOCMD?=go
GO_TAGS?=
JOBS?=$(shell nproc --ignore=1)
# depth-anything.cpp. Pin to a specific commit for a stable build; a squash
# merge upstream can orphan a branch, so the native version is pinned by SHA.
DEPTHANYTHING_REPO?=https://github.com/mudler/depth-anything.cpp.git
DEPTHANYTHING_VERSION?=e0b6814d2f58261216da69d63326f1f2d75d4435
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Forward LocalAI's BUILD_TYPE to the matching ggml backend switch. depth-anything.cpp
# force-sets GGML_CUDA/GGML_VULKAN/GGML_METAL from its own DA_GGML_* options, so
# those must be toggled via the DA_GGML_* names (a bare -DGGML_CUDA=ON would be
# overridden); the remaining ggml switches pass straight through.
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON -DDA_GGML_CUDA=ON
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON
else ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=ON -DDA_GGML_VULKAN=ON
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DDA_GGML_METAL=ON
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx
endif
sources/depth-anything.cpp:
mkdir -p sources && \
git clone --recursive $(DEPTHANYTHING_REPO) sources/depth-anything.cpp && \
cd sources/depth-anything.cpp && \
git checkout $(DEPTHANYTHING_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
# Detect OS
UNAME_S := $(shell uname -s)
# Only build CPU variants on Linux
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = libdepthanythingcpp-fallback.so
endif
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o depth-anything-cpp ./
package: depth-anything-cpp
bash package.sh
build: package
clean: purge
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
purge:
rm -rf build*
# Build all variants (Linux only)
ifeq ($(UNAME_S),Linux)
libdepthanythingcpp-avx.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:avx${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
libdepthanythingcpp-avx2.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:avx2${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:avx512${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
endif
# Build fallback variant (all platforms)
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
# Shared build recipe behind every libdepthanythingcpp-<variant>.so target.
# The variant targets re-invoke make on this rule with SO_TARGET (the output
# file name) and CMAKE_ARGS (the variant-specific ggml flags) set in the
# environment. The recipe configures and builds in build-$(SO_TARGET), then
# moves the resulting libdepthanything.so to ./$(SO_TARGET).
libdepthanythingcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
all: depth-anything-cpp package
# `test` is invoked by the top-level Makefile's `test-extra` target. It builds
# the backend binary + the fallback shared library (needed for dlopen at
# runtime), then runs test.sh which downloads a small GGUF + a test image and
# exercises the gRPC Load/Predict wire path via the Go smoke test in
# main_test.go.
test: depth-anything-cpp libdepthanythingcpp-fallback.so
bash test.sh

View File

@@ -1,509 +0,0 @@
package main
// godepthanythingcpp.go - gRPC handlers (Load, Predict, GenerateImage, Depth)
// for the depth-anything-cpp backend, wrapping the Depth Anything 3 ggml C-API
// (libdepthanythingcpp-<variant>.so) via purego.
//
// Embeds base.SingleThread to default the unimplemented RPCs to "not supported"
// and to serialize calls — the C side shares a ggml graph allocator and is NOT
// reentrant, so all inference must run one-at-a-time.
//
// Depth has no native OpenAI endpoint, so in addition to the typed Depth RPC
// the model is exposed through two generic RPCs:
//
// - GenerateImage(src, dst): run depth on the src image and write a
// min-max-normalised grayscale depth PNG to dst.
// - Predict(images[0]): run depth+pose and return a JSON blob with the depth
// dimensions, depth stats and the camera extrinsics (3x4) / intrinsics (3x3).
import (
"encoding/base64"
"encoding/json"
"fmt"
"image"
"image/png"
"math"
"os"
"path/filepath"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
// Function pointers into the Depth Anything C ABI. main.go binds each of them
// through purego; the da_capi_* symbols themselves are implemented in
// libdepthanything (src/da_capi.cpp) and exported by its DA_SHARED build.
// Every pointer is nil until that registration has happened.
var (
	// CapiLoad binds da_capi_load(const char* gguf_path, int n_threads).
	// It returns a da_ctx* handle; 0 signals failure.
	CapiLoad func(gguf string, nThreads int32) uintptr

	// CapiFree binds da_capi_free(da_ctx* ctx). Passing a 0 handle is safe.
	CapiFree func(handle uintptr)

	// CapiLastError binds da_capi_last_error(da_ctx* ctx), which returns a
	// const char* owned by the ctx ("" when there is no error). purego copies
	// the C string into a Go string, so the result is never freed here.
	CapiLastError func(handle uintptr) string

	// CapiDepthPath binds da_capi_depth_path(ctx, image_path, out_h*, out_w*).
	// The result is a row-major H*W float depth map, or nil on error; the
	// caller releases it with da_capi_free_floats.
	CapiDepthPath func(handle uintptr, imagePath string, outH *int32, outW *int32) *float32

	// CapiFreeFloats binds da_capi_free_floats(float* p).
	CapiFreeFloats func(p *float32)

	// CapiPosePath binds da_capi_pose_path(ctx, image_path, out_ext[12],
	// out_intr[9]). Returns 0 on success and -1 on error.
	CapiPosePath func(handle uintptr, imagePath string, outExt *float32, outIntr *float32) int32

	// CapiDepthDense binds da_capi_depth_dense(ctx, image_path, out_h*, out_w*,
	// out_depth**, out_conf**, out_sky**, out_ext[12], out_intr[9],
	// out_is_metric*). Returns 0 on success and -1 on error. Each non-NULL
	// out_depth/out_conf/out_sky receives a malloc'd float[H*W] that must be
	// released with da_capi_free_floats; a buffer the model does not produce
	// is set to NULL.
	CapiDepthDense func(
		handle uintptr,
		imagePath string,
		outH *int32,
		outW *int32,
		outDepth **float32,
		outConf **float32,
		outSky **float32,
		outExt *float32,
		outIntr *float32,
		outIsMetric *int32,
	) int32

	// CapiPoints binds da_capi_points(ctx, image_path, conf_thresh, out_n*,
	// out_xyz**, out_rgb**). Returns 0 on success and -1 on error. *out_xyz is
	// a malloc'd float[3*N] (release with da_capi_free_floats) and *out_rgb a
	// malloc'd uint8[3*N] (release with da_capi_free_bytes).
	CapiPoints func(
		handle uintptr,
		imagePath string,
		confThresh float32,
		outN *int32,
		outXyz **float32,
		outRgb **byte,
	) int32

	// CapiFreeBytes binds da_capi_free_bytes(unsigned char* p).
	CapiFreeBytes func(p *byte)

	// CapiExportGlb binds da_capi_export_glb(ctx, image_path, out_glb).
	// Returns 0 on success and -1 on error.
	CapiExportGlb func(handle uintptr, imagePath string, outGlb string) int32

	// CapiExportColmap binds da_capi_export_colmap(ctx, image_path, out_dir,
	// binary). Returns 0 on success and -1 on error.
	CapiExportColmap func(handle uintptr, imagePath string, outDir string, binary int32) int32
)
// DepthAnythingCpp is the depth-anything-cpp backend. It embeds
// base.SingleThread and keeps the native model context created by Load.
type DepthAnythingCpp struct {
base.SingleThread
// handle is the da_ctx handle returned by CapiLoad; 0 means no model is
// currently loaded.
handle uintptr
}
// Load opens the GGUF model named by opts and keeps the resulting da_ctx handle
// for later inference calls.
//
// The file name comes from opts.ModelFile, falling back to opts.Model; a
// relative name is joined onto opts.ModelPath. A non-positive opts.Threads is
// replaced by 4. Any model loaded earlier is freed before the new one is
// loaded, so after a failed Load the backend holds no model.
func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error {
	name := opts.ModelFile
	if name == "" {
		name = opts.Model
	}
	if name == "" {
		return fmt.Errorf("depth-anything-cpp: ModelFile is empty")
	}

	resolved := name
	if !filepath.IsAbs(name) {
		resolved = filepath.Join(opts.ModelPath, name)
	}
	if _, statErr := os.Stat(resolved); statErr != nil {
		return fmt.Errorf("depth-anything-cpp: model file not found: %s: %w", resolved, statErr)
	}

	nThreads := opts.Threads
	if nThreads <= 0 {
		nThreads = 4
	}

	// A repeated Load replaces the model: drop the old context first.
	if r.handle != 0 {
		CapiFree(r.handle)
		r.handle = 0
	}

	ctx := CapiLoad(resolved, nThreads)
	if ctx != 0 {
		r.handle = ctx
		return nil
	}

	// da_capi_last_error wants a ctx, and a failed load leaves us without one
	// (it answers "" for a null ctx), so the detail text is best-effort only.
	detail := CapiLastError(0)
	if detail == "" {
		return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", resolved)
	}
	return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", resolved, detail)
}
// depthResult is the JSON payload returned by Predict. Predict fills it from
// the values produced by runDepthPose plus the min/max of the depth map.
type depthResult struct {
// DepthW and DepthH are the depth-map width and height.
DepthW int `json:"depth_w"`
DepthH int `json:"depth_h"`
// DepthMin and DepthMax are the smallest and largest values in the depth
// map, as computed by minMax.
DepthMin float32 `json:"depth_min"`
DepthMax float32 `json:"depth_max"`
// Extrinsics and Intrinsics carry the camera pose.
Extrinsics [12]float32 `json:"extrinsics"` // 3x4 row-major
Intrinsics [9]float32 `json:"intrinsics"` // 3x3 row-major
}
// Predict estimates depth and camera pose for the first entry of
// opts.Images and reports the result as a JSON-encoded depthResult: the
// depth-map dimensions, its min/max values and the extrinsics/intrinsics.
// LocalAI places the returned string in the Reply.Message of the gRPC response.
//
// Images[0] may be a filesystem path or a base64-encoded payload;
// materializeImage turns it into a path, and its cleanup callback runs when
// Predict returns. An error is returned when no image is supplied, when the
// image cannot be materialized, when inference fails, or when the JSON
// encoding fails.
func (r *DepthAnythingCpp) Predict(opts *pb.PredictOptions) (string, error) {
	images := opts.GetImages()
	if len(images) == 0 {
		return "", fmt.Errorf("depth-anything-cpp: Predict requires an image in Images[]")
	}

	path, release, err := materializeImage(images[0])
	if err != nil {
		return "", fmt.Errorf("depth-anything-cpp: %w", err)
	}
	defer release()

	depthMap, height, width, extrinsics, intrinsics, err := r.runDepthPose(path)
	if err != nil {
		return "", err
	}

	lo, hi := minMax(depthMap)
	result := depthResult{
		DepthW:     width,
		DepthH:     height,
		DepthMin:   lo,
		DepthMax:   hi,
		Extrinsics: extrinsics,
		Intrinsics: intrinsics,
	}

	encoded, err := json.Marshal(result)
	if err != nil {
		return "", fmt.Errorf("depth-anything-cpp: marshal: %w", err)
	}
	return string(encoded), nil
}
// GenerateImage estimates depth for the image in req.Src and hands the depth
// map to writeDepthPNG, which writes the normalised grayscale depth PNG to
// req.Dst.
//
// Both Src and Dst are required. Src goes through materializeImage first, and
// its cleanup callback runs when GenerateImage returns. The pose values that
// runDepthPose also produces are discarded.
func (r *DepthAnythingCpp) GenerateImage(req *pb.GenerateImageRequest) error {
	src, dst := req.GetSrc(), req.GetDst()
	switch {
	case src == "":
		return fmt.Errorf("depth-anything-cpp: GenerateImage requires src")
	case dst == "":
		return fmt.Errorf("depth-anything-cpp: GenerateImage requires dst")
	}

	path, release, err := materializeImage(src)
	if err != nil {
		return fmt.Errorf("depth-anything-cpp: %w", err)
	}
	defer release()

	depthMap, height, width, _, _, err := r.runDepthPose(path)
	if err != nil {
		return err
	}
	return writeDepthPNG(dst, depthMap, height, width)
}
// Depth implements the typed Depth RPC. It runs the Depth Anything 3 pipeline
// on the request's src image (a filesystem path or a base64 payload) and
// returns a DepthResponse that honours the include_* flags and the requested
// exports: per-pixel metric depth + confidence (DualDPT) or depth + sky
// (mono), camera extrinsics/intrinsics, an optional back-projected 3D point
// cloud and glb/COLMAP exports.
//
// It fails when no model is loaded, when src is empty or cannot be
// materialised, or when any native call or export reports an error.
func (r *DepthAnythingCpp) Depth(in *pb.DepthRequest) (pb.DepthResponse, error) {
	// Results are gathered in plain locals and assembled into a single
	// composite literal at the very end: returning a named pb.DepthResponse
	// value would copy its embedded mutex (go vet copylocks).
	if r.handle == 0 {
		return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: model not loaded")
	}
	src := in.GetSrc()
	if src == "" {
		return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: Depth requires src")
	}
	localPath, release, err := materializeImage(src)
	if err != nil {
		return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: %w", err)
	}
	defer release()

	// Dense per-pixel maps + pose. An output pointer is handed to the native
	// side only for the maps the caller asked for, so unrequested work can be
	// skipped; the pose buffers must always point at 12/9 floats per the C ABI.
	var (
		rows, cols, metric        int32
		rawDepth, rawConf, rawSky *float32
		pose                      [12]float32
		camera                    [9]float32
		depthOut, confOut, skyOut **float32
	)
	if in.GetIncludeDepth() {
		depthOut = &rawDepth
	}
	if in.GetIncludeConfidence() {
		confOut = &rawConf
	}
	if in.GetIncludeSky() {
		skyOut = &rawSky
	}
	if status := CapiDepthDense(r.handle, localPath, &rows, &cols, depthOut, confOut, skyOut, &pose[0], &camera[0], &metric); status != 0 {
		return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: da_capi_depth_dense failed (rc=%d): %s", status, r.lastError())
	}

	// take copies one native h*w map into Go memory and then frees the native
	// buffer; a nil buffer (map not requested / not produced) yields nil.
	pixels := int(rows) * int(cols)
	take := func(buf *float32) []float32 {
		if buf == nil {
			return nil
		}
		dup := copyFloats(buf, pixels)
		CapiFreeFloats(buf)
		return dup
	}
	depth := take(rawDepth)
	conf := take(rawConf)
	sky := take(rawSky)

	var extrinsics, intrinsics []float32
	if in.GetIncludePose() {
		extrinsics = append([]float32(nil), pose[:]...)
		intrinsics = append([]float32(nil), camera[:]...)
	}

	// 3D point cloud (DualDPT / pose-capable models only).
	var (
		numPoints   int32
		points      []float32
		pointColors []byte
	)
	if in.GetIncludePoints() {
		var (
			count int32
			xyz   *float32
			rgb   *byte
		)
		status := CapiPoints(r.handle, localPath, in.GetPointsConfThresh(), &count, &xyz, &rgb)
		if status != 0 {
			return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: da_capi_points failed (rc=%d): %s", status, r.lastError())
		}
		numPoints = count
		// Three components per point for both the xyz and the rgb buffers.
		components := int(count) * 3
		if xyz != nil {
			points = copyFloats(xyz, components)
			CapiFreeFloats(xyz)
		}
		if rgb != nil {
			pointColors = copyBytes(rgb, components)
			CapiFreeBytes(rgb)
		}
	}

	// Exports (glb / colmap) are written under in.Dst (a directory); runExports
	// falls back to a temp dir when Dst is empty.
	var exportPaths []string
	if wanted := in.GetExports(); len(wanted) > 0 {
		exportPaths, err = r.runExports(localPath, in.GetDst(), wanted)
		if err != nil {
			return pb.DepthResponse{}, err
		}
	}

	return pb.DepthResponse{
		Width:       cols,
		Height:      rows,
		Depth:       depth,
		Confidence:  conf,
		Sky:         sky,
		Extrinsics:  extrinsics,
		Intrinsics:  intrinsics,
		NumPoints:   numPoints,
		Points:      points,
		PointColors: pointColors,
		ExportPaths: exportPaths,
		IsMetric:    metric != 0,
	}, nil
}
// runExports writes the requested exports for imgPath into dstDir and returns
// the written paths in request order. Supported exports: "glb" (written to
// <dstDir>/pointcloud.glb) and "colmap" (written into <dstDir>/colmap).
//
// When dstDir is empty a fresh temp directory is created and used instead;
// otherwise dstDir is created if missing.
//
// All export names are validated before any work is done, so an unknown name
// fails fast instead of after earlier (potentially expensive) exports have
// already run. If a temp directory was created by this call and the call
// fails, the directory is removed again so failed requests do not accumulate
// directories under the system temp dir.
func (r *DepthAnythingCpp) runExports(imgPath, dstDir string, exports []string) (paths []string, err error) {
	for _, exp := range exports {
		switch exp {
		case "glb", "colmap":
		default:
			return nil, fmt.Errorf("depth-anything-cpp: unknown export %q (want glb|colmap)", exp)
		}
	}

	if dstDir == "" {
		tmp, mkErr := os.MkdirTemp("", "depth-anything-export-*")
		if mkErr != nil {
			return nil, fmt.Errorf("depth-anything-cpp: mkdir export dir: %w", mkErr)
		}
		dstDir = tmp
		defer func() {
			if err != nil {
				// Best-effort cleanup of the directory this call owns; the
				// export error is the one worth reporting.
				_ = os.RemoveAll(tmp)
			}
		}()
	} else if mkErr := os.MkdirAll(dstDir, 0o755); mkErr != nil {
		return nil, fmt.Errorf("depth-anything-cpp: mkdir %s: %w", dstDir, mkErr)
	}

	for _, exp := range exports {
		switch exp {
		case "glb":
			out := filepath.Join(dstDir, "pointcloud.glb")
			if rc := CapiExportGlb(r.handle, imgPath, out); rc != 0 {
				return nil, fmt.Errorf("depth-anything-cpp: da_capi_export_glb failed (rc=%d): %s", rc, r.lastError())
			}
			paths = append(paths, out)
		case "colmap":
			out := filepath.Join(dstDir, "colmap")
			if mkErr := os.MkdirAll(out, 0o755); mkErr != nil {
				return nil, fmt.Errorf("depth-anything-cpp: mkdir %s: %w", out, mkErr)
			}
			// NOTE(review): the trailing 1 is passed through to the C ABI
			// unchanged; its meaning is defined by da_capi_export_colmap.
			if rc := CapiExportColmap(r.handle, imgPath, out, 1); rc != 0 {
				return nil, fmt.Errorf("depth-anything-cpp: da_capi_export_colmap failed (rc=%d): %s", rc, r.lastError())
			}
			paths = append(paths, out)
		default:
			// Unreachable after the validation pass; kept as a guard.
			return nil, fmt.Errorf("depth-anything-cpp: unknown export %q (want glb|colmap)", exp)
		}
	}
	return paths, nil
}
// copyFloats duplicates the n float32 values starting at p (a C heap pointer)
// into Go-owned memory, so the caller may free the C buffer afterwards. A nil
// pointer or a non-positive count yields a nil slice.
func copyFloats(p *float32, n int) []float32 {
	switch {
	case p == nil, n <= 0:
		return nil
	}
	dup := make([]float32, n)
	copy(dup, unsafe.Slice(p, n))
	return dup
}
// copyBytes duplicates the n bytes starting at p (a C heap pointer) into
// Go-owned memory. A nil pointer or a non-positive count yields a nil slice.
func copyBytes(p *byte, n int) []byte {
	switch {
	case p == nil, n <= 0:
		return nil
	}
	dup := make([]byte, n)
	copy(dup, unsafe.Slice(p, n))
	return dup
}
// runDepthPose performs depth estimation followed by pose recovery on the
// image file at imagePath. On success it returns the row-major depth map
// (length h*w), the map's height and width, the 3x4 extrinsics (12 floats) and
// the 3x3 intrinsics (9 floats).
//
// It fails when no model is loaded or when either native call reports an
// error. If only the pose call fails, the depth map and dimensions gathered so
// far are still returned alongside the error.
func (r *DepthAnythingCpp) runDepthPose(imagePath string) (depth []float32, h, w int, ext [12]float32, intr [9]float32, err error) {
	if r.handle == 0 {
		return nil, 0, 0, ext, intr, fmt.Errorf("depth-anything-cpp: model not loaded")
	}

	var rows, cols int32
	buf := CapiDepthPath(r.handle, imagePath, &rows, &cols)
	if buf == nil {
		return nil, 0, 0, ext, intr, fmt.Errorf("depth-anything-cpp: da_capi_depth_path failed: %s", r.lastError())
	}

	// Copy the native buffer into Go memory before releasing it.
	h, w = int(rows), int(cols)
	if total := h * w; total > 0 {
		depth = make([]float32, total)
		copy(depth, unsafe.Slice(buf, total))
	}
	CapiFreeFloats(buf)

	if status := CapiPosePath(r.handle, imagePath, &ext[0], &intr[0]); status != 0 {
		err = fmt.Errorf("depth-anything-cpp: da_capi_pose_path failed (rc=%d): %s", status, r.lastError())
	}
	return depth, h, w, ext, intr, err
}
// lastError fetches the last error string recorded for this context through
// CapiLastError. It yields "" when that symbol has not been registered or when
// no context handle is held.
func (r *DepthAnythingCpp) lastError() string {
	if r.handle != 0 && CapiLastError != nil {
		return CapiLastError(r.handle)
	}
	return ""
}
// materializeImage returns a filesystem path for an image argument that may be
// either an existing path or a base64-encoded payload. When the input is
// base64 it is decoded into a temp file; cleanup removes it (no-op for a path).
func materializeImage(arg string) (path string, cleanup func(), err error) {
cleanup = func() {}
if _, statErr := os.Stat(arg); statErr == nil {
return arg, cleanup, nil
}
// Strip an optional data URL prefix (data:image/...;base64,<payload>).
b64 := arg
if i := indexComma(b64); i >= 0 && hasDataPrefix(b64) {
b64 = b64[i+1:]
}
data, decErr := base64.StdEncoding.DecodeString(b64)
if decErr != nil {
return "", cleanup, fmt.Errorf("image is neither an existing path nor valid base64: %v", decErr)
}
f, tErr := os.CreateTemp("", "depth-anything-*.img")
if tErr != nil {
return "", cleanup, tErr
}
if _, wErr := f.Write(data); wErr != nil {
_ = f.Close()
_ = os.Remove(f.Name())
return "", cleanup, wErr
}
_ = f.Close()
name := f.Name()
return name, func() { _ = os.Remove(name) }, nil
}
// hasDataPrefix reports whether s begins with the data URL scheme "data:".
// The comparison is case-sensitive.
func hasDataPrefix(s string) bool {
	const scheme = "data:"
	if len(s) < len(scheme) {
		return false
	}
	return s[:len(scheme)] == scheme
}
// indexComma returns the byte offset of the first ',' in s, or -1 when s
// contains no comma.
func indexComma(s string) int {
	for pos, b := range []byte(s) {
		if b == ',' {
			return pos
		}
	}
	return -1
}
// writeDepthPNG min-max normalises a row-major depth map of h rows by w
// columns and writes it to dst as an 8-bit grayscale PNG. The smallest value
// maps to 0 (dark) and the largest to 255 (bright); for inverse-depth-like
// outputs that renders near as bright.
// NOTE(review): if the native depth is metric distance rather than inverse
// depth, far pixels are the bright ones — confirm against the model output.
//
// NaN samples are written as 0 and out-of-range results are clamped to
// [0, 255]. When the value range is empty or NaN the span falls back to 1 so
// the division stays well-defined.
//
// Errors: non-positive dimensions, a depth slice shorter than h*w, or any
// failure creating, encoding or closing dst. The Close error is reported
// because it can be the call that surfaces a failed write; ignoring it would
// report success for a truncated PNG.
func writeDepthPNG(dst string, depth []float32, h, w int) error {
	if h <= 0 || w <= 0 || len(depth) < h*w {
		return fmt.Errorf("depth-anything-cpp: writeDepthPNG: bad dims h=%d w=%d len=%d", h, w, len(depth))
	}

	dmin, dmax := minMax(depth)
	span := dmax - dmin
	if span <= 0 || math.IsNaN(float64(span)) {
		span = 1
	}

	img := image.NewGray(image.Rect(0, 0, w, h))
	for y := 0; y < h; y++ {
		row := img.Pix[y*img.Stride : y*img.Stride+w]
		for x, v := range depth[y*w : y*w+w] {
			n := (v - dmin) / span // nominally 0..1
			switch {
			case math.IsNaN(float64(n)), n < 0:
				n = 0
			case n > 1:
				n = 1
			}
			row[x] = uint8(n * 255)
		}
	}

	f, err := os.Create(dst)
	if err != nil {
		return fmt.Errorf("depth-anything-cpp: create %s: %w", dst, err)
	}
	if err := png.Encode(f, img); err != nil {
		_ = f.Close() // the encode error is the one worth reporting
		return fmt.Errorf("depth-anything-cpp: encode %s: %w", dst, err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("depth-anything-cpp: close %s: %w", dst, err)
	}
	return nil
}
// minMax returns the smallest and largest finite values in v. NaN and ±Inf
// samples are ignored. It returns (0, 0) when v is empty or contains no finite
// value.
//
// The range is seeded from the first finite sample rather than from v[0]: a
// NaN seed never compares less or greater than anything and an infinite seed
// never shrinks, so seeding from a non-finite v[0] would poison the result.
func minMax(v []float32) (mn, mx float32) {
	seeded := false
	for _, x := range v {
		if f := float64(x); math.IsNaN(f) || math.IsInf(f, 0) {
			continue
		}
		if !seeded {
			mn, mx, seeded = x, x, true
			continue
		}
		if x < mn {
			mn = x
		}
		if x > mx {
			mx = x
		}
	}
	return mn, mx
}

View File

@@ -1,61 +0,0 @@
package main
// main.go - entry point for the depth-anything-cpp gRPC backend.
//
// Dlopens libdepthanythingcpp-<variant>.so via purego at the path in
// DEPTHANYTHING_LIBRARY (set by run.sh based on /proc/cpuinfo), registers the
// da_capi_* C ABI symbols, then starts the gRPC server.
import (
"flag"
"os"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
// addr is the value of the --addr flag (default "localhost:50051"). main
// passes it to grpc.StartServer, so despite the "connect to" wording of the
// help text it is presumably the address this backend serves on.
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
// LibFuncs pairs a Go function variable with the name of the C symbol that
// should be bound to it. main builds a list of these and hands each pair to
// purego.RegisterLibFunc. The entries in main are positional composite
// literals, so the field order here must not change.
type LibFuncs struct {
// FuncPtr is the address of the Go function variable to bind (e.g. &CapiLoad).
FuncPtr any
// Name is the exported symbol name to look up in the loaded library.
Name string
}
// main loads the depth-anything shared library, binds the da_capi_* symbols to
// their Go function variables and then serves the gRPC backend on --addr.
// Any failure to open the library or to start the server panics.
func main() {
	// DEPTHANYTHING_LIBRARY selects the library to load; without it the
	// fallback build in the working directory is used.
	soPath := "./libdepthanythingcpp-fallback.so"
	if fromEnv := os.Getenv("DEPTHANYTHING_LIBRARY"); fromEnv != "" {
		soPath = fromEnv
	}

	handle, err := purego.Dlopen(soPath, purego.RTLD_NOW|purego.RTLD_GLOBAL)
	if err != nil {
		panic(err)
	}

	symbols := []LibFuncs{
		{FuncPtr: &CapiLoad, Name: "da_capi_load"},
		{FuncPtr: &CapiFree, Name: "da_capi_free"},
		{FuncPtr: &CapiLastError, Name: "da_capi_last_error"},
		{FuncPtr: &CapiDepthPath, Name: "da_capi_depth_path"},
		{FuncPtr: &CapiFreeFloats, Name: "da_capi_free_floats"},
		{FuncPtr: &CapiPosePath, Name: "da_capi_pose_path"},
		{FuncPtr: &CapiDepthDense, Name: "da_capi_depth_dense"},
		{FuncPtr: &CapiPoints, Name: "da_capi_points"},
		{FuncPtr: &CapiFreeBytes, Name: "da_capi_free_bytes"},
		{FuncPtr: &CapiExportGlb, Name: "da_capi_export_glb"},
		{FuncPtr: &CapiExportColmap, Name: "da_capi_export_colmap"},
	}
	for _, sym := range symbols {
		purego.RegisterLibFunc(sym.FuncPtr, handle, sym.Name)
	}

	flag.Parse()

	if err := grpc.StartServer(*addr, &DepthAnythingCpp{}); err != nil {
		panic(err)
	}
}

View File

@@ -1,167 +0,0 @@
package main
// main_test.go - end-to-end smoke test for the depth-anything-cpp gRPC backend.
//
// Spawns the compiled depth-anything-cpp binary on a free local port, dials it
// via gRPC, and exercises LoadModel + Predict against the test fixtures
// downloaded by test.sh: the small (vits) f32 GGUF of Depth Anything 3 and a
// real photo. Asserts that Predict returns a JSON payload with a positive
// depth-map width/height.
//
// The spec Skip()s cleanly if its fixtures (the model, the test image, the
// built binary, or the fallback .so) are missing, so the test target stays
// usable on a fresh checkout / on CI runners where the model hasn't been
// downloaded.
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"testing"
"time"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// TestDepth is the `go test` entry point: it wires Gomega failures into
// Ginkgo and runs every spec registered in this package.
func TestDepth(t *testing.T) {
	const suiteName = "depth-anything-cpp backend smoke suite"
	RegisterFailHandler(Fail)
	RunSpecs(t, suiteName)
}
// freePort asks the kernel for an ephemeral TCP port on 127.0.0.1 and releases
// it straight away so the spawned backend can bind to it. Another process
// could claim the port in between (a small TOCTOU window), which is acceptable
// for a smoke test on a quiet runner.
func freePort() int {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	Expect(err).ToNot(HaveOccurred(), "freePort listen")
	tcpAddr := listener.Addr().(*net.TCPAddr)
	Expect(listener.Close()).To(Succeed())
	return tcpAddr.Port
}
// startBackend launches the depth-anything-cpp binary on the given port and
// blocks until it accepts TCP connections, giving up after 10s. As in main.go,
// the purego library is selected through DEPTHANYTHING_LIBRARY, which is
// pointed at the freshly built fallback .so. The spec is skipped when either
// the binary or the library has not been built. The returned func kills the
// process and reaps it.
func startBackend(port int) func() {
	// requireBuilt resolves rel to an absolute path and skips the spec with
	// skipFormat (which receives that path) when nothing exists there.
	requireBuilt := func(rel, skipFormat string) string {
		abs, err := filepath.Abs(rel)
		Expect(err).ToNot(HaveOccurred())
		if _, statErr := os.Stat(abs); statErr != nil {
			Skip(fmt.Sprintf(skipFormat, abs))
		}
		return abs
	}
	binary := requireBuilt("./depth-anything-cpp", "backend binary not built: %s (run `make depth-anything-cpp` first)")
	libPath := requireBuilt("./libdepthanythingcpp-fallback.so", "fallback library not built: %s (run `make libdepthanythingcpp-fallback.so` first)")

	target := fmt.Sprintf("127.0.0.1:%d", port)
	proc := exec.Command(binary, "--addr", target)
	proc.Env = append(os.Environ(), "DEPTHANYTHING_LIBRARY="+libPath)
	proc.Stdout, proc.Stderr = os.Stderr, os.Stderr
	Expect(proc.Start()).To(Succeed())

	stop := func() {
		if proc.Process == nil {
			return
		}
		_ = proc.Process.Kill()
		_, _ = proc.Process.Wait()
	}

	// Poll until the port answers; each failed attempt is followed by a pause.
	for giveUp := time.Now().Add(10 * time.Second); time.Now().Before(giveUp); time.Sleep(200 * time.Millisecond) {
		conn, dialErr := net.DialTimeout("tcp", target, 200*time.Millisecond)
		if dialErr != nil {
			continue
		}
		_ = conn.Close()
		return stop
	}

	stop()
	Fail(fmt.Sprintf("backend did not become ready on %s within 10s", target))
	return func() {}
}
// loadTestImage returns the base64 encoding of test-data/test.jpg, the image
// fetched by test.sh (base64 is one of the wire formats Predict accepts). The
// spec is skipped when the file cannot be read.
func loadTestImage() string {
	fixture, err := filepath.Abs("test-data/test.jpg")
	Expect(err).ToNot(HaveOccurred())

	raw, readErr := os.ReadFile(fixture)
	if readErr != nil {
		Skip(fmt.Sprintf("test image not present: %s (run test.sh first)", fixture))
	}
	return base64.StdEncoding.EncodeToString(raw)
}
// dialBackend creates an insecure (plaintext) gRPC client for the backend
// listening on 127.0.0.1:port. It returns the typed client together with a
// func that closes the underlying connection.
func dialBackend(port int) (pb.BackendClient, func()) {
	target := fmt.Sprintf("127.0.0.1:%d", port)
	creds := grpc.WithTransportCredentials(insecure.NewCredentials())
	conn, err := grpc.NewClient(target, creds)
	Expect(err).ToNot(HaveOccurred())

	hangUp := func() { _ = conn.Close() }
	return pb.NewBackendClient(conn), hangUp
}
// modelPathOrSkip returns the absolute path of the named model file under
// ./test-models/. When the file is missing (fresh checkout, or a CI runner
// without the download) the current spec is skipped instead of failing.
func modelPathOrSkip(name string) string {
	root, err := filepath.Abs("test-models")
	Expect(err).ToNot(HaveOccurred())

	candidate := filepath.Join(root, name)
	if _, statErr := os.Stat(candidate); statErr != nil {
		Skip(fmt.Sprintf("model not present: %s (run test.sh first)", candidate))
	}
	return candidate
}
// End-to-end smoke spec: start the real backend binary, load the small GGUF
// model and check that Predict answers with a JSON depthResult whose depth-map
// dimensions are positive.
var _ = Describe("depth-anything-cpp backend", func() {
	It("runs depth+pose against a known-good image", func() {
		const modelName = "depth-anything-small-f32.gguf"

		// Fixture lookups come first so a missing model or image skips the
		// spec before any process is spawned.
		modelFile := modelPathOrSkip(modelName)
		encodedImage := loadTestImage()

		port := freePort()
		stopBackend := startBackend(port)
		defer stopBackend()

		client, hangUp := dialBackend(port)
		defer hangUp()

		ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)
		defer cancel()

		loaded, err := client.LoadModel(ctx, &pb.ModelOptions{
			Model:     modelName,
			ModelFile: modelFile,
			Threads:   4,
		})
		Expect(err).ToNot(HaveOccurred(), "LoadModel")
		Expect(loaded.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loaded.GetMessage())

		// Predict runs depth+pose and returns the JSON depthResult in Reply.Message.
		reply, err := client.Predict(ctx, &pb.PredictOptions{
			Images: []string{encodedImage},
		})
		Expect(err).ToNot(HaveOccurred(), "Predict")

		raw := reply.GetMessage()
		var res depthResult
		Expect(json.Unmarshal(raw, &res)).To(Succeed(), "Predict returned non-JSON: %q", string(raw))
		Expect(res.DepthW).To(BeNumerically(">", 0), "depth width should be positive")
		Expect(res.DepthH).To(BeNumerically(">", 0), "depth height should be positive")

		_, _ = fmt.Fprintf(GinkgoWriter, "depth OK: %dx%d min=%.3f max=%.3f\n",
			res.DepthW, res.DepthH, res.DepthMin, res.DepthMax)
	})
})

View File

@@ -1,59 +0,0 @@
#!/bin/bash
# Assemble the depth-anything-cpp backend package under ./package:
#   - every libdepthanythingcpp-*.so variant, the backend binary and run.sh
#   - on Linux, the dynamic loader and core runtime libraries under package/lib
#   - GPU libraries for the current BUILD_TYPE, when the shared helper script
#     is present in the repository
# All paths are quoted so the script also works from a checkout whose path
# contains spaces.
set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# Create lib directory
mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR"/libdepthanythingcpp-*.so "$CURDIR/package/"
cp -avf "$CURDIR/depth-anything-cpp" "$CURDIR/package/"
cp -fv "$CURDIR/run.sh" "$CURDIR/package/"

# copy_runtime_libs LOADER SYSLIBDIR
# Copies the dynamic loader to package/lib/ld.so and the core runtime libraries
# from SYSLIBDIR into package/lib, dereferencing symlinks (-L).
copy_runtime_libs() {
    local loader="$1" syslibdir="$2" lib
    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1 \
               libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$syslibdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_runtime_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_runtime_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    # Nothing to bundle on macOS.
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Package GPU libraries based on BUILD_TYPE
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/"
ls -liah "$CURDIR/package/lib/"

View File

@@ -1,52 +0,0 @@
#!/bin/bash
# Launcher for the depth-anything-cpp backend. Picks the most capable
# libdepthanythingcpp-<variant>.so that is both supported by the CPU (per
# /proc/cpuinfo) and present next to this script, exports it through
# DEPTHANYTHING_LIBRARY, and execs the backend (through the bundled lib/ld.so
# when there is one).
set -ex

# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath "$0")")

cd /

echo "CPU info:"
if [ "$(uname)" != "Darwin" ]; then
    grep -e "model\sname" /proc/cpuinfo | head -1
    grep -e "flags" /proc/cpuinfo | head -1
fi

LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"

# Later checks override earlier ones, so the widest supported and available
# variant wins: avx < avx2 < avx512.
if [ "$(uname)" != "Darwin" ]; then
    if grep -q -e "\savx\s" /proc/cpuinfo ; then
        echo "CPU:    AVX    found OK"
        if [ -e "$CURDIR/libdepthanythingcpp-avx.so" ]; then
            LIBRARY="$CURDIR/libdepthanythingcpp-avx.so"
        fi
    fi

    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
        echo "CPU:    AVX2   found OK"
        if [ -e "$CURDIR/libdepthanythingcpp-avx2.so" ]; then
            LIBRARY="$CURDIR/libdepthanythingcpp-avx2.so"
        fi
    fi

    # Check avx 512
    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
        echo "CPU:    AVX512F found OK"
        if [ -e "$CURDIR/libdepthanythingcpp-avx512.so" ]; then
            LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
        fi
    fi
fi

# Append the previous value only when it is non-empty: a trailing ':' would add
# an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DEPTHANYTHING_LIBRARY="$LIBRARY"

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    echo "Using library: $LIBRARY"
    exec "$CURDIR/lib/ld.so" "$CURDIR/depth-anything-cpp" "$@"
fi

echo "Using library: $LIBRARY"
exec "$CURDIR/depth-anything-cpp" "$@"

View File

@@ -1,45 +0,0 @@
#!/bin/bash
# Fetch the fixtures for the depth-anything-cpp smoke test (a small GGUF model
# and one real photo), then run the Go test suite in this directory.
set -e

CURDIR=$(dirname "$(realpath "$0")")

echo "Running depth-anything-cpp backend tests..."

# Test model from the mudler/depth-anything.cpp-gguf HuggingFace repo. The small
# (vits) f32 GGUF is the lightest backbone (~131 MB), so it keeps the download
# cheap. It is skipped entirely if already present.
DEPTHANYTHING_MODEL_DIR="${DEPTHANYTHING_MODEL_DIR:-$CURDIR/test-models}"
DEPTHANYTHING_MODEL_FILE="${DEPTHANYTHING_MODEL_FILE:-depth-anything-small-f32.gguf}"
DEPTHANYTHING_MODEL_URL="${DEPTHANYTHING_MODEL_URL:-https://huggingface.co/mudler/depth-anything.cpp-gguf/resolve/main/depth-anything-small-f32.gguf}"
DEPTHANYTHING_MODEL_PATH="$DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE"

mkdir -p "$DEPTHANYTHING_MODEL_DIR"

if [ ! -f "$DEPTHANYTHING_MODEL_PATH" ]; then
    echo "Downloading depth-anything small f32 model (~131 MB)..."
    # Download into a .part file and rename only on success, so an interrupted
    # or failed transfer is never mistaken for a complete model on the next
    # run. -C - resumes an existing .part instead of restarting from 0, and
    # --fail makes HTTP errors abort instead of saving the error page.
    curl --fail -L -C - -o "$DEPTHANYTHING_MODEL_PATH.part" "$DEPTHANYTHING_MODEL_URL" --progress-bar
    mv "$DEPTHANYTHING_MODEL_PATH.part" "$DEPTHANYTHING_MODEL_PATH"
fi

# Use a real photo (people + cars) from the upstream rf-detr.cpp repo (~46 KB).
# Depth estimation needs real content; a synthetic image would be degenerate.
TEST_IMAGE_DIR="$CURDIR/test-data"
TEST_IMAGE_FILE="$TEST_IMAGE_DIR/test.jpg"
TEST_IMAGE_URL="${TEST_IMAGE_URL:-https://raw.githubusercontent.com/mudler/rf-detr.cpp/main/tests/fixtures/ci/test_image.jpg}"

mkdir -p "$TEST_IMAGE_DIR"

if [ ! -f "$TEST_IMAGE_FILE" ]; then
    echo "Downloading test image..."
    # Same .part + --fail scheme as the model: never leave a broken fixture
    # behind under the final name.
    curl --fail -L -o "$TEST_IMAGE_FILE.part" "$TEST_IMAGE_URL" --progress-bar
    mv "$TEST_IMAGE_FILE.part" "$TEST_IMAGE_FILE"
fi

echo "depth-anything-cpp test setup complete."
echo "  model:      $DEPTHANYTHING_MODEL_PATH"
echo "  test image: $TEST_IMAGE_FILE"

# Run the Go smoke test: spawns the backend binary on a free port, calls
# LoadModel + Predict via gRPC against the downloaded GGUF + image.
echo ""
echo "Running Go smoke test..."
cd "$CURDIR"
go test -v -timeout 30m ./...

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=df7638d8229a243af8a4b5a8ae557e0d74e0a0ae
WHISPER_CPP_VERSION?=0ec0845110dc934911dc48e8c5beb5ad3189b3f3
SO_TARGET?=libgowhisper.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -458,126 +458,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-locate-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-locate-anything-cpp
- &depthanything
name: "depth-anything"
alias: "depth-anything"
license: apache-2.0
description: |
Depth Anything 3 monocular metric depth + camera pose estimation in C/C++
using GGML. Loads pre-built GGUF weights and, given an image, returns a
dense depth map plus the recovered camera extrinsics (3x4) and intrinsics
(3x3). No Python at inference (purego, cgo-less).
urls:
- https://github.com/mudler/depth-anything.cpp
- https://huggingface.co/depth-anything/Depth-Anything-V3
tags:
- depth-estimation
- camera-pose
- depth-anything
- gpu
- cpu
capabilities:
default: "cpu-depth-anything-cpp"
nvidia: "cuda12-depth-anything-cpp"
nvidia-cuda-12: "cuda12-depth-anything-cpp"
nvidia-cuda-13: "cuda13-depth-anything-cpp"
nvidia-l4t: "nvidia-l4t-arm64-depth-anything-cpp"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-depth-anything-cpp"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
intel: "intel-sycl-f32-depth-anything-cpp"
vulkan: "vulkan-depth-anything-cpp"
- !!merge <<: *depthanything
name: "depth-anything-development"
capabilities:
default: "cpu-depth-anything-cpp-development"
nvidia: "cuda12-depth-anything-cpp-development"
nvidia-cuda-12: "cuda12-depth-anything-cpp-development"
nvidia-cuda-13: "cuda13-depth-anything-cpp-development"
nvidia-l4t: "nvidia-l4t-arm64-depth-anything-cpp-development"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-depth-anything-cpp-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
intel: "intel-sycl-f32-depth-anything-cpp-development"
vulkan: "vulkan-depth-anything-cpp-development"
- !!merge <<: *depthanything
name: "cpu-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-cpu-depth-anything-cpp
- !!merge <<: *depthanything
name: "cpu-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-cpu-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda12-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda12-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-depth-anything-cpp
- !!merge <<: *depthanything
name: "nvidia-l4t-arm64-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "nvidia-l4t-arm64-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f32-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f32-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f32-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f16-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-depth-anything-cpp
- !!merge <<: *depthanything
name: "intel-sycl-f16-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-depth-anything-cpp
- !!merge <<: *depthanything
name: "vulkan-depth-anything-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-depth-anything-cpp"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-depth-anything-cpp
- !!merge <<: *depthanything
name: "vulkan-depth-anything-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-depth-anything-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-depth-anything-cpp
- &vllm
name: "vllm"
license: apache-2.0

View File

@@ -635,8 +635,11 @@ func (l *Launcher) showDownloadProgress(version, title string) {
progressBar := widget.NewProgressBar()
progressBar.SetValue(0)
// Status label
// Status label. Truncate with an ellipsis so a long "Download failed:
// <url>" message can't stretch the window (and progress bar) to fit the
// whole error on one line; the full error is shown in the dialog below.
statusLabel := widget.NewLabel("Preparing download...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
// Release notes button
releaseNotesButton := widget.NewButton("View Release Notes", func() {

View File

@@ -454,8 +454,11 @@ func (sm *SystrayManager) showDownloadProgress(version string) {
progressBar := widget.NewProgressBar()
progressBar.SetValue(0)
// Status label
// Status label. Truncate with an ellipsis so a long "Download failed:
// <url>" message can't stretch the window (and progress bar) to fit the
// whole error on one line; the full error is shown in the dialog below.
statusLabel := widget.NewLabel("Preparing download...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
// Release notes button
releaseNotesButton := widget.NewButton("View Release Notes", func() {

View File

@@ -57,8 +57,16 @@ type LauncherUI struct {
// NewLauncherUI creates a new UI instance
func NewLauncherUI() *LauncherUI {
// Truncate the status text with an ellipsis. Status messages can carry a
// download error containing a long, unbreakable URL/path; without this the
// label demands the full single-line width and stretches the window (and
// the progress bar) arbitrarily wide. The full error is still shown in the
// error dialog.
statusLabel := widget.NewLabel("Initializing...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
return &LauncherUI{
statusLabel: widget.NewLabel("Initializing..."),
statusLabel: statusLabel,
versionLabel: widget.NewLabel("Version: Unknown"),
startStopButton: widget.NewButton("Start LocalAI", nil),
webUIButton: widget.NewButton("Open WebUI", nil),
@@ -602,8 +610,11 @@ func (ui *LauncherUI) showDownloadProgress(version, title string) {
progressBar := widget.NewProgressBar()
progressBar.SetValue(0)
// Status label
// Status label. Truncate with an ellipsis so a long "Download failed:
// <url>" message can't stretch the window (and progress bar) to fit the
// whole error on one line; the full error is shown in the dialog below.
statusLabel := widget.NewLabel("Preparing download...")
statusLabel.Truncation = fyne.TextTruncateEllipsis
// Release notes button
releaseNotesButton := widget.NewButton("View Release Notes", func() {

View File

@@ -1,66 +0,0 @@
package backend
import (
"context"
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)
// Depth loads the configured depth model (Depth Anything 3) and runs depth
// estimation on the request's image. The response carries everything the
// backend produced: per-pixel metric depth + confidence + sky, camera pose
// (extrinsics/intrinsics), an optional 3D point cloud and any requested
// exports (glb/colmap). The include_* flags and exports of the DepthRequest
// proto let callers ask for less work.
//
// A failed model load is recorded via recordModelLoadFailure and returned.
// When tracing is enabled, the backend call is timed and recorded as a
// BackendTraceDepth entry whether or not it succeeded.
func Depth(
	ctx context.Context,
	in *proto.DepthRequest,
	loader *model.ModelLoader,
	appConfig *config.ApplicationConfig,
	modelConfig config.ModelConfig,
) (*proto.DepthResponse, error) {
	depthModel, err := loader.Load(ModelOptions(modelConfig, appConfig)...)
	switch {
	case err != nil:
		recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
		return nil, err
	case depthModel == nil:
		return nil, fmt.Errorf("could not load depth model")
	}

	var began time.Time
	if appConfig.EnableTracing {
		trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
		began = time.Now()
	}

	res, err := depthModel.Depth(ctx, in)
	if !appConfig.EnableTracing {
		return res, err
	}

	var failure string
	if err != nil {
		failure = err.Error()
	}
	trace.RecordBackendTrace(trace.BackendTrace{
		Timestamp: began,
		Duration:  time.Since(began),
		Type:      trace.BackendTraceDepth,
		ModelName: modelConfig.Name,
		Backend:   modelConfig.Backend,
		// The source may be a long base64 payload; keep the summary short.
		Summary: trace.TruncateString(in.GetSrc(), 200),
		Error:   failure,
		Data: map[string]any{
			"exports": in.GetExports(),
		},
	})
	return res, err
}

View File

@@ -368,6 +368,25 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
if c.ReasoningEffort != "" {
metadata["reasoning_effort"] = c.ReasoningEffort
}
// Client request metadata overrides the server-derived reasoning levers and
// reaches every backend through these standalone string keys (Python backends
// read them directly). The reserved blob key is server-owned and skipped.
for k, v := range c.RequestMetadata {
if k == "chat_template_kwargs" {
continue
}
metadata[k] = v
}
// Build the generic chat_template_kwargs blob (model config map + coerced
// metadata) for llama.cpp and write it LAST so a client cannot clobber it.
if blob := c.ResolveChatTemplateKwargs(metadata); len(blob) > 0 {
b, err := json.Marshal(blob)
if err != nil {
xlog.Warn("failed to marshal chat_template_kwargs", "error", err)
} else {
metadata["chat_template_kwargs"] = string(b)
}
}
pbOpts.Metadata = metadata
// Logprobs and TopLogprobs are set by the caller if provided

View File

@@ -161,3 +161,54 @@ var _ = Describe("grpcModelOpts NBatch", func() {
Expect(opts.ContextSize).To(BeEquivalentTo(4096), "n_batch must match the effective n_ctx the backend receives")
})
})
// Specs for the chat_template_kwargs plumbing in gRPCPredictOpts. They pin that
// the model config map and the per-request metadata are merged, coerced and
// serialised into the JSON blob stored under the "chat_template_kwargs" metadata
// key, and that client metadata also replaces the server-derived standalone
// enable_thinking key.
var _ = Describe("gRPCPredictOpts chat_template_kwargs metadata", func() {
	// newCfg returns a ModelConfig with SetDefaults applied.
	newCfg := func() config.ModelConfig {
		mc := config.ModelConfig{}
		mc.SetDefaults()
		return mc
	}

	// decodeBlob parses the serialised blob and fails the spec if it is not
	// valid JSON.
	decodeBlob := func(raw string) map[string]any {
		var decoded map[string]any
		Expect(json.Unmarshal([]byte(raw), &decoded)).To(Succeed())
		return decoded
	}

	It("serialises the config map into the chat_template_kwargs blob", func() {
		mc := newCfg()
		mc.ChatTemplateKwargs = map[string]any{"preserve_thinking": true}

		predictOpts := gRPCPredictOpts(mc, "/tmp/models")

		Expect(predictOpts.Metadata).To(HaveKey("chat_template_kwargs"))
		Expect(decodeBlob(predictOpts.Metadata["chat_template_kwargs"])).To(HaveKeyWithValue("preserve_thinking", true))
	})

	It("lets client request metadata override the server-derived enable_thinking key", func() {
		mc := newCfg()
		reasoningOff := true
		// Server side asks for enable_thinking=false ...
		mc.ReasoningConfig = reasoning.Config{DisableReasoning: &reasoningOff}
		// ... and the client asks for the opposite.
		mc.RequestMetadata = map[string]string{"enable_thinking": "true"}

		predictOpts := gRPCPredictOpts(mc, "/tmp/models")

		// The standalone string key carries the client value.
		Expect(predictOpts.Metadata).To(HaveKeyWithValue("enable_thinking", "true"))
		// The blob carries it too, coerced to a real boolean.
		Expect(decodeBlob(predictOpts.Metadata["chat_template_kwargs"])).To(HaveKeyWithValue("enable_thinking", true))
	})

	It("does not let a client clobber the blob via a chat_template_kwargs metadata key", func() {
		mc := newCfg()
		mc.ChatTemplateKwargs = map[string]any{"preserve_thinking": true}
		mc.RequestMetadata = map[string]string{"chat_template_kwargs": "{\"preserve_thinking\": false}"}

		predictOpts := gRPCPredictOpts(mc, "/tmp/models")

		Expect(decodeBlob(predictOpts.Metadata["chat_template_kwargs"])).To(HaveKeyWithValue("preserve_thinking", true))
	})

	It("omits the blob when there is nothing to forward", func() {
		predictOpts := gRPCPredictOpts(newCfg(), "/tmp/models")
		Expect(predictOpts.Metadata).ToNot(HaveKey("chat_template_kwargs"))
	})
})

View File

@@ -21,7 +21,6 @@ const (
UsecaseSoundGeneration = "sound_generation"
UsecaseRerank = "rerank"
UsecaseDetection = "detection"
UsecaseDepth = "depth"
UsecaseVAD = "vad"
UsecaseAudioTransform = "audio_transform"
UsecaseDiarization = "diarization"
@@ -45,7 +44,6 @@ const (
MethodSoundGeneration GRPCMethod = "SoundGeneration"
MethodTokenizeString GRPCMethod = "TokenizeString"
MethodDetect GRPCMethod = "Detect"
MethodDepth GRPCMethod = "Depth"
MethodRerank GRPCMethod = "Rerank"
MethodVAD GRPCMethod = "VAD"
MethodAudioTransform GRPCMethod = "AudioTransform"
@@ -143,11 +141,6 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
GRPCMethod: MethodDetect,
Description: "Object detection via the Detect RPC with bounding boxes.",
},
UsecaseDepth: {
Flag: FLAG_DEPTH,
GRPCMethod: MethodDepth,
Description: "Per-pixel metric depth, camera pose and 3D point cloud via the Depth RPC (Depth Anything 3).",
},
UsecaseVAD: {
Flag: FLAG_VAD,
GRPCMethod: MethodVAD,
@@ -495,13 +488,6 @@ var BackendCapabilities = map[string]BackendCapability{
DefaultUsecases: []string{UsecaseDetection},
Description: "RF-DETR C++ object detection",
},
"depth-anything": {
GRPCMethods: []GRPCMethod{MethodDepth, MethodPredict, MethodGenerateImage},
PossibleUsecases: []string{UsecaseDepth},
DefaultUsecases: []string{UsecaseDepth},
AcceptsImages: true,
Description: "Depth Anything 3 C++ — per-pixel metric depth, camera pose and 3D point cloud",
},
// --- Face and speaker recognition backends ---
"insightface": {

View File

@@ -0,0 +1,48 @@
package config_test
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/mudler/LocalAI/core/config"
)
// Specs for ModelConfig.ResolveChatTemplateKwargs: the model config map is the
// base layer and the coerced metadata map passed in is applied on top of it.
var _ = Describe("ModelConfig.ResolveChatTemplateKwargs", func() {
	It("returns nil when nothing is set", func() {
		empty := &config.ModelConfig{}
		Expect(empty.ResolveChatTemplateKwargs(nil)).To(BeNil())
	})

	It("returns the config map when no metadata is present", func() {
		mc := &config.ModelConfig{
			ChatTemplateKwargs: map[string]any{"preserve_thinking": true},
		}
		Expect(mc.ResolveChatTemplateKwargs(nil)).To(HaveKeyWithValue("preserve_thinking", true))
	})

	It("lets metadata override the config map", func() {
		mc := &config.ModelConfig{
			ChatTemplateKwargs: map[string]any{"enable_thinking": true},
		}
		meta := map[string]string{"enable_thinking": "false"}
		Expect(mc.ResolveChatTemplateKwargs(meta)).To(HaveKeyWithValue("enable_thinking", false))
	})

	It("coerces true/false to bool and leaves other strings as-is", func() {
		mc := &config.ModelConfig{}
		meta := map[string]string{
			"enable_thinking":  "true",
			"reasoning_effort": "high",
		}
		resolved := mc.ResolveChatTemplateKwargs(meta)
		Expect(resolved).To(HaveKeyWithValue("enable_thinking", true))
		Expect(resolved).To(HaveKeyWithValue("reasoning_effort", "high"))
	})

	It("skips the reserved chat_template_kwargs metadata key but keeps siblings", func() {
		mc := &config.ModelConfig{}
		meta := map[string]string{
			"chat_template_kwargs": "{\"x\":1}",
			"preserve_thinking":    "true",
		}
		resolved := mc.ResolveChatTemplateKwargs(meta)
		Expect(resolved).ToNot(HaveKey("chat_template_kwargs"))
		Expect(resolved).To(HaveKeyWithValue("preserve_thinking", true))
	})
})

View File

@@ -64,7 +64,6 @@ var UsecaseOptions = []FieldOption{
{Value: "image", Label: "Image"},
{Value: "vision", Label: "Vision"},
{Value: "detection", Label: "Detection"},
{Value: "depth", Label: "Depth"},
{Value: "face_recognition", Label: "Face Recognition"},
{Value: "transcript", Label: "Transcript"},
{Value: "diarization", Label: "Diarization"},

View File

@@ -70,6 +70,19 @@ type ModelConfig struct {
// (Harmony) or LFM2.5 — honor it; "none" also toggles enable_thinking off.
ReasoningEffort string `yaml:"reasoning_effort,omitempty" json:"reasoning_effort,omitempty"`
// ChatTemplateKwargs are arbitrary key/values forwarded to the backend's jinja
// chat template via chat_template_kwargs (e.g. preserve_thinking: true). The
// server-derived reasoning levers (enable_thinking / reasoning_effort) and any
// per-request metadata overrides layer on top. See gRPCPredictOpts.
ChatTemplateKwargs map[string]any `yaml:"chat_template_kwargs,omitempty" json:"chat_template_kwargs,omitempty"`
// RequestMetadata holds the raw client request `metadata` map for the current
// request. The request middleware stamps it; gRPCPredictOpts merges it into the
// backend gRPC metadata (overriding the server-derived enable_thinking /
// reasoning_effort) and folds it, coerced, into the chat_template_kwargs blob.
// Never persisted to YAML.
RequestMetadata map[string]string `yaml:"-" json:"-"`
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline" json:",inline"`
@@ -551,6 +564,44 @@ func (c *ModelConfig) ApplyReasoningEffort(requestEffort string) {
}
}
// coerceChatTemplateKwarg converts one request-metadata string into the value
// placed in the chat_template_kwargs map.
//
// Only the exact, lowercase literals "true" and "false" are turned into Go
// booleans; every other input (including "", "True" or "1") is returned
// unchanged as a string. The boolean conversion matters because a jinja
// `{% if flag %}` treats any non-empty string, "false" included, as truthy.
// Numeric or otherwise typed values are intentionally not handled here; those
// belong in the model YAML chat_template_kwargs, where YAML keeps the type.
func coerceChatTemplateKwarg(v string) any {
	if v == "true" {
		return true
	}
	if v == "false" {
		return false
	}
	return v
}
// ResolveChatTemplateKwargs returns the chat_template_kwargs map to forward to
// the backend.
//
// Layering, lowest priority first:
//  1. c.ChatTemplateKwargs, copied as-is (values keep their configured type);
//  2. meta, the already-merged backend metadata string map, with each value
//     passed through coerceChatTemplateKwarg.
//
// A "chat_template_kwargs" entry in meta is ignored so that a client cannot
// smuggle a nested blob; keys coming from c.ChatTemplateKwargs are not filtered.
// The result is a fresh map (neither input is modified), or nil when there is
// nothing to forward.
func (c *ModelConfig) ResolveChatTemplateKwargs(meta map[string]string) map[string]any {
	const reservedKey = "chat_template_kwargs"

	resolved := make(map[string]any, len(c.ChatTemplateKwargs)+len(meta))
	for name, val := range c.ChatTemplateKwargs {
		resolved[name] = val
	}
	for name, raw := range meta {
		if name != reservedKey {
			resolved[name] = coerceChatTemplateKwarg(raw)
		}
	}

	if len(resolved) > 0 {
		return resolved
	}
	return nil
}
// @Description PipelineStreaming toggles incremental delivery per realtime stage.
type PipelineStreaming struct {
LLM *bool `yaml:"llm,omitempty" json:"llm,omitempty"`
@@ -1291,10 +1342,6 @@ const (
// chat/completion/embeddings.
FLAG_SCORE ModelConfigUsecase = 0b10000000000000000000
// Marks a model as wired for the Depth gRPC primitive (per-pixel
// metric depth + camera pose + 3D point cloud via Depth Anything 3).
FLAG_DEPTH ModelConfigUsecase = 0b100000000000000000000
// Common Subsets
FLAG_LLM ModelConfigUsecase = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
@@ -1352,7 +1399,6 @@ func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
"FLAG_DIARIZATION": FLAG_DIARIZATION,
"FLAG_REALTIME_AUDIO": FLAG_REALTIME_AUDIO,
"FLAG_SCORE": FLAG_SCORE,
"FLAG_DEPTH": FLAG_DEPTH,
}
}
@@ -1496,13 +1542,6 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
}
}
if (u & FLAG_DEPTH) == FLAG_DEPTH {
depthBackends := []string{"depth-anything"}
if !slices.Contains(depthBackends, c.Backend) {
return false
}
}
if (u & FLAG_FACE_RECOGNITION) == FLAG_FACE_RECOGNITION {
faceBackends := []string{"insightface"}
if !slices.Contains(faceBackends, c.Backend) {

View File

@@ -1,95 +0,0 @@
package localai
import (
"encoding/base64"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)
// DepthEndpoint builds the echo handler behind POST /v1/depth. The handler
// reads the parsed schema.DepthRequest and the resolved model config from the
// echo context, forwards the request to backend.Depth and returns the result
// as a schema.DepthResponse JSON document.
// @Summary Estimates per-pixel depth (and optionally pose/points) from an image.
// @Tags depth
// @Param request body schema.DepthRequest true "query params"
// @Success 200 {object} schema.DepthResponse "Response"
// @Router /v1/depth [post]
func DepthEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		// Both context values are expected to have been stored by earlier
		// middleware; a missing/mistyped value or an empty model name is a 400.
		depthReq, isDepthReq := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.DepthRequest)
		if !isDepthReq || depthReq.Model == "" {
			return echo.ErrBadRequest
		}
		modelCfg, hasCfg := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !hasCfg || modelCfg == nil {
			return echo.ErrBadRequest
		}

		// NOTE(review): this logs the raw image field, which may be a full
		// base64 payload rather than a URL; consider truncating it.
		xlog.Debug("Depth", "image", depthReq.Image, "backend", modelCfg.Backend)

		src, err := decodeImageInput(depthReq.Image)
		if err != nil {
			return err
		}

		grpcReq := &proto.DepthRequest{
			Src:               src,
			Dst:               depthReq.Dst,
			IncludeDepth:      depthReq.IncludeDepth,
			IncludeConfidence: depthReq.IncludeConfidence,
			IncludePose:       depthReq.IncludePose,
			IncludeSky:        depthReq.IncludeSky,
			IncludePoints:     depthReq.IncludePoints,
			PointsConfThresh:  depthReq.PointsConfThresh,
			Exports:           depthReq.Exports,
		}

		// A bare request (no include_* flag at all) still gets a useful answer:
		// depth, confidence, pose and sky are switched on. The point cloud is
		// NOT part of this default and must be requested explicitly.
		anyRequested := grpcReq.IncludeDepth ||
			grpcReq.IncludeConfidence ||
			grpcReq.IncludePose ||
			grpcReq.IncludeSky ||
			grpcReq.IncludePoints
		if !anyRequested {
			grpcReq.IncludeDepth = true
			grpcReq.IncludeConfidence = true
			grpcReq.IncludePose = true
			grpcReq.IncludeSky = true
		}

		res, err := backend.Depth(c.Request().Context(), grpcReq, ml, appConfig, *modelCfg)
		if err != nil {
			return mapBackendError(err)
		}

		out := schema.DepthResponse{
			Width:       res.GetWidth(),
			Height:      res.GetHeight(),
			Depth:       res.GetDepth(),
			Confidence:  res.GetConfidence(),
			Sky:         res.GetSky(),
			Extrinsics:  res.GetExtrinsics(),
			Intrinsics:  res.GetIntrinsics(),
			NumPoints:   res.GetNumPoints(),
			Points:      res.GetPoints(),
			ExportPaths: res.GetExportPaths(),
			IsMetric:    res.GetIsMetric(),
		}
		// Point colours are raw bytes on the wire; they are base64-encoded for
		// JSON and left empty when the backend returned none.
		if colors := res.GetPointColors(); len(colors) > 0 {
			out.PointColors = base64.StdEncoding.EncodeToString(colors)
		}

		return c.JSON(200, out)
	}
}

View File

@@ -318,6 +318,13 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
// (an operator's explicit disable wins over a request asking to think).
config.ApplyReasoningEffort(input.ReasoningEffort)
// Forward the client's request metadata so chat-template kwargs set per-request
// (enable_thinking, reasoning_effort, preserve_thinking, ...) reach the backend
// and override the model's reasoning-config defaults. See gRPCPredictOpts.
if len(input.Metadata) > 0 {
config.RequestMetadata = input.Metadata
}
// Collapse the modern max_completion_tokens alias into the
// legacy Maxtokens field so downstream code reads exactly one.
// MaxCompletionTokens wins on conflict — it's the canonical

View File

@@ -731,3 +731,60 @@ var _ = Describe("SetModelAndConfig reasoning_effort parsing (chat completions)"
Expect(*(*captured2).ReasoningConfig.DisableReasoning).To(BeFalse())
})
})
// Specs pinning that the OpenAI request `metadata` object ends up on the
// per-request ModelConfig (RequestMetadata) after the request middlewares ran.
var _ = Describe("SetModelAndConfig metadata passthrough (chat completions)", func() {
	var modelDir string

	BeforeEach(func() {
		dir, err := os.MkdirTemp("", "localai-test-models-*")
		modelDir = dir
		Expect(err).ToNot(HaveOccurred())
	})

	AfterEach(func() {
		// Best-effort cleanup of the temporary models directory.
		_ = os.RemoveAll(modelDir)
	})

	// buildApp writes a minimal model YAML into modelDir and returns an echo
	// app whose chat-completions route records the ModelConfig it finds in the
	// request context, together with the pointer that receives it.
	buildApp := func() (*echo.Echo, **config.ModelConfig) {
		modelYAML := []byte("name: test-model\nbackend: llama\n")
		Expect(os.WriteFile(filepath.Join(modelDir, "test-model.yaml"), modelYAML, 0644)).To(Succeed())

		sysState := &system.SystemState{Model: system.Model{ModelsPath: modelDir}}
		appCfg := config.NewApplicationConfig()
		appCfg.SystemState = sysState
		extractor := NewRequestExtractor(config.NewModelConfigLoader(modelDir), model.NewModelLoader(sysState), appCfg)

		seen := new(*config.ModelConfig)

		// Terminal handler: remember the config stored in the context.
		recordConfig := func(c echo.Context) error {
			if cfg, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig); ok {
				*seen = cfg
			}
			return c.String(http.StatusOK, "ok")
		}
		// Middleware wrapper around SetOpenAIRequest.
		applyOpenAIRequest := func(next echo.HandlerFunc) echo.HandlerFunc {
			return func(c echo.Context) error {
				if err := extractor.SetOpenAIRequest(c); err != nil {
					return err
				}
				return next(c)
			}
		}
		newRequest := func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }

		app := echo.New()
		app.POST("/v1/chat/completions",
			recordConfig,
			extractor.SetModelAndConfig(newRequest),
			applyOpenAIRequest,
		)
		return app, seen
	}

	It("stamps request metadata onto the config", func() {
		app, seen := buildApp()

		payload := `{"model":"test-model","messages":[{"role":"user","content":"hi"}],` +
			`"metadata":{"preserve_thinking":"true"}}`
		httpReq := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(payload))
		httpReq.Header.Set("Content-Type", "application/json")
		recorder := httptest.NewRecorder()

		app.ServeHTTP(recorder, httpReq)

		Expect(recorder.Code).To(Equal(http.StatusOK))
		Expect(*seen).ToNot(BeNil())
		Expect((*seen).RequestMetadata).To(HaveKeyWithValue("preserve_thinking", "true"))
	})
})

View File

@@ -98,12 +98,6 @@ func RegisterLocalAIRoutes(router *echo.Echo,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DETECTION)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DetectionRequest) }))
depthHandler := localai.DepthEndpoint(cl, ml, appConfig)
router.POST("/v1/depth",
depthHandler,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DEPTH)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DepthRequest) }))
// Face recognition endpoints
faceMw := []echo.MiddlewareFunc{
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_FACE_RECOGNITION)),

View File

@@ -181,40 +181,6 @@ type Detection struct {
Mask string `json:"mask,omitempty"` // base64-encoded PNG segmentation mask
}
// DepthRequest is the JSON body accepted by the /v1/depth endpoint. The
// include_* flags and the exports list let a caller request only part of the
// Depth Anything 3 output (for example depth only, or depth plus pose without
// the point cloud).
type DepthRequest struct {
	BasicModelRequest

	// Image is the URL or base64-encoded image to analyze.
	Image string `json:"image"`
	// Dst is an optional output directory for exports (glb/colmap).
	Dst string `json:"dst,omitempty"`

	// IncludeDepth asks for the per-pixel depth map.
	IncludeDepth bool `json:"include_depth,omitempty"`
	// IncludeConfidence asks for the per-pixel confidence map (DualDPT).
	IncludeConfidence bool `json:"include_confidence,omitempty"`
	// IncludePose asks for the camera extrinsics/intrinsics (DualDPT).
	IncludePose bool `json:"include_pose,omitempty"`
	// IncludeSky asks for the per-pixel sky map (mono models).
	IncludeSky bool `json:"include_sky,omitempty"`
	// IncludePoints asks for back-projection to a 3D point cloud (DualDPT).
	IncludePoints bool `json:"include_points,omitempty"`

	// PointsConfThresh keeps only points with confidence >= this threshold.
	PointsConfThresh float32 `json:"points_conf_thresh,omitempty"`
	// Exports lists the requested exports: "glb", "colmap".
	Exports []string `json:"exports,omitempty"`
}
// DepthResponse is the JSON document returned by the /v1/depth endpoint; it
// mirrors the DepthResponse proto. width, height and is_metric are always
// serialised, every other field is omitted when empty.
type DepthResponse struct {
	Width  int32 `json:"width"`
	Height int32 `json:"height"`

	// Depth is the width*height row-major metric depth map.
	Depth []float32 `json:"depth,omitempty"`
	// Confidence is the width*height row-major confidence map (DualDPT).
	Confidence []float32 `json:"confidence,omitempty"`
	// Sky is the width*height row-major sky map (mono).
	Sky []float32 `json:"sky,omitempty"`

	// Extrinsics holds 12 floats: a 3x4 row-major world-to-camera matrix.
	Extrinsics []float32 `json:"extrinsics,omitempty"`
	// Intrinsics holds 9 floats: a 3x3 row-major matrix.
	Intrinsics []float32 `json:"intrinsics,omitempty"`

	// NumPoints is the number of 3D points.
	NumPoints int32 `json:"num_points,omitempty"`
	// Points holds num_points*3 xyz coordinates in world space.
	Points []float32 `json:"points,omitempty"`
	// PointColors is the base64 encoding of num_points*3 uint8 rgb values.
	PointColors string `json:"point_colors,omitempty"`

	// ExportPaths lists the paths written for the requested exports.
	ExportPaths []string `json:"export_paths,omitempty"`
	// IsMetric reports whether the depth is in metric units.
	IsMetric bool `json:"is_metric"`
}
// ─── Face recognition ──────────────────────────────────────────────
//
// FacialArea describes a bounding box for a detected face.

View File

@@ -169,9 +169,6 @@ func (c *fakeBackendClient) SoundGeneration(_ context.Context, _ *pb.SoundGenera
// Detect is a test stub: it ignores every argument and returns a nil response
// with a nil error.
func (c *fakeBackendClient) Detect(_ context.Context, _ *pb.DetectOptions, _ ...ggrpc.CallOption) (*pb.DetectResponse, error) {
return nil, nil
}
// Depth is a test stub: it ignores every argument and returns a nil response
// with a nil error.
func (c *fakeBackendClient) Depth(_ context.Context, _ *pb.DepthRequest, _ ...ggrpc.CallOption) (*pb.DepthResponse, error) {
return nil, nil
}
// FaceVerify is a test stub: it ignores every argument and returns a nil
// response with a nil error.
func (c *fakeBackendClient) FaceVerify(_ context.Context, _ *pb.FaceVerifyRequest, _ ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
return nil, nil
}

View File

@@ -152,12 +152,6 @@ func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOption
return res, c.reconcile(err)
}
// Depth forwards the call to the wrapped Backend. c.track(ctx) is invoked
// before the call and the function it returns runs when Depth returns; the
// backend error is passed through c.reconcile before being handed back.
func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
	untrack := c.track(ctx)
	defer untrack()

	resp, callErr := c.Backend.Depth(ctx, in, opts...)
	return resp, c.reconcile(callErr)
}
func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
defer c.track(ctx)()
res, err := c.Backend.Rerank(ctx, in, opts...)

View File

@@ -100,10 +100,6 @@ func (f *fakeGRPCBackend) Detect(_ context.Context, _ *pb.DetectOptions, _ ...gg
return &pb.DetectResponse{}, nil
}
// Depth is a test stub: it ignores every argument and returns an empty,
// non-nil DepthResponse with a nil error.
func (f *fakeGRPCBackend) Depth(_ context.Context, _ *pb.DepthRequest, _ ...ggrpc.CallOption) (*pb.DepthResponse, error) {
return &pb.DepthResponse{}, nil
}
// FaceVerify is a test stub: it ignores every argument and returns an empty,
// non-nil FaceVerifyResponse with a nil error.
func (f *fakeGRPCBackend) FaceVerify(_ context.Context, _ *pb.FaceVerifyRequest, _ ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
return &pb.FaceVerifyResponse{}, nil
}

View File

@@ -25,7 +25,6 @@ const (
BackendTraceRerank BackendTraceType = "rerank"
BackendTraceTokenize BackendTraceType = "tokenize"
BackendTraceDetection BackendTraceType = "detection"
BackendTraceDepth BackendTraceType = "depth"
BackendTraceFaceVerify BackendTraceType = "face_verify"
BackendTraceFaceAnalyze BackendTraceType = "face_analyze"
BackendTraceVoiceVerify BackendTraceType = "voice_verify"

View File

@@ -438,6 +438,36 @@ pipeline:
reasoning_effort: none # overrides the LLM model's own reasoning_effort
```
#### Custom `chat_template_kwargs`
Some jinja chat templates expose extra variables beyond `enable_thinking` /
`reasoning_effort` (for example Qwen3's `preserve_thinking`). Set arbitrary key/values in
the model config and they are forwarded to the backend's `chat_template_kwargs` as-is, so
you don't need a dedicated server option per template variable:
```yaml
name: qwen3
chat_template_kwargs:
preserve_thinking: true
```
You can also override (or add) any of these per request through the OpenAI `metadata`
field on `/v1/chat/completions`. Values are strings; `"true"` / `"false"` are coerced to
booleans, anything else is passed through as a string. A `chat_template_kwargs` key
inside `metadata` is reserved and ignored:
```json
{
"model": "qwen3",
"messages": [{"role": "user", "content": "hi"}],
"metadata": { "preserve_thinking": "true", "enable_thinking": "false" }
}
```
Per-request `metadata` overrides the model config defaults and the reasoning-config levers,
and (for `enable_thinking` / `reasoning_effort`) takes effect across every backend that
reads them, not just llama.cpp. Typed (non-boolean) values are only supported through the
model YAML `chat_template_kwargs`, where YAML preserves the type.
### Multimodal Backend Options
| Option | Type | Default | Description |

View File

@@ -8018,150 +8018,6 @@
- filename: locate-anything-q8_0.gguf
uri: huggingface://mudler/locate-anything.cpp-gguf/locate-anything-q8_0.gguf
sha256: 0909d8a1aba584b482d501baae032611d1559878be1b7f6606ba516687c5380d
- &depth-anything-3-base
name: depth-anything-3-base
url: github:mudler/LocalAI/gallery/virtual.yaml@master
urls:
- https://github.com/mudler/depth-anything.cpp
- https://huggingface.co/depth-anything/Depth-Anything-V3
- https://huggingface.co/mudler/depth-anything.cpp-gguf
description: |
Depth Anything 3 (base) monocular metric depth + camera pose, served via the
native depth-anything.cpp backend (C++/ggml + purego, no Python at inference).
Given an image it returns a dense depth map plus the recovered camera
extrinsics (3x4) and intrinsics (3x3). Use GenerateImage (src -> normalized
depth PNG at dst) or Predict (JSON depth stats + pose). q4_k is the
recommended CPU default.
license: apache-2.0
icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4
tags:
- depth-estimation
- camera-pose
- depth-anything
- native
- cpp
- cpu
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-q4_k.gguf
files:
- filename: depth-anything-base-q4_k.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-q4_k.gguf
sha256: "43cd45d00f9024f4319f4beabd73155db5132e4b575bc52eff4131262c9d78f1"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-base-q8_0
description: |
Depth Anything 3 (base), q8_0 — near-lossless 8-bit quant (~149 MB). Same
depth + camera pose output as the q4_k default at higher fidelity.
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-q8_0.gguf
files:
- filename: depth-anything-base-q8_0.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-q8_0.gguf
sha256: "71b1c953113657f9a4fbef43ab6a16fe7a6f87b36da113a184f13c4a564968a0"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-base-f16
description: |
Depth Anything 3 (base), f16 — half precision (~233 MB), no measurable
accuracy loss vs f32. Depth + camera pose.
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-f16.gguf
files:
- filename: depth-anything-base-f16.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-f16.gguf
sha256: "2975419c99702ca646d5b7120c53e35c9fee158f0a803577241d16957f75624b"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-base-f32
description: |
Depth Anything 3 (base), f32 — maximum fidelity (~412 MB). Reference-parity
depth + camera pose.
overrides:
backend: depth-anything
parameters:
model: depth-anything-base-f32.gguf
files:
- filename: depth-anything-base-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-base-f32.gguf
sha256: "1b13b166e8a8b4f2c862f42d36edb2f9aab995a18cc527a52b9f160b99c6b8da"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-giant
description: |
Depth Anything 3 (giant / vitg), f32 — the large backbone (~4.9 GB) for
maximum quality depth + camera pose. GPU recommended.
tags:
- depth-estimation
- camera-pose
- depth-anything
- native
- cpp
- gpu
overrides:
backend: depth-anything
parameters:
model: depth-anything-giant-f32.gguf
files:
- filename: depth-anything-giant-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-giant-f32.gguf
sha256: "392edf64626be6a985487beb39c8d54cdc14f7feb2b53323742c96b71e7e7181"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-small
description: |
Depth Anything 3 (small / vits), f32 — the smallest backbone (~131 MB) for
fast CPU depth + camera pose. Same output as base at lower latency.
overrides:
backend: depth-anything
parameters:
model: depth-anything-small-f32.gguf
files:
- filename: depth-anything-small-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-small-f32.gguf
sha256: "eab5597e01dedde1a20c038590ae8c887b85ec35b882581138c08308e92c41e5"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-large
description: |
Depth Anything 3 (large / vitl), f32 (~1.6 GB) — higher quality depth +
camera pose than base. GPU recommended for interactive use.
tags:
- depth-estimation
- camera-pose
- depth-anything
- native
- cpp
- gpu
overrides:
backend: depth-anything
parameters:
model: depth-anything-large-f32.gguf
files:
- filename: depth-anything-large-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-large-f32.gguf
sha256: "a79eb3e19e8ec49f4daac484fb5fb67e15baac61518d229cf819e40c87080906"
- !!merge <<: *depth-anything-3-base
name: depth-anything-3-mono-large
description: |
Depth Anything 3 (monocular large / vitl), f32 (~1.3 GB) — single-image
monocular depth + a sky mask (no camera pose). DPT single-head variant; use
GenerateImage (src -> normalized depth PNG) or Predict (JSON depth stats).
overrides:
backend: depth-anything
parameters:
model: depth-anything-mono-large-f32.gguf
files:
- filename: depth-anything-mono-large-f32.gguf
uri: huggingface://mudler/depth-anything.cpp-gguf/depth-anything-mono-large-f32.gguf
sha256: "291b1a554af907c3f79986ee225da8933be5f7a31d73c81d06784cda284535de"
- name: rfdetr-cpp-base
url: github:mudler/LocalAI/gallery/virtual.yaml@master
urls:

View File

@@ -54,7 +54,6 @@ type Backend interface {
TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...grpc.CallOption) (*pb.FaceAnalyzeResponse, error)
VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)

View File

@@ -82,10 +82,6 @@ func (llm *Base) Detect(*pb.DetectOptions) (pb.DetectResponse, error) {
return pb.DetectResponse{}, fmt.Errorf("unimplemented")
}
// Depth is the default implementation on Base: it always returns a zero
// DepthResponse together with an "unimplemented" error.
func (llm *Base) Depth(*pb.DepthRequest) (pb.DepthResponse, error) {
return pb.DepthResponse{}, fmt.Errorf("unimplemented")
}
// FaceVerify is the default implementation on Base: it always returns a zero
// FaceVerifyResponse together with an "unimplemented" error.
func (llm *Base) FaceVerify(*pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
return pb.FaceVerifyResponse{}, fmt.Errorf("unimplemented")
}

View File

@@ -634,24 +634,6 @@ func (c *Client) Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.
return client.Detect(ctx, in, opts...)
}
// Depth issues the Depth RPC against the backend over a connection dialled for
// this call. It returns the backend response, or the dial error if the
// connection could not be established.
func (c *Client) Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error) {
// When the client is not in parallel mode, hold opMutex for the whole call.
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
}
// Flag the client as busy for the duration of the call.
c.setBusy(true)
defer c.setBusy(false)
// NOTE(review): wdMark/wdUnMark presumably notify a watchdog that a call is
// in flight — confirm against their definitions.
c.wdMark()
defer c.wdUnMark()
conn, err := c.dial()
if err != nil {
return nil, err
}
// The connection is closed when Depth returns; the Close error is discarded.
defer conn.Close()
client := pb.NewBackendClient(conn)
return client.Depth(ctx, in, opts...)
}
func (c *Client) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error) {
if !c.parallel {
c.opMutex.Lock()

View File

@@ -73,10 +73,6 @@ func (e *embedBackend) Detect(ctx context.Context, in *pb.DetectOptions, opts ..
return e.s.Detect(ctx, in)
}
// Depth calls the embedded server's Depth method directly. The gRPC call
// options in opts are not forwarded.
func (e *embedBackend) Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error) {
return e.s.Depth(ctx, in)
}
// FaceVerify calls the embedded server's FaceVerify method directly. The gRPC
// call options in opts are not forwarded.
func (e *embedBackend) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error) {
return e.s.FaceVerify(ctx, in)
}

View File

@@ -19,7 +19,6 @@ type AIModel interface {
GenerateImage(*pb.GenerateImageRequest) error
GenerateVideo(*pb.GenerateVideoRequest) error
Detect(*pb.DetectOptions) (pb.DetectResponse, error)
Depth(*pb.DepthRequest) (pb.DepthResponse, error)
FaceVerify(*pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error)
FaceAnalyze(*pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error)
VoiceVerify(*pb.VoiceVerifyRequest) (pb.VoiceVerifyResponse, error)

View File

@@ -156,18 +156,6 @@ func (s *server) Detect(ctx context.Context, in *pb.DetectOptions) (*pb.DetectRe
return &res, nil
}
// Depth handles the Depth RPC by delegating to the wrapped model. If the model
// reports Locking(), its lock is held for the duration of the call. On success
// a pointer to the model's response is returned; on failure the response is
// nil and the model's error is returned unchanged.
func (s *server) Depth(ctx context.Context, in *pb.DepthRequest) (*pb.DepthResponse, error) {
	if s.llm.Locking() {
		s.llm.Lock()
		defer s.llm.Unlock()
	}

	out, depthErr := s.llm.Depth(in)
	if depthErr == nil {
		return &out, nil
	}
	return nil, depthErr
}
func (s *server) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest) (*pb.FaceVerifyResponse, error) {
if s.llm.Locking() {
s.llm.Lock()

View File

@@ -108,12 +108,6 @@ func (c *ConnectionEvictingClient) Detect(ctx context.Context, in *pb.DetectOpti
return result, err
}
// Depth forwards the call to the wrapped Backend, passes the resulting error
// (nil included) to c.checkErr, and then returns the backend's response and
// error unchanged.
func (c *ConnectionEvictingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
	resp, callErr := c.Backend.Depth(ctx, in, opts...)
	c.checkErr(callErr)
	return resp, callErr
}
func (c *ConnectionEvictingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
result, err := c.Backend.Rerank(ctx, in, opts...)
c.checkErr(err)