mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-16 04:38:50 -04:00
Compare commits
17 Commits
v4.4.3
...
backend/de
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
58581c6c34 | ||
|
|
fa443d16a2 | ||
|
|
a4550b0bd2 | ||
|
|
2388686369 | ||
|
|
edc61053aa | ||
|
|
9ba8521e7e | ||
|
|
51c23197ed | ||
|
|
2df2876db2 | ||
|
|
f648f07b13 | ||
|
|
1dedb5277c | ||
|
|
7d2a762b53 | ||
|
|
61cde6fd77 | ||
|
|
ca1668dd85 | ||
|
|
fdc352a618 | ||
|
|
692970e507 | ||
|
|
e046a7749f | ||
|
|
e5c95e0449 |
149
.github/backend-matrix.yml
vendored
149
.github/backend-matrix.yml
vendored
@@ -716,6 +716,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
@@ -1582,6 +1595,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -1621,6 +1647,19 @@ include:
|
||||
backend: "locate-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-depth-anything-cpp'
|
||||
base-image: "ubuntu:24.04"
|
||||
ubuntu-version: '2404'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -2898,6 +2937,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -2911,6 +2963,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f32-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f16'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -2924,6 +2989,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f16'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f16-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -2938,6 +3016,20 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -2952,6 +3044,20 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-depth-anything-cpp'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -3058,6 +3164,19 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-depth-anything-cpp'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "depth-anything-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
# whisper
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
@@ -4490,6 +4609,36 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# supertonic CPU (amd64)
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-supertonic'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "supertonic"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# supertonic CPU (arm64)
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-supertonic'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "supertonic"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
|
||||
# Darwin matrix (consumed by backend-jobs-darwin).
|
||||
includeDarwin:
|
||||
|
||||
5
.github/workflows/secscan.yaml
vendored
5
.github/workflows/secscan.yaml
vendored
@@ -21,7 +21,10 @@ jobs:
|
||||
uses: securego/gosec@v2.27.1
|
||||
with:
|
||||
# We let the report content trigger a failure using the GitHub Security features.
|
||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||
# backend/go/supertonic is excluded: it vendors upstream supertone-inc/supertonic
|
||||
# (helper.go), whose findings (G304 model-file loads, G404 math/rand for flow-matching
|
||||
# noise, G104 unhandled errors) are inherent to that upstream code, not ours to rewrite.
|
||||
args: '-no-fail -exclude-dir=backend/go/supertonic -fmt sarif -out results.sarif ./...'
|
||||
- name: Upload SARIF file
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
uses: github/codeql-action/upload-sarif@v4
|
||||
|
||||
@@ -74,6 +74,8 @@ linters:
|
||||
paths:
|
||||
# Upstream whisper.cpp source tree fetched by the whisper backend Makefile.
|
||||
- 'backend/go/whisper/sources'
|
||||
# Vendored upstream supertonic pipeline (supertone-inc/supertonic go/helper.go).
|
||||
- 'backend/go/supertonic/helper.go'
|
||||
- 'docs/'
|
||||
rules:
|
||||
# CLI entry points: kong's `env:"..."` tag is the legitimate env→struct
|
||||
|
||||
7
Makefile
7
Makefile
@@ -1,5 +1,5 @@
|
||||
# Disable parallel execution for backend builds
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic
|
||||
|
||||
GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
@@ -595,6 +595,7 @@ test-extra: prepare-test-extra
|
||||
$(MAKE) -C backend/rust/kokoros test
|
||||
$(MAKE) -C backend/go/rfdetr-cpp test
|
||||
$(MAKE) -C backend/go/locate-anything-cpp test
|
||||
$(MAKE) -C backend/go/supertonic test
|
||||
|
||||
##
|
||||
## End-to-end gRPC tests that exercise a built backend container image.
|
||||
@@ -1181,6 +1182,7 @@ BACKEND_VIBEVOICE_CPP = vibevoice-cpp|golang|.|false|true
|
||||
BACKEND_LOCALVQE = localvqe|golang|.|false|true
|
||||
BACKEND_OPUS = opus|golang|.|false|true
|
||||
BACKEND_SHERPA_ONNX = sherpa-onnx|golang|.|false|true
|
||||
BACKEND_SUPERTONIC = supertonic|golang|.|false|true
|
||||
|
||||
# Python backends with root context
|
||||
BACKEND_RERANKERS = rerankers|python|.|false|true
|
||||
@@ -1308,12 +1310,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR_CPP)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_SHERPA_ONNX)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_SUPERTONIC)))
|
||||
|
||||
# Pattern rule for docker-save targets
|
||||
docker-save-%: backend-images
|
||||
docker save local-ai-backend:$* -o backend-images/$*.tar
|
||||
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-crispasr docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-rfdetr-cpp docker-build-qwen3-tts-cpp docker-build-omnivoice-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-crispasr docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-rfdetr-cpp docker-build-qwen3-tts-cpp docker-build-omnivoice-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy docker-build-supertonic
|
||||
|
||||
########################################################
|
||||
### Mock Backend for E2E Tests
|
||||
|
||||
@@ -24,6 +24,7 @@ service Backend {
|
||||
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||
rpc Detect(DetectOptions) returns (DetectResponse) {}
|
||||
rpc Depth(DepthRequest) returns (DepthResponse) {}
|
||||
rpc FaceVerify(FaceVerifyRequest) returns (FaceVerifyResponse) {}
|
||||
rpc FaceAnalyze(FaceAnalyzeRequest) returns (FaceAnalyzeResponse) {}
|
||||
rpc VoiceVerify(VoiceVerifyRequest) returns (VoiceVerifyResponse) {}
|
||||
@@ -670,6 +671,35 @@ message DetectResponse {
|
||||
repeated Detection Detections = 1;
|
||||
}
|
||||
|
||||
// --- Depth estimation messages (Depth Anything 3) ---
|
||||
|
||||
message DepthRequest {
|
||||
string src = 1; // input image (filesystem path or base64-encoded payload)
|
||||
string dst = 2; // optional output directory for exports (glb/colmap)
|
||||
bool include_depth = 3; // return the per-pixel metric depth map
|
||||
bool include_confidence = 4; // return the per-pixel confidence map (DualDPT)
|
||||
bool include_pose = 5; // return camera extrinsics/intrinsics (DualDPT)
|
||||
bool include_sky = 6; // return the per-pixel sky map (mono models)
|
||||
bool include_points = 7; // back-project to a 3D point cloud (DualDPT)
|
||||
float points_conf_thresh = 8; // keep points with confidence >= this threshold
|
||||
repeated string exports = 9; // requested exports: "glb", "colmap"
|
||||
}
|
||||
|
||||
message DepthResponse {
|
||||
int32 width = 1; // processed depth-map width
|
||||
int32 height = 2; // processed depth-map height
|
||||
repeated float depth = 3; // width*height row-major metric depth
|
||||
repeated float confidence = 4; // width*height row-major confidence (DualDPT)
|
||||
repeated float sky = 5; // width*height row-major sky map (mono)
|
||||
repeated float extrinsics = 6; // 12 floats, 3x4 row-major (world-to-camera)
|
||||
repeated float intrinsics = 7; // 9 floats, 3x3 row-major
|
||||
int32 num_points = 8; // number of 3D points
|
||||
repeated float points = 9; // num_points*3 xyz, world space
|
||||
bytes point_colors = 10; // num_points*3 uint8 rgb
|
||||
repeated string export_paths = 11; // paths written for the requested exports
|
||||
bool is_metric = 12; // depth is in metric units
|
||||
}
|
||||
|
||||
// --- Face recognition messages ---
|
||||
|
||||
message FacialArea {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=e6f8112f3ba126eed3ff5b30cdd08085414a7516
|
||||
IK_LLAMA_VERSION?=5f917a64b391b7d31839845153a473a65f630458
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=4c6595503fe45d5a39f88d194e270f64c7424677
|
||||
LLAMA_VERSION?=4988f6e866057afd130c1515ecef0c9bab9a15f8
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
7
backend/go/depth-anything-cpp/.gitignore
vendored
Normal file
7
backend/go/depth-anything-cpp/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
sources/
|
||||
build*/
|
||||
package/
|
||||
libdepthanythingcpp*.so
|
||||
depth-anything-cpp
|
||||
test-models/
|
||||
test-data/
|
||||
28
backend/go/depth-anything-cpp/CMakeLists.txt
Normal file
28
backend/go/depth-anything-cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(libdepthanythingcpp LANGUAGES C CXX)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Static-link ggml into the depth-anything shared library so the resulting .so
|
||||
# has no runtime dependency on an external libggml — only on
|
||||
# libc/libstdc++/libgomp, which the LocalAI package step bundles into the
|
||||
# docker image.
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE)
|
||||
|
||||
# depth-anything.cpp build switches: skip CLI/tests, but build libdepthanything
|
||||
# itself as a SHARED library (DA_SHARED) while ggml stays static
|
||||
# (BUILD_SHARED_LIBS OFF above). The da_capi_* C ABI is compiled into
|
||||
# src/da_capi.cpp and re-exported by that shared library, so no extra MODULE
|
||||
# wrapper is needed (unlike locate-anything.cpp).
|
||||
set(DA_BUILD_CLI OFF CACHE BOOL "Disable depth-anything CLI" FORCE)
|
||||
set(DA_BUILD_TESTS OFF CACHE BOOL "Disable depth-anything tests" FORCE)
|
||||
set(DA_SHARED ON CACHE BOOL "Build libdepthanything as a shared lib" FORCE)
|
||||
|
||||
add_subdirectory(./sources/depth-anything.cpp)
|
||||
|
||||
# Emit libdepthanything.so into the top-level build dir so the Makefile can
|
||||
# rename it to the per-variant libdepthanythingcpp-<variant>.so.
|
||||
set_target_properties(depthanything PROPERTIES
|
||||
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
136
backend/go/depth-anything-cpp/Makefile
Normal file
136
backend/go/depth-anything-cpp/Makefile
Normal file
@@ -0,0 +1,136 @@
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
NATIVE?=false
|
||||
|
||||
GOCMD?=go
|
||||
GO_TAGS?=
|
||||
JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# depth-anything.cpp. Pin to a specific commit for a stable build; a squash
|
||||
# merge upstream can orphan a branch, so the native version is pinned by SHA.
|
||||
DEPTHANYTHING_REPO?=https://github.com/mudler/depth-anything.cpp.git
|
||||
DEPTHANYTHING_VERSION?=e0b6814d2f58261216da69d63326f1f2d75d4435
|
||||
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
endif
|
||||
|
||||
# Forward LocalAI's BUILD_TYPE to the matching ggml backend switch. depth-anything.cpp
|
||||
# force-sets GGML_CUDA/GGML_VULKAN/GGML_METAL from its own DA_GGML_* options, so
|
||||
# those must be toggled via the DA_GGML_* names (a bare -DGGML_CUDA=ON would be
|
||||
# overridden); the remaining ggml switches pass straight through.
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON -DDA_GGML_CUDA=ON
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DDA_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
CMAKE_ARGS+=-DDA_GGML_METAL=ON
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx \
|
||||
-DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx
|
||||
endif
|
||||
|
||||
sources/depth-anything.cpp:
|
||||
mkdir -p sources && \
|
||||
git clone --recursive $(DEPTHANYTHING_REPO) sources/depth-anything.cpp && \
|
||||
cd sources/depth-anything.cpp && \
|
||||
git checkout $(DEPTHANYTHING_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
# Detect OS
|
||||
UNAME_S := $(shell uname -s)
|
||||
|
||||
# Build the per-ISA CPU variants (avx, avx2, avx512) plus the fallback on Linux
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only the fallback variant
|
||||
VARIANT_TARGETS = libdepthanythingcpp-fallback.so
|
||||
endif
|
||||
|
||||
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
|
||||
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o depth-anything-cpp ./
|
||||
|
||||
package: depth-anything-cpp
|
||||
bash package.sh
|
||||
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
|
||||
# Build the AVX/AVX2/AVX512 variants (Linux only; the fallback rule below is for all platforms)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
libdepthanythingcpp-avx.so: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:avx${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
|
||||
libdepthanythingcpp-avx2.so: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:avx2${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
|
||||
libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:avx512${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
|
||||
libdepthanythingcpp-custom: CMakeLists.txt
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
|
||||
|
||||
all: depth-anything-cpp package
|
||||
|
||||
# `test` is invoked by the top-level Makefile's `test-extra` target. It builds
|
||||
# the backend binary + the fallback shared library (needed for dlopen at
|
||||
# runtime), then runs test.sh which downloads a small GGUF + a test image and
|
||||
# exercises the gRPC Load/Predict wire path via the Go smoke test in
|
||||
# main_test.go.
|
||||
test: depth-anything-cpp libdepthanythingcpp-fallback.so
|
||||
bash test.sh
|
||||
509
backend/go/depth-anything-cpp/godepthanythingcpp.go
Normal file
509
backend/go/depth-anything-cpp/godepthanythingcpp.go
Normal file
@@ -0,0 +1,509 @@
|
||||
package main
|
||||
|
||||
// godepthanythingcpp.go - gRPC handlers (Load, Predict, GenerateImage) for the
|
||||
// depth-anything-cpp backend, wrapping the Depth Anything 3 ggml C-API
|
||||
// (libdepthanythingcpp-<variant>.so) via purego.
|
||||
//
|
||||
// Embeds base.SingleThread to default the unimplemented RPCs to "not supported"
|
||||
// and to serialize calls — the C side shares a ggml graph allocator and is NOT
|
||||
// reentrant, so all inference must run one-at-a-time.
|
||||
//
|
||||
// Depth has no native OpenAI endpoint, so the model is exposed two ways:
|
||||
//
|
||||
// - GenerateImage(src, dst): run depth on the src image and write a
|
||||
// min-max-normalised grayscale depth PNG to dst.
|
||||
// - Predict(images[0]): run depth+pose and return a JSON blob with the depth
|
||||
// dimensions, depth stats and the camera extrinsics (3x4) / intrinsics (3x3).
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/png"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
// C-API function pointers, registered in main.go via purego. The da_capi_*
|
||||
// symbols live inside libdepthanything (src/da_capi.cpp) and are re-exported by
|
||||
// the DA_SHARED build.
|
||||
var (
|
||||
// da_capi_load(const char* gguf_path, int n_threads) -> da_ctx* (0 = fail)
|
||||
CapiLoad func(gguf string, nThreads int32) uintptr
|
||||
// da_capi_free(da_ctx* ctx) — safe on a 0 handle.
|
||||
CapiFree func(handle uintptr)
|
||||
// da_capi_last_error(da_ctx* ctx) -> const char* (owned by ctx, "" if none).
|
||||
// purego marshals the returned C string into a Go string (a copy), so we
|
||||
// never free it.
|
||||
CapiLastError func(handle uintptr) string
|
||||
// da_capi_depth_path(ctx, image_path, out_h*, out_w*) -> float* depth map
|
||||
// (row-major H*W); nil on error. Caller frees via da_capi_free_floats.
|
||||
CapiDepthPath func(handle uintptr, imagePath string, outH *int32, outW *int32) *float32
|
||||
// da_capi_free_floats(float* p)
|
||||
CapiFreeFloats func(p *float32)
|
||||
// da_capi_pose_path(ctx, image_path, out_ext[12], out_intr[9]) -> 0 ok, -1 err
|
||||
CapiPosePath func(handle uintptr, imagePath string, outExt *float32, outIntr *float32) int32
|
||||
// da_capi_depth_dense(ctx, image_path, out_h*, out_w*, out_depth**, out_conf**,
|
||||
// out_sky**, out_ext[12], out_intr[9], out_is_metric*) -> 0 ok, -1 err.
|
||||
// Each non-NULL out_depth/out_conf/out_sky receives a malloc'd float[H*W] (free
|
||||
// via da_capi_free_floats); buffers the model doesn't produce are set NULL.
|
||||
CapiDepthDense func(handle uintptr, imagePath string,
|
||||
outH, outW *int32,
|
||||
outDepth, outConf, outSky **float32,
|
||||
outExt, outIntr *float32,
|
||||
outIsMetric *int32) int32
|
||||
// da_capi_points(ctx, image_path, conf_thresh, out_n*, out_xyz**, out_rgb**) ->
|
||||
// 0 ok, -1 err. *out_xyz = malloc'd float[3*N] (free via da_capi_free_floats),
|
||||
// *out_rgb = malloc'd uint8[3*N] (free via da_capi_free_bytes).
|
||||
CapiPoints func(handle uintptr, imagePath string, confThresh float32,
|
||||
outN *int32, outXyz **float32, outRgb **byte) int32
|
||||
// da_capi_free_bytes(unsigned char* p)
|
||||
CapiFreeBytes func(p *byte)
|
||||
// da_capi_export_glb(ctx, image_path, out_glb) -> 0 ok, -1 err
|
||||
CapiExportGlb func(handle uintptr, imagePath string, outGlb string) int32
|
||||
// da_capi_export_colmap(ctx, image_path, out_dir, binary) -> 0 ok, -1 err
|
||||
CapiExportColmap func(handle uintptr, imagePath string, outDir string, binary int32) int32
|
||||
)
|
||||
|
||||
type DepthAnythingCpp struct {
|
||||
base.SingleThread
|
||||
handle uintptr
|
||||
}
|
||||
|
||||
// Load loads the GGUF model at opts.ModelFile (joined with opts.ModelPath if
|
||||
// relative) and stores the da_ctx handle for later inference calls.
|
||||
func (r *DepthAnythingCpp) Load(opts *pb.ModelOptions) error {
|
||||
modelFile := opts.ModelFile
|
||||
if modelFile == "" {
|
||||
modelFile = opts.Model
|
||||
}
|
||||
if modelFile == "" {
|
||||
return fmt.Errorf("depth-anything-cpp: ModelFile is empty")
|
||||
}
|
||||
|
||||
var modelPath string
|
||||
if filepath.IsAbs(modelFile) {
|
||||
modelPath = modelFile
|
||||
} else {
|
||||
modelPath = filepath.Join(opts.ModelPath, modelFile)
|
||||
}
|
||||
|
||||
if _, err := os.Stat(modelPath); err != nil {
|
||||
return fmt.Errorf("depth-anything-cpp: model file not found: %s: %w", modelPath, err)
|
||||
}
|
||||
|
||||
threads := opts.Threads
|
||||
if threads <= 0 {
|
||||
threads = 4
|
||||
}
|
||||
|
||||
// Release previous model if any (re-Load).
|
||||
if r.handle != 0 {
|
||||
CapiFree(r.handle)
|
||||
r.handle = 0
|
||||
}
|
||||
|
||||
h := CapiLoad(modelPath, threads)
|
||||
if h == 0 {
|
||||
// da_capi_last_error needs a ctx; on a failed load we have none (it
|
||||
// returns "" for a null ctx), so the text is best-effort.
|
||||
if msg := CapiLastError(0); msg != "" {
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s: %s", modelPath, msg)
|
||||
}
|
||||
return fmt.Errorf("depth-anything-cpp: da_capi_load failed for %s", modelPath)
|
||||
}
|
||||
r.handle = h
|
||||
return nil
|
||||
}
|
||||
|
||||
// depthResult is the JSON payload returned by Predict.
|
||||
type depthResult struct {
|
||||
DepthW int `json:"depth_w"`
|
||||
DepthH int `json:"depth_h"`
|
||||
DepthMin float32 `json:"depth_min"`
|
||||
DepthMax float32 `json:"depth_max"`
|
||||
Extrinsics [12]float32 `json:"extrinsics"` // 3x4 row-major
|
||||
Intrinsics [9]float32 `json:"intrinsics"` // 3x3 row-major
|
||||
}
|
||||
|
||||
// Predict runs depth+pose on the first supplied image and returns depth
|
||||
// statistics + camera pose as a JSON string. LocalAI wraps the string into the
|
||||
// Reply.Message of the gRPC response. The image in Images[0] may be a
|
||||
// filesystem path or a base64-encoded payload.
|
||||
func (r *DepthAnythingCpp) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
imgs := opts.GetImages()
|
||||
if len(imgs) == 0 {
|
||||
return "", fmt.Errorf("depth-anything-cpp: Predict requires an image in Images[]")
|
||||
}
|
||||
|
||||
imgPath, cleanup, err := materializeImage(imgs[0])
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("depth-anything-cpp: %w", err)
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
depth, h, w, ext, intr, err := r.runDepthPose(imgPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
dmin, dmax := minMax(depth)
|
||||
payload, err := json.Marshal(depthResult{
|
||||
DepthW: w, DepthH: h,
|
||||
DepthMin: dmin, DepthMax: dmax,
|
||||
Extrinsics: ext, Intrinsics: intr,
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("depth-anything-cpp: marshal: %w", err)
|
||||
}
|
||||
return string(payload), nil
|
||||
}
|
||||
|
||||
// GenerateImage runs depth on req.Src and writes a normalised grayscale depth
|
||||
// PNG to req.Dst.
|
||||
func (r *DepthAnythingCpp) GenerateImage(req *pb.GenerateImageRequest) error {
|
||||
if req.GetSrc() == "" {
|
||||
return fmt.Errorf("depth-anything-cpp: GenerateImage requires src")
|
||||
}
|
||||
if req.GetDst() == "" {
|
||||
return fmt.Errorf("depth-anything-cpp: GenerateImage requires dst")
|
||||
}
|
||||
|
||||
imgPath, cleanup, err := materializeImage(req.GetSrc())
|
||||
if err != nil {
|
||||
return fmt.Errorf("depth-anything-cpp: %w", err)
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
depth, h, w, _, _, err := r.runDepthPose(imgPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeDepthPNG(req.GetDst(), depth, h, w)
|
||||
}
|
||||
|
||||
// Depth is the typed Depth RPC. It runs the Depth Anything 3 pipeline on the
|
||||
// request's src image and fills a DepthResponse honoring the include_* flags and
|
||||
// exports: per-pixel metric depth + confidence (DualDPT) or depth + sky (mono),
|
||||
// camera extrinsics/intrinsics, an optional back-projected 3D point cloud and
|
||||
// glb/COLMAP exports. The src may be a filesystem path or a base64 payload.
|
||||
func (r *DepthAnythingCpp) Depth(in *pb.DepthRequest) (pb.DepthResponse, error) {
|
||||
// Accumulate into locals and return a single composite literal at the end:
|
||||
// returning a named pb.DepthResponse value would copy its embedded mutex
|
||||
// (go vet copylocks).
|
||||
if r.handle == 0 {
|
||||
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: model not loaded")
|
||||
}
|
||||
if in.GetSrc() == "" {
|
||||
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: Depth requires src")
|
||||
}
|
||||
|
||||
imgPath, cleanup, err := materializeImage(in.GetSrc())
|
||||
if err != nil {
|
||||
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: %w", err)
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
// Dense per-pixel output + pose. Pass buffer pointers only for the
|
||||
// requested maps so the native side can skip unrequested work; ext/intr
|
||||
// must always point at 12/9 floats per the C ABI.
|
||||
var (
|
||||
h, w, isMetric int32
|
||||
depthPtr, confPtr *float32
|
||||
skyPtr *float32
|
||||
ext [12]float32
|
||||
intr [9]float32
|
||||
pDepth, pConf, pSky **float32
|
||||
)
|
||||
if in.GetIncludeDepth() {
|
||||
pDepth = &depthPtr
|
||||
}
|
||||
if in.GetIncludeConfidence() {
|
||||
pConf = &confPtr
|
||||
}
|
||||
if in.GetIncludeSky() {
|
||||
pSky = &skyPtr
|
||||
}
|
||||
|
||||
rc := CapiDepthDense(r.handle, imgPath, &h, &w, pDepth, pConf, pSky, &ext[0], &intr[0], &isMetric)
|
||||
if rc != 0 {
|
||||
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: da_capi_depth_dense failed (rc=%d): %s", rc, r.lastError())
|
||||
}
|
||||
|
||||
n := int(h) * int(w)
|
||||
var (
|
||||
depth, conf, sky []float32
|
||||
extrinsics, intrinsic []float32
|
||||
numPoints int32
|
||||
points []float32
|
||||
pointColors []byte
|
||||
exportPaths []string
|
||||
)
|
||||
|
||||
if depthPtr != nil {
|
||||
depth = copyFloats(depthPtr, n)
|
||||
CapiFreeFloats(depthPtr)
|
||||
}
|
||||
if confPtr != nil {
|
||||
conf = copyFloats(confPtr, n)
|
||||
CapiFreeFloats(confPtr)
|
||||
}
|
||||
if skyPtr != nil {
|
||||
sky = copyFloats(skyPtr, n)
|
||||
CapiFreeFloats(skyPtr)
|
||||
}
|
||||
if in.GetIncludePose() {
|
||||
extrinsics = append([]float32(nil), ext[:]...)
|
||||
intrinsic = append([]float32(nil), intr[:]...)
|
||||
}
|
||||
|
||||
// 3D point cloud (DualDPT / pose-capable models only).
|
||||
if in.GetIncludePoints() {
|
||||
var (
|
||||
np int32
|
||||
xyzPtr *float32
|
||||
rgbPtr *byte
|
||||
)
|
||||
if rc := CapiPoints(r.handle, imgPath, in.GetPointsConfThresh(), &np, &xyzPtr, &rgbPtr); rc != 0 {
|
||||
return pb.DepthResponse{}, fmt.Errorf("depth-anything-cpp: da_capi_points failed (rc=%d): %s", rc, r.lastError())
|
||||
}
|
||||
numPoints = np
|
||||
if xyzPtr != nil {
|
||||
points = copyFloats(xyzPtr, int(np)*3)
|
||||
CapiFreeFloats(xyzPtr)
|
||||
}
|
||||
if rgbPtr != nil {
|
||||
pointColors = copyBytes(rgbPtr, int(np)*3)
|
||||
CapiFreeBytes(rgbPtr)
|
||||
}
|
||||
}
|
||||
|
||||
// Exports (glb / colmap). They are written under in.Dst (a directory); a
|
||||
// temp dir is used when Dst is empty.
|
||||
if len(in.GetExports()) > 0 {
|
||||
exportPaths, err = r.runExports(imgPath, in.GetDst(), in.GetExports())
|
||||
if err != nil {
|
||||
return pb.DepthResponse{}, err
|
||||
}
|
||||
}
|
||||
|
||||
return pb.DepthResponse{
|
||||
Width: w,
|
||||
Height: h,
|
||||
Depth: depth,
|
||||
Confidence: conf,
|
||||
Sky: sky,
|
||||
Extrinsics: extrinsics,
|
||||
Intrinsics: intrinsic,
|
||||
NumPoints: numPoints,
|
||||
Points: points,
|
||||
PointColors: pointColors,
|
||||
ExportPaths: exportPaths,
|
||||
IsMetric: isMetric != 0,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// runExports writes the requested exports for imgPath into dstDir and returns
|
||||
// the written paths. Supported exports: "glb", "colmap".
|
||||
func (r *DepthAnythingCpp) runExports(imgPath, dstDir string, exports []string) ([]string, error) {
|
||||
if dstDir == "" {
|
||||
tmp, err := os.MkdirTemp("", "depth-anything-export-*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("depth-anything-cpp: mkdir export dir: %w", err)
|
||||
}
|
||||
dstDir = tmp
|
||||
} else if err := os.MkdirAll(dstDir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("depth-anything-cpp: mkdir %s: %w", dstDir, err)
|
||||
}
|
||||
|
||||
var paths []string
|
||||
for _, exp := range exports {
|
||||
switch exp {
|
||||
case "glb":
|
||||
out := filepath.Join(dstDir, "pointcloud.glb")
|
||||
if rc := CapiExportGlb(r.handle, imgPath, out); rc != 0 {
|
||||
return nil, fmt.Errorf("depth-anything-cpp: da_capi_export_glb failed (rc=%d): %s", rc, r.lastError())
|
||||
}
|
||||
paths = append(paths, out)
|
||||
case "colmap":
|
||||
out := filepath.Join(dstDir, "colmap")
|
||||
if err := os.MkdirAll(out, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("depth-anything-cpp: mkdir %s: %w", out, err)
|
||||
}
|
||||
if rc := CapiExportColmap(r.handle, imgPath, out, 1); rc != 0 {
|
||||
return nil, fmt.Errorf("depth-anything-cpp: da_capi_export_colmap failed (rc=%d): %s", rc, r.lastError())
|
||||
}
|
||||
paths = append(paths, out)
|
||||
default:
|
||||
return nil, fmt.Errorf("depth-anything-cpp: unknown export %q (want glb|colmap)", exp)
|
||||
}
|
||||
}
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
// copyFloats duplicates n float32 values starting at p into a newly
// allocated Go slice, so the caller may free the source buffer afterwards.
// It returns nil when p is nil or n is not positive.
func copyFloats(p *float32, n int) []float32 {
	if p != nil && n > 0 {
		dup := make([]float32, n)
		copy(dup, unsafe.Slice(p, n))
		return dup
	}
	return nil
}
|
||||
|
||||
// copyBytes duplicates n bytes starting at p into a newly allocated Go
// slice. It returns nil when p is nil or n is not positive.
func copyBytes(p *byte, n int) []byte {
	if p != nil && n > 0 {
		dup := make([]byte, n)
		copy(dup, unsafe.Slice(p, n))
		return dup
	}
	return nil
}
|
||||
|
||||
// runDepthPose runs depth estimation then pose recovery on an image file. It
|
||||
// returns the row-major depth map (length h*w), its dimensions, the 3x4
|
||||
// extrinsics (12 floats) and 3x3 intrinsics (9 floats).
|
||||
func (r *DepthAnythingCpp) runDepthPose(imagePath string) (depth []float32, h, w int, ext [12]float32, intr [9]float32, err error) {
|
||||
if r.handle == 0 {
|
||||
err = fmt.Errorf("depth-anything-cpp: model not loaded")
|
||||
return
|
||||
}
|
||||
|
||||
var ch, cw int32
|
||||
ptr := CapiDepthPath(r.handle, imagePath, &ch, &cw)
|
||||
if ptr == nil {
|
||||
err = fmt.Errorf("depth-anything-cpp: da_capi_depth_path failed: %s", r.lastError())
|
||||
return
|
||||
}
|
||||
h, w = int(ch), int(cw)
|
||||
n := h * w
|
||||
if n > 0 {
|
||||
src := unsafe.Slice(ptr, n)
|
||||
depth = make([]float32, n)
|
||||
copy(depth, src)
|
||||
}
|
||||
CapiFreeFloats(ptr)
|
||||
|
||||
if rc := CapiPosePath(r.handle, imagePath, &ext[0], &intr[0]); rc != 0 {
|
||||
err = fmt.Errorf("depth-anything-cpp: da_capi_pose_path failed (rc=%d): %s", rc, r.lastError())
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// lastError returns the context's last error string, or "" if none.
|
||||
func (r *DepthAnythingCpp) lastError() string {
|
||||
if CapiLastError == nil || r.handle == 0 {
|
||||
return ""
|
||||
}
|
||||
return CapiLastError(r.handle)
|
||||
}
|
||||
|
||||
// materializeImage returns a filesystem path for an image argument that may be
|
||||
// either an existing path or a base64-encoded payload. When the input is
|
||||
// base64 it is decoded into a temp file; cleanup removes it (no-op for a path).
|
||||
func materializeImage(arg string) (path string, cleanup func(), err error) {
|
||||
cleanup = func() {}
|
||||
if _, statErr := os.Stat(arg); statErr == nil {
|
||||
return arg, cleanup, nil
|
||||
}
|
||||
// Strip an optional data URL prefix (data:image/...;base64,<payload>).
|
||||
b64 := arg
|
||||
if i := indexComma(b64); i >= 0 && hasDataPrefix(b64) {
|
||||
b64 = b64[i+1:]
|
||||
}
|
||||
data, decErr := base64.StdEncoding.DecodeString(b64)
|
||||
if decErr != nil {
|
||||
return "", cleanup, fmt.Errorf("image is neither an existing path nor valid base64: %v", decErr)
|
||||
}
|
||||
f, tErr := os.CreateTemp("", "depth-anything-*.img")
|
||||
if tErr != nil {
|
||||
return "", cleanup, tErr
|
||||
}
|
||||
if _, wErr := f.Write(data); wErr != nil {
|
||||
_ = f.Close()
|
||||
_ = os.Remove(f.Name())
|
||||
return "", cleanup, wErr
|
||||
}
|
||||
_ = f.Close()
|
||||
name := f.Name()
|
||||
return name, func() { _ = os.Remove(name) }, nil
|
||||
}
|
||||
|
||||
// hasDataPrefix reports whether s starts with the "data:" scheme marker.
func hasDataPrefix(s string) bool {
	const scheme = "data:"
	if len(s) < len(scheme) {
		return false
	}
	return s[:len(scheme)] == scheme
}
|
||||
|
||||
// indexComma returns the byte offset of the first ',' in s, or -1 when s
// contains no comma.
func indexComma(s string) int {
	pos := 0
	for pos < len(s) && s[pos] != ',' {
		pos++
	}
	if pos == len(s) {
		return -1
	}
	return pos
}
|
||||
|
||||
// writeDepthPNG min-max normalises a depth map and writes it as an 8-bit
|
||||
// grayscale PNG. Near = bright (255), far = dark (0), matching the usual
|
||||
// depth-map convention for inverse-depth-like outputs.
|
||||
func writeDepthPNG(dst string, depth []float32, h, w int) error {
|
||||
if h <= 0 || w <= 0 || len(depth) < h*w {
|
||||
return fmt.Errorf("depth-anything-cpp: writeDepthPNG: bad dims h=%d w=%d len=%d", h, w, len(depth))
|
||||
}
|
||||
dmin, dmax := minMax(depth)
|
||||
span := dmax - dmin
|
||||
if span <= 0 || math.IsNaN(float64(span)) {
|
||||
span = 1
|
||||
}
|
||||
img := image.NewGray(image.Rect(0, 0, w, h))
|
||||
for y := 0; y < h; y++ {
|
||||
for x := 0; x < w; x++ {
|
||||
v := depth[y*w+x]
|
||||
n := (v - dmin) / span // 0..1
|
||||
if math.IsNaN(float64(n)) {
|
||||
n = 0
|
||||
}
|
||||
if n < 0 {
|
||||
n = 0
|
||||
} else if n > 1 {
|
||||
n = 1
|
||||
}
|
||||
img.Pix[y*img.Stride+x] = uint8(n * 255)
|
||||
}
|
||||
}
|
||||
f, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = f.Close() }()
|
||||
return png.Encode(f, img)
|
||||
}
|
||||
|
||||
func minMax(v []float32) (mn, mx float32) {
|
||||
if len(v) == 0 {
|
||||
return 0, 0
|
||||
}
|
||||
mn, mx = v[0], v[0]
|
||||
for _, x := range v {
|
||||
if math.IsNaN(float64(x)) || math.IsInf(float64(x), 0) {
|
||||
continue
|
||||
}
|
||||
if x < mn {
|
||||
mn = x
|
||||
}
|
||||
if x > mx {
|
||||
mx = x
|
||||
}
|
||||
}
|
||||
return mn, mx
|
||||
}
|
||||
61
backend/go/depth-anything-cpp/main.go
Normal file
61
backend/go/depth-anything-cpp/main.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package main
|
||||
|
||||
// main.go - entry point for the depth-anything-cpp gRPC backend.
|
||||
//
|
||||
// Dlopens libdepthanythingcpp-<variant>.so via purego at the path in
|
||||
// DEPTHANYTHING_LIBRARY (set by run.sh based on /proc/cpuinfo), registers the
|
||||
// da_capi_* C ABI symbols, then starts the gRPC server.
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
// LibFuncs pairs a Go function variable with the name of the C symbol that
// purego.RegisterLibFunc binds to it.
type LibFuncs struct {
	FuncPtr any    // pointer to the Go function variable to populate
	Name    string // exported symbol name in the shared library
}
|
||||
|
||||
func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libdepthanythingcpp-fallback.so"
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
libFuncs := []LibFuncs{
|
||||
{&CapiLoad, "da_capi_load"},
|
||||
{&CapiFree, "da_capi_free"},
|
||||
{&CapiLastError, "da_capi_last_error"},
|
||||
{&CapiDepthPath, "da_capi_depth_path"},
|
||||
{&CapiFreeFloats, "da_capi_free_floats"},
|
||||
{&CapiPosePath, "da_capi_pose_path"},
|
||||
{&CapiDepthDense, "da_capi_depth_dense"},
|
||||
{&CapiPoints, "da_capi_points"},
|
||||
{&CapiFreeBytes, "da_capi_free_bytes"},
|
||||
{&CapiExportGlb, "da_capi_export_glb"},
|
||||
{&CapiExportColmap, "da_capi_export_colmap"},
|
||||
}
|
||||
|
||||
for _, lf := range libFuncs {
|
||||
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||
}
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &DepthAnythingCpp{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
167
backend/go/depth-anything-cpp/main_test.go
Normal file
167
backend/go/depth-anything-cpp/main_test.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package main
|
||||
|
||||
// main_test.go - end-to-end smoke test for the depth-anything-cpp gRPC backend.
|
||||
//
|
||||
// Spawns the compiled depth-anything-cpp binary on a free local port, dials it
|
||||
// via gRPC, and exercises LoadModel + Predict against the test fixtures
|
||||
// downloaded by test.sh: the small (vits) f32 GGUF of Depth Anything 3 and a
|
||||
// real photo. Asserts that Predict returns a JSON payload with a positive
|
||||
// depth-map width/height.
|
||||
//
|
||||
// The spec Skip()s cleanly if its fixtures (the model, the test image, the
|
||||
// built binary, or the fallback .so) are missing, so the test target stays
|
||||
// usable on a fresh checkout / on CI runners where the model hasn't been
|
||||
// downloaded.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
)
|
||||
|
||||
func TestDepth(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "depth-anything-cpp backend smoke suite")
|
||||
}
|
||||
|
||||
// freePort grabs an ephemeral TCP port and immediately releases it so the
|
||||
// spawned backend can bind to it. There is a tiny TOCTOU window here but in
|
||||
// practice it's adequate for a smoke test on a quiet runner.
|
||||
func freePort() int {
|
||||
l, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
Expect(err).ToNot(HaveOccurred(), "freePort listen")
|
||||
port := l.Addr().(*net.TCPAddr).Port
|
||||
Expect(l.Close()).To(Succeed())
|
||||
return port
|
||||
}
|
||||
|
||||
// startBackend spawns the depth-anything-cpp binary on the given port and waits
|
||||
// until it accepts TCP connections (up to 10s). It mirrors how main.go resolves
|
||||
// the purego library: the DEPTHANYTHING_LIBRARY env var points the dlopen at the
|
||||
// freshly built fallback .so. The returned cleanup func kills the process.
|
||||
func startBackend(port int) func() {
|
||||
binary, err := filepath.Abs("./depth-anything-cpp")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
if _, err := os.Stat(binary); err != nil {
|
||||
Skip(fmt.Sprintf("backend binary not built: %s (run `make depth-anything-cpp` first)", binary))
|
||||
}
|
||||
|
||||
libPath, err := filepath.Abs("./libdepthanythingcpp-fallback.so")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
if _, err := os.Stat(libPath); err != nil {
|
||||
Skip(fmt.Sprintf("fallback library not built: %s (run `make libdepthanythingcpp-fallback.so` first)", libPath))
|
||||
}
|
||||
|
||||
addr := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
cmd := exec.Command(binary, "--addr", addr)
|
||||
cmd.Env = append(os.Environ(), "DEPTHANYTHING_LIBRARY="+libPath)
|
||||
cmd.Stdout = os.Stderr
|
||||
cmd.Stderr = os.Stderr
|
||||
Expect(cmd.Start()).To(Succeed())
|
||||
|
||||
cleanup := func() {
|
||||
if cmd.Process != nil {
|
||||
_ = cmd.Process.Kill()
|
||||
_, _ = cmd.Process.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
deadline := time.Now().Add(10 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
c, err := net.DialTimeout("tcp", addr, 200*time.Millisecond)
|
||||
if err == nil {
|
||||
_ = c.Close()
|
||||
return cleanup
|
||||
}
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
|
||||
cleanup()
|
||||
Fail(fmt.Sprintf("backend did not become ready on %s within 10s", addr))
|
||||
return func() {}
|
||||
}
|
||||
|
||||
// loadTestImage reads the test image downloaded by test.sh and returns its
|
||||
// base64-encoded content (one of the wire formats accepted by Predict).
|
||||
func loadTestImage() string {
|
||||
imgPath, err := filepath.Abs("test-data/test.jpg")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
imgBytes, err := os.ReadFile(imgPath)
|
||||
if err != nil {
|
||||
Skip(fmt.Sprintf("test image not present: %s (run test.sh first)", imgPath))
|
||||
}
|
||||
return base64.StdEncoding.EncodeToString(imgBytes)
|
||||
}
|
||||
|
||||
// dialBackend opens a gRPC client connection to the spawned backend.
|
||||
func dialBackend(port int) (pb.BackendClient, func()) {
|
||||
addr := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
return pb.NewBackendClient(conn), func() { _ = conn.Close() }
|
||||
}
|
||||
|
||||
// modelPathOrSkip resolves the model file under ./test-models/ and Skip()s the
|
||||
// current spec if it's missing (not present on a fresh checkout / on CI runners
|
||||
// without the download).
|
||||
func modelPathOrSkip(name string) string {
|
||||
modelDir, err := filepath.Abs("test-models")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
modelPath := filepath.Join(modelDir, name)
|
||||
if _, err := os.Stat(modelPath); err != nil {
|
||||
Skip(fmt.Sprintf("model not present: %s (run test.sh first)", modelPath))
|
||||
}
|
||||
return modelPath
|
||||
}
|
||||
|
||||
var _ = Describe("depth-anything-cpp backend", func() {
|
||||
It("runs depth+pose against a known-good image", func() {
|
||||
modelPath := modelPathOrSkip("depth-anything-small-f32.gguf")
|
||||
imgB64 := loadTestImage()
|
||||
|
||||
port := freePort()
|
||||
cleanup := startBackend(port)
|
||||
defer cleanup()
|
||||
|
||||
client, closeConn := dialBackend(port)
|
||||
defer closeConn()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{
|
||||
Model: "depth-anything-small-f32.gguf",
|
||||
ModelFile: modelPath,
|
||||
Threads: 4,
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred(), "LoadModel")
|
||||
Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage())
|
||||
|
||||
// Predict runs depth+pose and returns the JSON depthResult in Reply.Message.
|
||||
reply, err := client.Predict(ctx, &pb.PredictOptions{
|
||||
Images: []string{imgB64},
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred(), "Predict")
|
||||
|
||||
var res depthResult
|
||||
Expect(json.Unmarshal(reply.GetMessage(), &res)).To(Succeed(), "Predict returned non-JSON: %q", string(reply.GetMessage()))
|
||||
Expect(res.DepthW).To(BeNumerically(">", 0), "depth width should be positive")
|
||||
Expect(res.DepthH).To(BeNumerically(">", 0), "depth height should be positive")
|
||||
|
||||
_, _ = fmt.Fprintf(GinkgoWriter, "depth OK: %dx%d min=%.3f max=%.3f\n",
|
||||
res.DepthW, res.DepthH, res.DepthMin, res.DepthMax)
|
||||
})
|
||||
})
|
||||
59
backend/go/depth-anything-cpp/package.sh
Executable file
59
backend/go/depth-anything-cpp/package.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash

# Assemble the self-contained package/ directory for the depth-anything-cpp
# backend: the backend binary, every libdepthanythingcpp-*.so variant, run.sh,
# the dynamic loader plus core system libraries for the build architecture,
# and (when the helper script exists) the GPU libraries for BUILD_TYPE.
#
# All path expansions are quoted so the script also works from a checkout
# whose path contains spaces.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."
PACKAGE_DIR="$CURDIR/package"
LIB_DIR="$PACKAGE_DIR/lib"

# System libraries bundled next to the loader, in copy order.
SYSTEM_LIBS="libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0"

# copy_system_libs LOADER LIBDIR
# Copies the dynamic loader to lib/ld.so and each SYSTEM_LIBS entry from
# LIBDIR into lib/, dereferencing symlinks.
copy_system_libs() {
    local loader=$1 libdir=$2 lib
    cp -arfLv "$loader" "$LIB_DIR/ld.so"
    for lib in $SYSTEM_LIBS; do
        cp -arfLv "$libdir/$lib" "$LIB_DIR/$lib"
    done
}

# Create lib directory
mkdir -p "$LIB_DIR"

cp -avf "$CURDIR"/libdepthanythingcpp-*.so "$PACKAGE_DIR/"
cp -avf "$CURDIR/depth-anything-cpp" "$PACKAGE_DIR/"
cp -fv "$CURDIR/run.sh" "$PACKAGE_DIR/"

# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_system_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_system_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    # Nothing is bundled on macOS.
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Package GPU libraries based on BUILD_TYPE
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$LIB_DIR"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$PACKAGE_DIR/"
ls -liah "$LIB_DIR/"
|
||||
52
backend/go/depth-anything-cpp/run.sh
Executable file
52
backend/go/depth-anything-cpp/run.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# Launcher for the depth-anything-cpp backend.
#
# Picks the most capable libdepthanythingcpp-<variant>.so that is both present
# next to this script and supported by the CPU (fallback < avx < avx2 <
# avx512), exports it as DEPTHANYTHING_LIBRARY for the Go binary, and execs
# the backend, through the bundled lib/ld.so when one is packaged.
set -ex

# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath "$0")")

cd /

echo "CPU info:"
if [ "$(uname)" != "Darwin" ]; then
    grep -e "model\sname" /proc/cpuinfo | head -1
    grep -e "flags" /proc/cpuinfo | head -1
fi

LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"

if [ "$(uname)" != "Darwin" ]; then
    # Later checks override earlier ones, so the widest supported variant wins.
    if grep -q -e "\savx\s" /proc/cpuinfo ; then
        echo "CPU: AVX found OK"
        if [ -e "$CURDIR/libdepthanythingcpp-avx.so" ]; then
            LIBRARY="$CURDIR/libdepthanythingcpp-avx.so"
        fi
    fi

    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
        echo "CPU: AVX2 found OK"
        if [ -e "$CURDIR/libdepthanythingcpp-avx2.so" ]; then
            LIBRARY="$CURDIR/libdepthanythingcpp-avx2.so"
        fi
    fi

    # Check avx 512
    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
        echo "CPU: AVX512F found OK"
        if [ -e "$CURDIR/libdepthanythingcpp-avx512.so" ]; then
            LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
        fi
    fi
fi

# Only append the previous value when it is non-empty: a trailing ':' is an
# empty search-path entry, which the loader treats as the current directory.
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DEPTHANYTHING_LIBRARY="$LIBRARY"

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    echo "Using library: $LIBRARY"
    exec "$CURDIR/lib/ld.so" "$CURDIR/depth-anything-cpp" "$@"
fi

echo "Using library: $LIBRARY"
exec "$CURDIR/depth-anything-cpp" "$@"
|
||||
45
backend/go/depth-anything-cpp/test.sh
Executable file
45
backend/go/depth-anything-cpp/test.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
# Fetch the fixtures for the depth-anything-cpp smoke test (a small GGUF model
# and a real photo) and run the Go test suite against them.
set -e

CURDIR=$(dirname "$(realpath "$0")")

# download URL DEST
# Fetches URL into DEST.part and renames it to DEST only once curl succeeds,
# so DEST is never a truncated file or a saved HTTP error page.
#   --fail  makes HTTP errors (404, 5xx) a non-zero exit instead of a body.
#   -C -    resumes from an existing DEST.part left by an interrupted run.
download() {
    local url=$1 dest=$2
    curl --fail -L -C - -o "${dest}.part" "$url" --progress-bar
    mv "${dest}.part" "$dest"
}

echo "Running depth-anything-cpp backend tests..."

# Test model from the mudler/depth-anything.cpp-gguf HuggingFace repo. The small
# (vits) f32 GGUF is the lightest backbone (~131 MB), so it keeps the download
# cheap. It is skipped entirely if already present; an interrupted download is
# resumed from its .part file on the next run.
DEPTHANYTHING_MODEL_DIR="${DEPTHANYTHING_MODEL_DIR:-$CURDIR/test-models}"

DEPTHANYTHING_MODEL_FILE="${DEPTHANYTHING_MODEL_FILE:-depth-anything-small-f32.gguf}"
DEPTHANYTHING_MODEL_URL="${DEPTHANYTHING_MODEL_URL:-https://huggingface.co/mudler/depth-anything.cpp-gguf/resolve/main/depth-anything-small-f32.gguf}"

mkdir -p "$DEPTHANYTHING_MODEL_DIR"

if [ ! -f "$DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE" ]; then
    echo "Downloading depth-anything small f32 model (~131 MB)..."
    download "$DEPTHANYTHING_MODEL_URL" "$DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE"
fi

# Use a real photo (people + cars) from the upstream rf-detr.cpp repo (~46 KB).
# Depth estimation needs real content; a synthetic image would be degenerate.
TEST_IMAGE_DIR="$CURDIR/test-data"
TEST_IMAGE_FILE="$TEST_IMAGE_DIR/test.jpg"
TEST_IMAGE_URL="${TEST_IMAGE_URL:-https://raw.githubusercontent.com/mudler/rf-detr.cpp/main/tests/fixtures/ci/test_image.jpg}"

mkdir -p "$TEST_IMAGE_DIR"
if [ ! -f "$TEST_IMAGE_FILE" ]; then
    echo "Downloading test image..."
    download "$TEST_IMAGE_URL" "$TEST_IMAGE_FILE"
fi

echo "depth-anything-cpp test setup complete."
echo "  model: $DEPTHANYTHING_MODEL_DIR/$DEPTHANYTHING_MODEL_FILE"
echo "  test image: $TEST_IMAGE_FILE"

# Run the Go smoke test: spawns the backend binary on a free port, calls
# LoadModel + Predict via gRPC against the downloaded GGUF + image.
echo ""
echo "Running Go smoke test..."
cd "$CURDIR"
go test -v -timeout 30m ./...
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=19bdfe22d255d5b4dff39d449318b9bc5ea2317f
|
||||
STABLEDIFFUSION_GGML_VERSION?=276025e054555166ec419413c6748ca79986ee93
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
4
backend/go/supertonic/.gitignore
vendored
Normal file
4
backend/go/supertonic/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
/supertonic
|
||||
/sources/
|
||||
/backend-assets/
|
||||
/package/
|
||||
62
backend/go/supertonic/Makefile
Normal file
62
backend/go/supertonic/Makefile
Normal file
@@ -0,0 +1,62 @@
|
||||
# Build rules for the supertonic backend: download a prebuilt onnxruntime
# release matching OS / architecture / BUILD_TYPE, stage its libraries under
# backend-assets/lib, build the Go binary and package it.

CURRENT_DIR=$(abspath ./)
GOCMD=go

ONNX_VERSION?=1.24.4
ONNX_ARCH?=x64
ONNX_OS?=linux

# OS is normally provided by the caller; detect it here when it is not, so
# the Darwin branch below also works when this Makefile is invoked directly.
OS?=$(shell uname -s)

ifneq (,$(findstring aarch64,$(shell uname -m)))
  ONNX_ARCH=aarch64
endif

ifeq ($(OS),Darwin)
  ONNX_OS=osx
  ifneq (,$(findstring arm64,$(shell uname -m)))
    ONNX_ARCH=arm64
  else
    ONNX_ARCH=x86_64
  endif
endif

# CUDA 12 ships as -gpu, CUDA 13 as -gpu_cuda13 (underscore). CPU has no suffix.
ifeq ($(BUILD_TYPE),cublas)
  ONNX_PROVIDER=cuda
  ifeq ($(CUDA_MAJOR_VERSION),13)
    ONNX_VARIANT=-gpu_cuda13
  else
    ONNX_VARIANT=-gpu
  endif
else
  ONNX_VARIANT=
  ONNX_PROVIDER=cpu
endif

# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as the tarball, so a bad version/arch combination fails at the download.
sources/onnxruntime:
	mkdir -p sources/onnxruntime
	curl -fL https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)$(ONNX_VARIANT)-$(ONNX_VERSION).tgz \
		-o sources/onnxruntime/onnxruntime.tgz
	cd sources/onnxruntime && tar -xf onnxruntime.tgz --strip-components=1 && rm onnxruntime.tgz

backend-assets/lib: sources/onnxruntime
	mkdir -p backend-assets/lib
	cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/

# The execution provider is baked into the binary via main.onnxProvider.
supertonic: backend-assets/lib
	CGO_ENABLED=1 $(GOCMD) build \
		-ldflags "$(LD_FLAGS) -X main.onnxProvider=$(ONNX_PROVIDER)" \
		-tags "$(GO_TAGS)" -o supertonic ./

package:
	bash package.sh

build: supertonic package

# Tests need only the Go toolchain (gcc); yalue dlopens onnxruntime at
# runtime, so no tarball download is required to compile or run unit specs.
test:
	CGO_ENABLED=1 $(GOCMD) test -v -timeout 120s ./...

clean:
	rm -rf supertonic sources/ backend-assets/ package/

.PHONY: build package clean test
|
||||
307
backend/go/supertonic/backend.go
Normal file
307
backend/go/supertonic/backend.go
Normal file
@@ -0,0 +1,307 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
laudio "github.com/mudler/LocalAI/pkg/audio"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
// onnxProvider is set via -ldflags "-X main.onnxProvider=cuda" by the
|
||||
// CUDA build (later phase). Defaults to CPU.
|
||||
var onnxProvider = "cpu"
|
||||
|
||||
// Prefixes recognised in ModelOptions.Options. Every entry has the form
// "<prefix><value>" and overrides one per-model generation default:
//
//	supertonic.steps=<int>            denoising steps (quality), default 8
//	supertonic.speed=<float>          speech rate, default 1.05
//	supertonic.silence=<float>        inter-chunk silence in seconds, default 0.3
//	supertonic.default_voice=<name>   voice style used when the request omits a voice
//	supertonic.default_lang=<lang>    language tag used when the request omits one
const (
	optionSteps        = "supertonic.steps="
	optionSpeed        = "supertonic.speed="
	optionSilence      = "supertonic.silence="
	optionDefaultVoice = "supertonic.default_voice="
	optionDefaultLang  = "supertonic.default_lang="
)
|
||||
|
||||
type SupertonicBackend struct {
|
||||
base.SingleThread
|
||||
|
||||
tts *TextToSpeech
|
||||
cfg Config
|
||||
modelDir string
|
||||
voicesDir string
|
||||
defaultVoice string
|
||||
defaultLang string
|
||||
steps int
|
||||
speed float32
|
||||
silence float32
|
||||
|
||||
styleMu sync.Mutex
|
||||
styles map[string]*Style // voice name -> loaded style cache
|
||||
}
|
||||
|
||||
func (s *SupertonicBackend) Load(opts *pb.ModelOptions) error {
|
||||
modelDir, err := resolveModelDir(opts.ModelFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.modelDir = modelDir
|
||||
s.voicesDir = resolveVoicesDir(modelDir)
|
||||
|
||||
cfg, err := LoadCfgs(modelDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("loading tts.json from %s: %w", modelDir, err)
|
||||
}
|
||||
s.cfg = cfg
|
||||
|
||||
// onnxProvider is "cpu" for the CPU build; the CUDA build sets it to
|
||||
// "cuda" via -ldflags. Upstream LoadTextToSpeech still errors on GPU
|
||||
// until the CUDA phase wires the execution provider.
|
||||
tts, err := LoadTextToSpeech(modelDir, onnxProvider == "cuda", cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("loading supertonic models from %s: %w", modelDir, err)
|
||||
}
|
||||
s.tts = tts
|
||||
|
||||
s.steps = int(findOptionInt(opts, optionSteps, 8))
|
||||
s.speed = findOptionFloat(opts, optionSpeed, 1.05)
|
||||
s.silence = findOptionFloat(opts, optionSilence, 0.3)
|
||||
s.defaultVoice = findOptionValue(opts, optionDefaultVoice, "")
|
||||
s.defaultLang = findOptionValue(opts, optionDefaultLang, "na")
|
||||
s.styles = map[string]*Style{}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SupertonicBackend) TTS(req *pb.TTSRequest) error {
|
||||
wav, sr, err := s.synthesize(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
out := make([]float64, len(wav))
|
||||
for i, v := range wav {
|
||||
out[i] = float64(v)
|
||||
}
|
||||
if err := writeWavFile(req.Dst, out, sr); err != nil {
|
||||
return fmt.Errorf("writing wav to %s: %w", req.Dst, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SupertonicBackend) TTSStream(req *pb.TTSRequest, results chan []byte) error {
|
||||
defer close(results)
|
||||
|
||||
wav, sr, err := s.synthesize(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
results <- streamingWAVHeader(uint32(sr))
|
||||
|
||||
const chunkSamples = 4096
|
||||
for off := 0; off < len(wav); off += chunkSamples {
|
||||
end := off + chunkSamples
|
||||
if end > len(wav) {
|
||||
end = len(wav)
|
||||
}
|
||||
results <- pcmFloatToInt16LE(wav[off:end])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// synthesize runs the full pipeline and returns the trimmed mono float32
|
||||
// PCM and its sample rate.
|
||||
func (s *SupertonicBackend) synthesize(req *pb.TTSRequest) ([]float32, int, error) {
|
||||
if s.tts == nil {
|
||||
return nil, 0, fmt.Errorf("supertonic model not loaded")
|
||||
}
|
||||
if strings.TrimSpace(req.Text) == "" {
|
||||
return nil, 0, fmt.Errorf("empty text")
|
||||
}
|
||||
|
||||
style, err := s.loadStyle(s.voiceName(req.Voice))
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
lang := s.resolveLang("")
|
||||
if req.Language != nil {
|
||||
lang = s.resolveLang(*req.Language)
|
||||
}
|
||||
|
||||
wav, dur, err := s.tts.Call(req.Text, lang, style, s.steps, s.speed, s.silence)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
sr := s.tts.SampleRate
|
||||
// Call returns concatenated audio; trim to the reported duration.
|
||||
wavLen := int(float32(sr) * dur)
|
||||
if wavLen < 0 {
|
||||
wavLen = 0
|
||||
}
|
||||
if wavLen > len(wav) {
|
||||
wavLen = len(wav)
|
||||
}
|
||||
return wav[:wavLen], sr, nil
|
||||
}
|
||||
|
||||
// voiceName picks the request voice, falling back to the model default.
|
||||
func (s *SupertonicBackend) voiceName(reqVoice string) string {
|
||||
v := strings.TrimSpace(reqVoice)
|
||||
if v == "" {
|
||||
return s.defaultVoice
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// resolveLang validates against AvailableLangs, falling back to the model
|
||||
// default (then "na").
|
||||
func (s *SupertonicBackend) resolveLang(reqLang string) string {
|
||||
l := strings.TrimSpace(reqLang)
|
||||
if l != "" && isValidLang(l) {
|
||||
return l
|
||||
}
|
||||
if s.defaultLang != "" && isValidLang(s.defaultLang) {
|
||||
return s.defaultLang
|
||||
}
|
||||
return "na"
|
||||
}
|
||||
|
||||
// loadStyle resolves and caches a voice-style. An empty name with no model
|
||||
// default is an error (supertonic requires a style embedding).
|
||||
func (s *SupertonicBackend) loadStyle(name string) (*Style, error) {
|
||||
if name == "" {
|
||||
return nil, fmt.Errorf("no voice specified and no supertonic.default_voice set")
|
||||
}
|
||||
s.styleMu.Lock()
|
||||
defer s.styleMu.Unlock()
|
||||
if st, ok := s.styles[name]; ok {
|
||||
return st, nil
|
||||
}
|
||||
path := s.voiceStylePath(name)
|
||||
st, err := LoadVoiceStyle([]string{path}, false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading voice style %q (%s): %w", name, path, err)
|
||||
}
|
||||
s.styles[name] = st
|
||||
return st, nil
|
||||
}
|
||||
|
||||
// voiceStylePath maps a voice name to a JSON path. Absolute paths are honored;
|
||||
// names containing a separator resolve under modelDir; bare names resolve under
|
||||
// the resolved voicesDir (see resolveVoicesDir).
|
||||
func (s *SupertonicBackend) voiceStylePath(name string) string {
|
||||
if !strings.HasSuffix(name, ".json") {
|
||||
name += ".json"
|
||||
}
|
||||
if filepath.IsAbs(name) {
|
||||
return name
|
||||
}
|
||||
if strings.ContainsRune(name, filepath.Separator) {
|
||||
return filepath.Join(s.modelDir, name)
|
||||
}
|
||||
return filepath.Join(s.voicesDir, name)
|
||||
}
|
||||
|
||||
// resolveVoicesDir locates the voice_styles directory. The HF model layout
|
||||
// puts the ONNX files in an onnx/ subdir with voice_styles/ as its sibling,
|
||||
// so check modelDir/voice_styles first, then the parent's voice_styles.
|
||||
func resolveVoicesDir(modelDir string) string {
|
||||
candidates := []string{
|
||||
filepath.Join(modelDir, "voice_styles"),
|
||||
filepath.Join(filepath.Dir(modelDir), "voice_styles"),
|
||||
}
|
||||
for _, c := range candidates {
|
||||
if info, err := os.Stat(c); err == nil && info.IsDir() {
|
||||
return c
|
||||
}
|
||||
}
|
||||
return candidates[0]
|
||||
}
|
||||
|
||||
// resolveModelDir accepts either a directory (used as-is) or a file (its
|
||||
// parent dir is used).
|
||||
func resolveModelDir(modelFile string) (string, error) {
|
||||
if modelFile == "" {
|
||||
return "", fmt.Errorf("empty model path")
|
||||
}
|
||||
info, err := os.Stat(modelFile)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("stat model path %s: %w", modelFile, err)
|
||||
}
|
||||
if info.IsDir() {
|
||||
return modelFile, nil
|
||||
}
|
||||
return filepath.Dir(modelFile), nil
|
||||
}
|
||||
|
||||
// ---- option helpers (mirrors backend/go/sherpa-onnx/backend.go) ----
|
||||
|
||||
func findOptionValue(opts *pb.ModelOptions, prefix, def string) string {
|
||||
for _, o := range opts.Options {
|
||||
if strings.HasPrefix(o, prefix) {
|
||||
return strings.TrimPrefix(o, prefix)
|
||||
}
|
||||
}
|
||||
return def
|
||||
}
|
||||
|
||||
func findOptionFloat(opts *pb.ModelOptions, prefix string, def float32) float32 {
|
||||
raw := findOptionValue(opts, prefix, "")
|
||||
if raw == "" {
|
||||
return def
|
||||
}
|
||||
v, err := strconv.ParseFloat(raw, 32)
|
||||
if err != nil {
|
||||
return def
|
||||
}
|
||||
return float32(v)
|
||||
}
|
||||
|
||||
func findOptionInt(opts *pb.ModelOptions, prefix string, def int32) int32 {
|
||||
raw := findOptionValue(opts, prefix, "")
|
||||
if raw == "" {
|
||||
return def
|
||||
}
|
||||
v, err := strconv.ParseInt(raw, 10, 32)
|
||||
if err != nil {
|
||||
return def
|
||||
}
|
||||
return int32(v)
|
||||
}
|
||||
|
||||
// ---- PCM helpers ----
|
||||
|
||||
// pcmFloatToInt16LE converts float PCM samples (nominally in [-1, 1]) into
// signed 16-bit little-endian PCM, two bytes per sample.
//
// Samples are scaled by 32767 and truncated toward zero. Clamping happens on
// the float value, before the integer conversion: converting an out-of-range
// or NaN float to an integer is implementation-dependent in Go, so clamping
// afterwards could turn a very large positive sample into -32768 on some
// platforms. NaN samples are encoded as silence (0).
func pcmFloatToInt16LE(samples []float32) []byte {
	buf := make([]byte, len(samples)*2)
	for i, f := range samples {
		scaled := f * 32767

		var v int16
		switch {
		case scaled != scaled: // NaN is the only value not equal to itself
			v = 0
		case scaled >= 32767:
			v = 32767
		case scaled <= -32768:
			v = -32768
		default:
			v = int16(scaled)
		}
		binary.LittleEndian.PutUint16(buf[2*i:], uint16(v))
	}
	return buf
}
|
||||
|
||||
func streamingWAVHeader(sampleRate uint32) []byte {
|
||||
const streamingSize = 0xFFFFFFFF
|
||||
h := laudio.NewWAVHeaderWithRate(streamingSize, sampleRate)
|
||||
h.ChunkSize = streamingSize
|
||||
var buf bytes.Buffer
|
||||
_ = h.Write(&buf)
|
||||
return buf.Bytes()
|
||||
}
|
||||
86
backend/go/supertonic/backend_test.go
Normal file
86
backend/go/supertonic/backend_test.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
var _ = Describe("voiceStylePath", func() {
|
||||
s := &SupertonicBackend{modelDir: "/models/st/onnx", voicesDir: "/models/st/voice_styles"}
|
||||
|
||||
It("resolves a bare name under the resolved voicesDir", func() {
|
||||
Expect(s.voiceStylePath("M1")).To(Equal(filepath.Join("/models/st/voice_styles", "M1.json")))
|
||||
})
|
||||
It("keeps an explicit .json suffix", func() {
|
||||
Expect(s.voiceStylePath("M1.json")).To(Equal(filepath.Join("/models/st/voice_styles", "M1.json")))
|
||||
})
|
||||
It("honors absolute paths", func() {
|
||||
Expect(s.voiceStylePath("/abs/v.json")).To(Equal("/abs/v.json"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("resolveVoicesDir", func() {
|
||||
It("prefers voice_styles under modelDir", func() {
|
||||
dir := GinkgoT().TempDir()
|
||||
Expect(os.MkdirAll(filepath.Join(dir, "voice_styles"), 0o755)).To(Succeed())
|
||||
Expect(resolveVoicesDir(dir)).To(Equal(filepath.Join(dir, "voice_styles")))
|
||||
})
|
||||
It("falls back to the sibling voice_styles next to an onnx subdir", func() {
|
||||
root := GinkgoT().TempDir()
|
||||
Expect(os.MkdirAll(filepath.Join(root, "voice_styles"), 0o755)).To(Succeed())
|
||||
Expect(os.MkdirAll(filepath.Join(root, "onnx"), 0o755)).To(Succeed())
|
||||
Expect(resolveVoicesDir(filepath.Join(root, "onnx"))).To(Equal(filepath.Join(root, "voice_styles")))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("resolveLang", func() {
|
||||
It("accepts a valid request language", func() {
|
||||
s := &SupertonicBackend{defaultLang: "na"}
|
||||
Expect(s.resolveLang("ko")).To(Equal("ko"))
|
||||
})
|
||||
It("falls back to the model default for an invalid language", func() {
|
||||
s := &SupertonicBackend{defaultLang: "en"}
|
||||
Expect(s.resolveLang("zz")).To(Equal("en"))
|
||||
})
|
||||
It("falls back to na when nothing is valid", func() {
|
||||
s := &SupertonicBackend{defaultLang: ""}
|
||||
Expect(s.resolveLang("")).To(Equal("na"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("pcmFloatToInt16LE", func() {
|
||||
It("clamps and encodes little-endian", func() {
|
||||
out := pcmFloatToInt16LE([]float32{0, 1.0, -1.0, 2.0})
|
||||
Expect(out).To(HaveLen(8))
|
||||
Expect(out[0:2]).To(Equal([]byte{0x00, 0x00})) // 0
|
||||
Expect(out[2:4]).To(Equal([]byte{0xff, 0x7f})) // 32767
|
||||
Expect(out[6:8]).To(Equal([]byte{0xff, 0x7f})) // clamp 2.0 -> 32767
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("end-to-end synthesis", Ordered, func() {
|
||||
var modelDir string
|
||||
BeforeAll(func() {
|
||||
modelDir = os.Getenv("SUPERTONIC_MODEL_PATH")
|
||||
if modelDir == "" {
|
||||
Skip("set SUPERTONIC_MODEL_PATH to a supertonic model dir to run")
|
||||
}
|
||||
Expect(InitializeONNXRuntime()).To(Succeed())
|
||||
})
|
||||
|
||||
It("synthesizes a wav file", func() {
|
||||
b := &SupertonicBackend{}
|
||||
Expect(b.Load(&pb.ModelOptions{ModelFile: modelDir, Options: []string{"supertonic.default_voice=F1"}})).To(Succeed())
|
||||
dst := filepath.Join(GinkgoT().TempDir(), "out.wav")
|
||||
lang := "en"
|
||||
Expect(b.TTS(&pb.TTSRequest{Text: "Hello from LocalAI.", Dst: dst, Language: &lang})).To(Succeed())
|
||||
info, err := os.Stat(dst)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(info.Size()).To(BeNumerically(">", 44)) // header + PCM
|
||||
})
|
||||
})
|
||||
1085
backend/go/supertonic/helper.go
Normal file
1085
backend/go/supertonic/helper.go
Normal file
File diff suppressed because it is too large
Load Diff
27
backend/go/supertonic/main.go
Normal file
27
backend/go/supertonic/main.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package main
|
||||
|
||||
// Started internally by LocalAI; a server is allocated per model.
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
ort "github.com/yalue/onnxruntime_go"
|
||||
)
|
||||
|
||||
var addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
// InitializeONNXRuntime reads ONNXRUNTIME_LIB_PATH (set by run.sh) and
|
||||
// dlopens libonnxruntime before any session is created in Load().
|
||||
if err := InitializeONNXRuntime(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer func() { _ = ort.DestroyEnvironment() }()
|
||||
|
||||
if err := grpc.StartServer(*addr, &SupertonicBackend{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
13
backend/go/supertonic/main_suite_test.go
Normal file
13
backend/go/supertonic/main_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestSupertonic(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Supertonic backend test suite")
|
||||
}
|
||||
49
backend/go/supertonic/package.sh
Executable file
49
backend/go/supertonic/package.sh
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/bin/bash
# Assemble the self-contained backend package under ./package:
#   package/supertonic, package/run.sh  - backend binary and launcher
#   package/lib/                        - ONNX Runtime libs, the dynamic loader
#                                         (as ld.so) and the core system libs
# All path expansions are quoted so the script works from a checkout whose
# path contains spaces.
set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/supertonic" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
# The glob stays outside the quotes so it still expands.
cp -rfLv "$CURDIR"/backend-assets/lib/* "$CURDIR/package/lib/"

# Pick the loader and system library directory for the build architecture.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    LOADER="/lib64/ld-linux-x86-64.so.2"
    SYSLIB_DIR="/lib/x86_64-linux-gnu"
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    LOADER="/lib/ld-linux-aarch64.so.1"
    SYSLIB_DIR="/lib/aarch64-linux-gnu"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# The loader is shipped as lib/ld.so; run.sh uses it when present.
cp -arfLv "$LOADER" "$CURDIR/package/lib/ld.so"
for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1 \
           libdl.so.2 librt.so.1 libpthread.so.0; do
    cp -arfLv "$SYSLIB_DIR/$lib" "$CURDIR/package/lib/$lib"
done

# Optional: bundle GPU runtime libraries when the shared helper is available.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/"
ls -liah "$CURDIR/package/lib/"
|
||||
14
backend/go/supertonic/run.sh
Executable file
14
backend/go/supertonic/run.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
# Launcher for the packaged supertonic backend. Points the dynamic loader and
# ONNX Runtime at the bundled libraries, then replaces this shell with the
# backend binary (through the bundled ld.so when one was packaged).
set -ex

CURDIR=$(dirname "$(realpath "$0")")

# Prepend the bundled libraries. The ${VAR:+...} form avoids a trailing ':'
# when LD_LIBRARY_PATH was unset; an empty entry would make the loader search
# the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Read by InitializeONNXRuntime to locate libonnxruntime.
export ONNXRUNTIME_LIB_PATH="$CURDIR/lib/libonnxruntime.so"

if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/supertonic" "$@"
fi

exec "$CURDIR/supertonic" "$@"
|
||||
@@ -458,6 +458,126 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-locate-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-vulkan-locate-anything-cpp
|
||||
- &depthanything
|
||||
name: "depth-anything"
|
||||
alias: "depth-anything"
|
||||
license: apache-2.0
|
||||
description: |
|
||||
Depth Anything 3 monocular metric depth + camera pose estimation in C/C++
|
||||
using GGML. Loads pre-built GGUF weights and, given an image, returns a
|
||||
dense depth map plus the recovered camera extrinsics (3x4) and intrinsics
|
||||
(3x3). No Python at inference (purego, cgo-less).
|
||||
urls:
|
||||
- https://github.com/mudler/depth-anything.cpp
|
||||
- https://huggingface.co/depth-anything/Depth-Anything-V3
|
||||
tags:
|
||||
- depth-estimation
|
||||
- camera-pose
|
||||
- depth-anything
|
||||
- gpu
|
||||
- cpu
|
||||
capabilities:
|
||||
default: "cpu-depth-anything-cpp"
|
||||
nvidia: "cuda12-depth-anything-cpp"
|
||||
nvidia-cuda-12: "cuda12-depth-anything-cpp"
|
||||
nvidia-cuda-13: "cuda13-depth-anything-cpp"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-depth-anything-cpp"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-depth-anything-cpp"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
|
||||
intel: "intel-sycl-f32-depth-anything-cpp"
|
||||
vulkan: "vulkan-depth-anything-cpp"
|
||||
- !!merge <<: *depthanything
|
||||
name: "depth-anything-development"
|
||||
capabilities:
|
||||
default: "cpu-depth-anything-cpp-development"
|
||||
nvidia: "cuda12-depth-anything-cpp-development"
|
||||
nvidia-cuda-12: "cuda12-depth-anything-cpp-development"
|
||||
nvidia-cuda-13: "cuda13-depth-anything-cpp-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-depth-anything-cpp-development"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-depth-anything-cpp-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
|
||||
intel: "intel-sycl-f32-depth-anything-cpp-development"
|
||||
vulkan: "vulkan-depth-anything-cpp-development"
|
||||
- !!merge <<: *depthanything
|
||||
name: "cpu-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cpu-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda12-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda12-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-12-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda13-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda13-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "nvidia-l4t-arm64-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "nvidia-l4t-arm64-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "intel-sycl-f32-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-sycl-f32-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "intel-sycl-f32-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-sycl-f32-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "intel-sycl-f16-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-sycl-f16-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "intel-sycl-f16-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-sycl-f16-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "vulkan-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-vulkan-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "vulkan-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-vulkan-depth-anything-cpp
|
||||
- &vllm
|
||||
name: "vllm"
|
||||
license: apache-2.0
|
||||
@@ -1368,6 +1488,20 @@
|
||||
nvidia: "cuda12-sherpa-onnx"
|
||||
nvidia-cuda-12: "cuda12-sherpa-onnx"
|
||||
metal: "metal-sherpa-onnx"
|
||||
- &supertonic
|
||||
name: "supertonic"
|
||||
alias: "supertonic"
|
||||
urls:
|
||||
- https://github.com/supertone-inc/supertonic
|
||||
description: |
|
||||
Supertonic backend: lightning-fast, on-device multilingual text-to-speech via ONNX Runtime.
|
||||
Runs Supertone's flow-matching TTS model (Supertone/supertonic-3), 44.1kHz output, 31 languages,
|
||||
multiple preset voice styles. No espeak-ng dependency.
|
||||
tags:
|
||||
- text-to-speech
|
||||
- TTS
|
||||
capabilities:
|
||||
default: "cpu-supertonic"
|
||||
- !!merge <<: *neutts
|
||||
name: "neutts-development"
|
||||
capabilities:
|
||||
@@ -5132,3 +5266,18 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-sherpa-onnx"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-sherpa-onnx
|
||||
## supertonic
|
||||
- !!merge <<: *supertonic
|
||||
name: "supertonic-development"
|
||||
capabilities:
|
||||
default: "cpu-supertonic-development"
|
||||
- !!merge <<: *supertonic
|
||||
name: "cpu-supertonic"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-supertonic
|
||||
- !!merge <<: *supertonic
|
||||
name: "cpu-supertonic-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-supertonic
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.81.0
|
||||
grpcio==1.81.1
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
|
||||
66
core/backend/depth.go
Normal file
66
core/backend/depth.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/trace"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// Depth runs depth estimation (Depth Anything 3) on the supplied image and
|
||||
// returns the full DepthResponse: per-pixel metric depth + confidence + sky,
|
||||
// camera pose (extrinsics/intrinsics), an optional 3D point cloud and any
|
||||
// requested exports (glb/colmap). The include_* flags and exports mirror the
|
||||
// DepthRequest proto so callers can ask for less work.
|
||||
func Depth(
|
||||
ctx context.Context,
|
||||
in *proto.DepthRequest,
|
||||
loader *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelConfig config.ModelConfig,
|
||||
) (*proto.DepthResponse, error) {
|
||||
opts := ModelOptions(modelConfig, appConfig)
|
||||
depthModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if depthModel == nil {
|
||||
return nil, fmt.Errorf("could not load depth model")
|
||||
}
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
res, err := depthModel.Depth(ctx, in)
|
||||
|
||||
if appConfig.EnableTracing {
|
||||
errStr := ""
|
||||
if err != nil {
|
||||
errStr = err.Error()
|
||||
}
|
||||
|
||||
trace.RecordBackendTrace(trace.BackendTrace{
|
||||
Timestamp: startTime,
|
||||
Duration: time.Since(startTime),
|
||||
Type: trace.BackendTraceDepth,
|
||||
ModelName: modelConfig.Name,
|
||||
Backend: modelConfig.Backend,
|
||||
Summary: trace.TruncateString(in.GetSrc(), 200),
|
||||
Error: errStr,
|
||||
Data: map[string]any{
|
||||
"exports": in.GetExports(),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return res, err
|
||||
}
|
||||
@@ -488,6 +488,16 @@ func (o *ApplicationConfig) GetEffectiveMaxActiveBackends() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// WatchdogShouldRun reports whether the live watchdog process should be
|
||||
// running for the current config. It mirrors the gating in
|
||||
// (*Application).startWatchdog so the /api/settings start/stop decision and
|
||||
// the startup path agree on a single source of truth: the watchdog runs when
|
||||
// idle/busy checks are enabled (WatchDog), when LRU eviction is active
|
||||
// (effective max active backends > 0), or when the memory reclaimer is on.
|
||||
func (o *ApplicationConfig) WatchdogShouldRun() bool {
|
||||
return o.WatchDog || o.GetEffectiveMaxActiveBackends() > 0 || o.MemoryReclaimerEnabled
|
||||
}
|
||||
|
||||
// WithForceEvictionWhenBusy sets whether to force eviction even when models have active API calls
|
||||
func WithForceEvictionWhenBusy(enabled bool) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
@@ -1198,18 +1208,22 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
|
||||
}
|
||||
if settings.WatchdogIdleEnabled != nil {
|
||||
o.WatchDogIdle = *settings.WatchdogIdleEnabled
|
||||
if o.WatchDogIdle {
|
||||
o.WatchDog = true
|
||||
}
|
||||
requireRestart = true
|
||||
}
|
||||
if settings.WatchdogBusyEnabled != nil {
|
||||
o.WatchDogBusy = *settings.WatchdogBusyEnabled
|
||||
if o.WatchDogBusy {
|
||||
o.WatchDog = true
|
||||
}
|
||||
requireRestart = true
|
||||
}
|
||||
// The React Settings "Enable Watchdog" master toggle manages only the
|
||||
// idle/busy checks — watchdog_enabled is vestigial in that UI. Whenever
|
||||
// either idle/busy field is present in the body, derive the run-state from
|
||||
// idle||busy so a cold enable starts the watchdog and a full disable stops
|
||||
// it, instead of trusting the stale watchdog_enabled the UI never updates.
|
||||
// This mirrors the startup invariant in startup.go. An API client posting
|
||||
// only watchdog_enabled (idle/busy absent) keeps its explicit value.
|
||||
if settings.WatchdogIdleEnabled != nil || settings.WatchdogBusyEnabled != nil {
|
||||
o.WatchDog = o.WatchDogIdle || o.WatchDogBusy
|
||||
}
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
if dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err == nil {
|
||||
o.WatchDogIdleTimeout = dur
|
||||
|
||||
@@ -223,6 +223,69 @@ var _ = Describe("ApplicationConfig RuntimeSettings Conversion", func() {
|
||||
Expect(appConfig.WatchDogBusy).To(BeTrue())
|
||||
})
|
||||
|
||||
// Residual #9125: the React Settings "Enable Watchdog" master toggle
|
||||
// manages only watchdog_idle_enabled / watchdog_busy_enabled — it never
|
||||
// touches the vestigial watchdog_enabled field. On a cold enable the
|
||||
// body therefore carries watchdog_enabled=false alongside idle/busy=true.
|
||||
// The derived run-state (WatchDog) must follow idle||busy so the live
|
||||
// watchdog actually starts, not the stale watchdog_enabled=false.
|
||||
It("should derive WatchDog from idle||busy on a cold enable even when watchdog_enabled=false", func() {
|
||||
appConfig := &ApplicationConfig{WatchDog: false}
|
||||
|
||||
watchdogEnabled := false
|
||||
watchdogIdle := true
|
||||
watchdogBusy := true
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogEnabled: &watchdogEnabled,
|
||||
WatchdogIdleEnabled: &watchdogIdle,
|
||||
WatchdogBusyEnabled: &watchdogBusy,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
Expect(appConfig.WatchdogShouldRun()).To(BeTrue())
|
||||
})
|
||||
|
||||
// The disable direction: the master toggle off sends idle=false,
|
||||
// busy=false, but watchdog_enabled may still be the stale true loaded
|
||||
// before the change. WatchDog must follow idle||busy down to false so
|
||||
// the live watchdog is stopped (it stays stopped unless LRU / memory
|
||||
// reclaimer keep it alive, which is gated by WatchdogShouldRun).
|
||||
It("should disable WatchDog when both idle and busy are turned off", func() {
|
||||
appConfig := &ApplicationConfig{WatchDog: true, WatchDogIdle: true, WatchDogBusy: true}
|
||||
|
||||
watchdogEnabled := true
|
||||
watchdogIdle := false
|
||||
watchdogBusy := false
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogEnabled: &watchdogEnabled,
|
||||
WatchdogIdleEnabled: &watchdogIdle,
|
||||
WatchdogBusyEnabled: &watchdogBusy,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.WatchDog).To(BeFalse())
|
||||
Expect(appConfig.WatchdogShouldRun()).To(BeFalse())
|
||||
})
|
||||
|
||||
// Backward compatibility: an API client that posts only watchdog_enabled
|
||||
// (idle/busy nil) keeps the explicit value — the idle/busy derivation
|
||||
// only kicks in when those fields are actually present in the body.
|
||||
It("should preserve explicit watchdog_enabled when idle/busy are absent", func() {
|
||||
appConfig := &ApplicationConfig{WatchDog: false}
|
||||
|
||||
watchdogEnabled := true
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogEnabled: &watchdogEnabled,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should handle MaxActiveBackends and update SingleBackend accordingly", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ const (
|
||||
UsecaseSoundGeneration = "sound_generation"
|
||||
UsecaseRerank = "rerank"
|
||||
UsecaseDetection = "detection"
|
||||
UsecaseDepth = "depth"
|
||||
UsecaseVAD = "vad"
|
||||
UsecaseAudioTransform = "audio_transform"
|
||||
UsecaseDiarization = "diarization"
|
||||
@@ -44,6 +45,7 @@ const (
|
||||
MethodSoundGeneration GRPCMethod = "SoundGeneration"
|
||||
MethodTokenizeString GRPCMethod = "TokenizeString"
|
||||
MethodDetect GRPCMethod = "Detect"
|
||||
MethodDepth GRPCMethod = "Depth"
|
||||
MethodRerank GRPCMethod = "Rerank"
|
||||
MethodVAD GRPCMethod = "VAD"
|
||||
MethodAudioTransform GRPCMethod = "AudioTransform"
|
||||
@@ -141,6 +143,11 @@ var UsecaseInfoMap = map[string]UsecaseInfo{
|
||||
GRPCMethod: MethodDetect,
|
||||
Description: "Object detection via the Detect RPC with bounding boxes.",
|
||||
},
|
||||
UsecaseDepth: {
|
||||
Flag: FLAG_DEPTH,
|
||||
GRPCMethod: MethodDepth,
|
||||
Description: "Per-pixel metric depth, camera pose and 3D point cloud via the Depth RPC (Depth Anything 3).",
|
||||
},
|
||||
UsecaseVAD: {
|
||||
Flag: FLAG_VAD,
|
||||
GRPCMethod: MethodVAD,
|
||||
@@ -488,6 +495,13 @@ var BackendCapabilities = map[string]BackendCapability{
|
||||
DefaultUsecases: []string{UsecaseDetection},
|
||||
Description: "RF-DETR C++ object detection",
|
||||
},
|
||||
"depth-anything": {
|
||||
GRPCMethods: []GRPCMethod{MethodDepth, MethodPredict, MethodGenerateImage},
|
||||
PossibleUsecases: []string{UsecaseDepth},
|
||||
DefaultUsecases: []string{UsecaseDepth},
|
||||
AcceptsImages: true,
|
||||
Description: "Depth Anything 3 C++ — per-pixel metric depth, camera pose and 3D point cloud",
|
||||
},
|
||||
|
||||
// --- Face and speaker recognition backends ---
|
||||
"insightface": {
|
||||
|
||||
@@ -64,6 +64,7 @@ var UsecaseOptions = []FieldOption{
|
||||
{Value: "image", Label: "Image"},
|
||||
{Value: "vision", Label: "Vision"},
|
||||
{Value: "detection", Label: "Detection"},
|
||||
{Value: "depth", Label: "Depth"},
|
||||
{Value: "face_recognition", Label: "Face Recognition"},
|
||||
{Value: "transcript", Label: "Transcript"},
|
||||
{Value: "diarization", Label: "Diarization"},
|
||||
|
||||
@@ -434,6 +434,13 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
||||
Component: "json-editor",
|
||||
Order: 78,
|
||||
},
|
||||
"pipeline.max_history_items": {
|
||||
Section: "pipeline",
|
||||
Label: "Max History Items",
|
||||
Description: "Cap how many trailing conversation items are fed to the LLM each realtime turn (0 = unlimited, rely on the LLM's context window). Set it on a composed pipeline (VAD+STT+LLM+TTS) so a long-running session doesn't grow until the context fills. Unset uses the per-model-type default.",
|
||||
Component: "number",
|
||||
Order: 79,
|
||||
},
|
||||
|
||||
// --- Functions ---
|
||||
"function.grammar.parallel_calls": {
|
||||
|
||||
@@ -510,6 +510,13 @@ type Pipeline struct {
|
||||
// LLM model config. Unset leaves the LLM model config in charge.
|
||||
DisableThinking *bool `yaml:"disable_thinking,omitempty" json:"disable_thinking,omitempty"`
|
||||
|
||||
// MaxHistoryItems caps how many trailing conversation items are fed to the
|
||||
// LLM each realtime turn (0 = unlimited, rely on the LLM's context window).
|
||||
// Unset (nil) uses the per-model-type default. Set it on a composed pipeline
|
||||
// (VAD+STT+LLM+TTS) so a long-running session doesn't grow until the LLM's
|
||||
// context fills.
|
||||
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
||||
|
||||
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
||||
// (block absent) means no gate, preserving existing behavior.
|
||||
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
||||
@@ -1284,6 +1291,10 @@ const (
|
||||
// chat/completion/embeddings.
|
||||
FLAG_SCORE ModelConfigUsecase = 0b10000000000000000000
|
||||
|
||||
// Marks a model as wired for the Depth gRPC primitive (per-pixel
|
||||
// metric depth + camera pose + 3D point cloud via Depth Anything 3).
|
||||
FLAG_DEPTH ModelConfigUsecase = 0b100000000000000000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM ModelConfigUsecase = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
)
|
||||
@@ -1341,6 +1352,7 @@ func GetAllModelConfigUsecases() map[string]ModelConfigUsecase {
|
||||
"FLAG_DIARIZATION": FLAG_DIARIZATION,
|
||||
"FLAG_REALTIME_AUDIO": FLAG_REALTIME_AUDIO,
|
||||
"FLAG_SCORE": FLAG_SCORE,
|
||||
"FLAG_DEPTH": FLAG_DEPTH,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1484,6 +1496,13 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_DEPTH) == FLAG_DEPTH {
|
||||
depthBackends := []string{"depth-anything"}
|
||||
if !slices.Contains(depthBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_FACE_RECOGNITION) == FLAG_FACE_RECOGNITION {
|
||||
faceBackends := []string{"insightface"}
|
||||
if !slices.Contains(faceBackends, c.Backend) {
|
||||
|
||||
@@ -38,6 +38,7 @@ var knownPrefOnlyBackends = []schema.KnownBackend{
|
||||
{Name: "qwen3-tts-cpp", Modality: "tts", AutoDetect: false, Description: "Qwen3 TTS C++ (preference-only)"},
|
||||
{Name: "omnivoice-cpp", Modality: "tts", AutoDetect: false, Description: "OmniVoice C++ TTS with voice cloning and voice design (preference-only)"},
|
||||
{Name: "faster-qwen3-tts", Modality: "tts", AutoDetect: false, Description: "Faster Qwen3 TTS (preference-only)"},
|
||||
{Name: "supertonic", Modality: "tts", AutoDetect: false, Description: "Supertonic multilingual ONNX TTS (preference-only)"},
|
||||
// Detection
|
||||
{Name: "sam3-cpp", Modality: "detection", AutoDetect: false, Description: "SAM3 C++ object detection (preference-only)"},
|
||||
// Audio transform (audio-in / audio-out, optional reference signal)
|
||||
|
||||
@@ -145,6 +145,7 @@ var _ = Describe("Backend Endpoints", func() {
|
||||
expectPrefOnly("qwen-tts", "tts")
|
||||
expectPrefOnly("qwen3-tts-cpp", "tts")
|
||||
expectPrefOnly("faster-qwen3-tts", "tts")
|
||||
expectPrefOnly("supertonic", "tts")
|
||||
expectPrefOnly("sam3-cpp", "detection")
|
||||
})
|
||||
|
||||
|
||||
95
core/http/endpoints/localai/depth.go
Normal file
95
core/http/endpoints/localai/depth.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// DepthEndpoint is the LocalAI Depth endpoint exposing the full Depth Anything 3
|
||||
// output (per-pixel metric depth + confidence + sky, camera pose, 3D point cloud
|
||||
// and optional glb/COLMAP exports).
|
||||
// @Summary Estimates per-pixel depth (and optionally pose/points) from an image.
|
||||
// @Tags depth
|
||||
// @Param request body schema.DepthRequest true "query params"
|
||||
// @Success 200 {object} schema.DepthResponse "Response"
|
||||
// @Router /v1/depth [post]
|
||||
func DepthEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
|
||||
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.DepthRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
||||
if !ok || cfg == nil {
|
||||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
xlog.Debug("Depth", "image", input.Image, "backend", cfg.Backend)
|
||||
|
||||
image, err := decodeImageInput(input.Image)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Default to returning everything the model can produce when the
|
||||
// caller hasn't asked for any specific subset, so a bare request is
|
||||
// still useful.
|
||||
includeDepth := input.IncludeDepth
|
||||
includeConfidence := input.IncludeConfidence
|
||||
includePose := input.IncludePose
|
||||
includeSky := input.IncludeSky
|
||||
includePoints := input.IncludePoints
|
||||
if !includeDepth && !includeConfidence && !includePose && !includeSky && !includePoints {
|
||||
includeDepth = true
|
||||
includeConfidence = true
|
||||
includePose = true
|
||||
includeSky = true
|
||||
}
|
||||
|
||||
req := &proto.DepthRequest{
|
||||
Src: image,
|
||||
Dst: input.Dst,
|
||||
IncludeDepth: includeDepth,
|
||||
IncludeConfidence: includeConfidence,
|
||||
IncludePose: includePose,
|
||||
IncludeSky: includeSky,
|
||||
IncludePoints: includePoints,
|
||||
PointsConfThresh: input.PointsConfThresh,
|
||||
Exports: input.Exports,
|
||||
}
|
||||
|
||||
res, err := backend.Depth(c.Request().Context(), req, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return mapBackendError(err)
|
||||
}
|
||||
|
||||
response := schema.DepthResponse{
|
||||
Width: res.GetWidth(),
|
||||
Height: res.GetHeight(),
|
||||
Depth: res.GetDepth(),
|
||||
Confidence: res.GetConfidence(),
|
||||
Sky: res.GetSky(),
|
||||
Extrinsics: res.GetExtrinsics(),
|
||||
Intrinsics: res.GetIntrinsics(),
|
||||
NumPoints: res.GetNumPoints(),
|
||||
Points: res.GetPoints(),
|
||||
ExportPaths: res.GetExportPaths(),
|
||||
IsMetric: res.GetIsMetric(),
|
||||
}
|
||||
if len(res.GetPointColors()) > 0 {
|
||||
response.PointColors = base64.StdEncoding.EncodeToString(res.GetPointColors())
|
||||
}
|
||||
|
||||
return c.JSON(200, response)
|
||||
}
|
||||
}
|
||||
@@ -221,9 +221,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
// Check if agent job retention changed
|
||||
agentJobChanged := settings.AgentJobRetentionDays != nil
|
||||
|
||||
// Restart watchdog if settings changed
|
||||
// Restart watchdog if settings changed.
|
||||
//
|
||||
// The live start/stop decision derives from the post-apply config
|
||||
// (WatchdogShouldRun) rather than the raw watchdog_enabled request
|
||||
// field: the React master toggle only ever writes the idle/busy flags,
|
||||
// so keying off watchdog_enabled left the live watchdog stopped on a
|
||||
// cold enable until the next restart (#9125). WatchdogShouldRun mirrors
|
||||
// the gating in startWatchdog, so a cold enable starts it immediately
|
||||
// and a full disable (both checks off, no LRU / memory reclaimer) stops
|
||||
// it.
|
||||
if watchdogChanged {
|
||||
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled {
|
||||
if !appConfig.WatchdogShouldRun() {
|
||||
if err := app.StopWatchdog(); err != nil {
|
||||
xlog.Error("Failed to stop watchdog", "error", err)
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
|
||||
@@ -108,4 +108,20 @@ var _ = Describe("Settings endpoints", func() {
|
||||
_, err := os.Stat(filepath.Join(tmp, "runtime_settings.json"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||
// React master toggle must start the live watchdog immediately, without a
|
||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||
// the vestigial watchdog_enabled stays false (it was loaded false). The
|
||||
// old handler keyed its stop decision off that raw watchdog_enabled=false
|
||||
// and called StopWatchdog(), so the watchdog never started until restart.
|
||||
It("starts the live watchdog on a cold enable even when watchdog_enabled=false", func() {
|
||||
Expect(app.ModelLoader().GetWatchDog()).To(BeNil(), "precondition: watchdog should be off")
|
||||
|
||||
rec := post(`{"watchdog_enabled":false,"watchdog_idle_enabled":true,"watchdog_busy_enabled":true,"watchdog_idle_timeout":"15m","watchdog_busy_timeout":"5m","watchdog_interval":"1s"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
|
||||
Expect(app.ModelLoader().GetWatchDog()).ToNot(BeNil(),
|
||||
"watchdog should be running after a cold enable, without waiting for a restart")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -340,6 +340,17 @@ func defaultMaxHistoryItems(cfg *config.ModelConfig) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// resolveMaxHistoryItems honors an explicit pipeline.max_history_items when set,
|
||||
// otherwise falls back to the per-model-type default. This lets a composed
|
||||
// pipeline (VAD+STT+LLM+TTS) cap its history so a long-running session doesn't
|
||||
// grow until the LLM's context window fills.
|
||||
func resolveMaxHistoryItems(cfg *config.ModelConfig) int {
|
||||
if cfg != nil && cfg.Pipeline.MaxHistoryItems != nil {
|
||||
return *cfg.Pipeline.MaxHistoryItems
|
||||
}
|
||||
return defaultMaxHistoryItems(cfg)
|
||||
}
|
||||
|
||||
// trimRealtimeItems returns the tail of items capped at maxItems (0 = no cap).
|
||||
// Walks backwards keeping function_call + function_call_output pairs together
|
||||
// so we never feed the LLM an orphaned tool result that references a call it
|
||||
@@ -492,7 +503,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
||||
Conversations: make(map[string]*Conversation),
|
||||
InputSampleRate: defaultRemoteSampleRate,
|
||||
OutputSampleRate: defaultRemoteSampleRate,
|
||||
MaxHistoryItems: defaultMaxHistoryItems(cfg),
|
||||
MaxHistoryItems: resolveMaxHistoryItems(cfg),
|
||||
}
|
||||
|
||||
// Create a default conversation
|
||||
|
||||
@@ -107,6 +107,29 @@ var _ = Describe("defaultMaxHistoryItems", func() {
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("resolveMaxHistoryItems", func() {
|
||||
ptr := func(i int) *int { return &i }
|
||||
|
||||
It("uses an explicit pipeline.max_history_items", func() {
|
||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{LLM: "llama", MaxHistoryItems: ptr(10)}}
|
||||
Expect(resolveMaxHistoryItems(cfg)).To(Equal(10))
|
||||
})
|
||||
It("honors an explicit 0 (unlimited) over the type default", func() {
|
||||
cfg := &config.ModelConfig{
|
||||
KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO),
|
||||
Pipeline: config.Pipeline{MaxHistoryItems: ptr(0)},
|
||||
}
|
||||
Expect(resolveMaxHistoryItems(cfg)).To(Equal(0))
|
||||
})
|
||||
It("falls back to the type default when unset", func() {
|
||||
cfg := &config.ModelConfig{KnownUsecases: withUsecases(config.FLAG_REALTIME_AUDIO)}
|
||||
Expect(resolveMaxHistoryItems(cfg)).To(Equal(6))
|
||||
})
|
||||
It("tolerates nil", func() {
|
||||
Expect(resolveMaxHistoryItems(nil)).To(Equal(0))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("trimRealtimeItems", func() {
|
||||
user := func(id string) *types.MessageItemUnion {
|
||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
||||
|
||||
@@ -44,7 +44,7 @@ test.describe('Model Editor — Back navigation', () => {
|
||||
await mockEditorEndpoints(page)
|
||||
})
|
||||
|
||||
test('Back returns to Manage with a "Back to Manage" caption', async ({ page }) => {
|
||||
test('Back returns to Manage with a "Back to System" caption', async ({ page }) => {
|
||||
await page.goto('/app/manage')
|
||||
await expect(page.locator('.table')).toBeVisible({ timeout: 10_000 })
|
||||
|
||||
@@ -55,7 +55,7 @@ test.describe('Model Editor — Back navigation', () => {
|
||||
await page.getByRole('menuitem', { name: 'Edit configuration' }).click()
|
||||
|
||||
await expect(page).toHaveURL(/\/app\/model-editor\//)
|
||||
const back = page.getByRole('button', { name: /Back to Manage/ })
|
||||
const back = page.getByRole('button', { name: /Back to System/ })
|
||||
await expect(back).toBeVisible({ timeout: 10_000 })
|
||||
|
||||
await back.click()
|
||||
@@ -89,6 +89,6 @@ test.describe('Model Editor — Back navigation', () => {
|
||||
|
||||
test('falls back to "Back to Manage" on a direct visit with no origin state', async ({ page }) => {
|
||||
await page.goto('/app/model-editor/mock-model')
|
||||
await expect(page.getByRole('button', { name: /Back to Manage/ })).toBeVisible({ timeout: 10_000 })
|
||||
await expect(page.getByRole('button', { name: /Back to System/ })).toBeVisible({ timeout: 10_000 })
|
||||
})
|
||||
})
|
||||
|
||||
@@ -86,7 +86,8 @@
|
||||
"type": "Type",
|
||||
"value": "Value",
|
||||
"search": "Search...",
|
||||
"selectPlaceholder": "Select an option..."
|
||||
"selectPlaceholder": "Select an option...",
|
||||
"noMatch": "No matches"
|
||||
},
|
||||
"time": {
|
||||
"now": "now",
|
||||
|
||||
38
core/http/react-ui/public/locales/en/modelEditor.json
Normal file
38
core/http/react-ui/public/locales/en/modelEditor.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
  "title": {
    "add": "Add Model",
    "edit": "Model Editor"
  },
  "subtitle": {
    "chooseModelType": "Choose a model type to get started",
    "newModel": "New model"
  },
  "actions": {
    "backTo": "Back to {{page}}",
    "system": "System",
    "models": "Models",
    "templates": "Templates",
    "createModel": "Create Model",
    "saveChanges": "Save Changes",
    "saving": "Saving...",
    "saved": "Saved",
    "switchWarning": "Save or discard changes before switching tabs.",
    "discardAndSwitch": "Discard & Switch"
  },
  "tabs": {
    "interactive": "Interactive",
    "yaml": "YAML",
    "yamlDescription": "Edit the YAML directly. The model name must be set in the YAML for create to work."
  },
  "forms": {
    "modelName": {
      "label": "Model Name",
      "placeholder": "my-model-name",
      "hint": "Use letters, numbers, hyphens, underscores, and dots only."
    },
    "empty": {
      "nav": "Use the search bar above to add fields",
      "title": "No fields configured",
      "text": "Use the search bar above to find and add configuration fields."
    }
  }
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"title": "Install Models",
|
||||
"subtitle": "Browse and install AI models from the gallery",
|
||||
"models": "Models",
|
||||
"stats": {
|
||||
"available": "Available",
|
||||
"installed": "Installed"
|
||||
@@ -89,5 +90,11 @@
|
||||
"loadFailed": "Failed to load models: {{message}}",
|
||||
"installFailed": "Failed to install: {{message}}",
|
||||
"deleteFailed": "Failed to delete: {{message}}"
|
||||
},
|
||||
"selector": {
|
||||
"loading": "Loading models...",
|
||||
"selectModel": "Select model...",
|
||||
"searchPlaceholder": "Search models...",
|
||||
"noModels": "No models available"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +86,8 @@
|
||||
"type": "Tipe",
|
||||
"value": "Nilai",
|
||||
"search": "Cari...",
|
||||
"selectPlaceholder": "Pilih opsi..."
|
||||
"selectPlaceholder": "Pilih opsi...",
|
||||
"noMatch": "Tidak ada yang cocok"
|
||||
},
|
||||
"time": {
|
||||
"now": "baru saja",
|
||||
@@ -106,4 +107,4 @@
|
||||
"gigabytes": "GB",
|
||||
"terabytes": "TB"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"title": "Impor Model Baru",
|
||||
"subtitle": {
|
||||
"simple": "Import model dari URI — deteksi otomatis memilih backend.",
|
||||
"simple": "Impor model dari URI — deteksi otomatis memilih backend.",
|
||||
"powerYaml": "Tulis konfigurasi YAML lengkap untuk model.",
|
||||
"powerPrefs": "Preferensi impor tingkat lanjut."
|
||||
},
|
||||
@@ -139,4 +139,4 @@
|
||||
"local": "File konfigurasi YAML lokal"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
38
core/http/react-ui/public/locales/id/modelEditor.json
Normal file
38
core/http/react-ui/public/locales/id/modelEditor.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
  "title": {
    "add": "Tambah Model",
    "edit": "Editor Model"
  },
  "subtitle": {
    "chooseModelType": "Pilih tipe model untuk memulai",
    "newModel": "Model baru"
  },
  "actions": {
    "backTo": "Kembali ke {{page}}",
    "system": "Sistem",
    "models": "Model",
    "templates": "Templat",
    "createModel": "Buat Model",
    "saveChanges": "Simpan Perubahan",
    "saving": "Menyimpan...",
    "saved": "Tersimpan",
    "switchWarning": "Simpan atau buang perubahan sebelum beralih tab.",
    "discardAndSwitch": "Buang & Beralih"
  },
  "tabs": {
    "interactive": "Interaktif",
    "yaml": "YAML",
    "yamlDescription": "Edit YAML secara langsung. Nama model harus diatur di YAML agar pembuatan berhasil."
  },
  "forms": {
    "modelName": {
      "label": "Nama Model",
      "placeholder": "nama-model-saya",
      "hint": "Gunakan huruf, angka, tanda hubung, garis bawah, dan titik saja."
    },
    "empty": {
      "nav": "Gunakan kolom pencarian di atas untuk menambahkan field",
      "title": "Tidak ada field yang dikonfigurasi",
      "text": "Gunakan kolom pencarian di atas untuk menemukan dan menambahkan field konfigurasi."
    }
  }
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"title": "Instal Model",
|
||||
"subtitle": "Telusuri dan instal model AI dari galeri",
|
||||
"models": "Model",
|
||||
"stats": {
|
||||
"available": "Tersedia",
|
||||
"installed": "Terinstal"
|
||||
@@ -89,5 +90,11 @@
|
||||
"loadFailed": "Gagal memuat model: {{message}}",
|
||||
"installFailed": "Gagal menginstal: {{message}}",
|
||||
"deleteFailed": "Gagal menghapus: {{message}}"
|
||||
},
|
||||
"selector": {
|
||||
"loading": "Memuat model...",
|
||||
"selectModel": "Pilih model...",
|
||||
"searchPlaceholder": "Cari model...",
|
||||
"noModels": "Model tidak tersedia"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import { useEffect, useMemo } from 'react'
|
||||
import { useModels } from '../hooks/useModels'
|
||||
import SearchableSelect from './SearchableSelect'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
export default function ModelSelector({
|
||||
value, onChange, capability, className = '',
|
||||
options: externalOptions, loading: externalLoading,
|
||||
disabled: externalDisabled, searchPlaceholder, style,
|
||||
}) {
|
||||
const { t } = useTranslation('models')
|
||||
// Skip capability fetch when external options are provided (capability will be undefined)
|
||||
const { models: hookModels, loading: hookLoading } = useModels(externalOptions ? undefined : capability)
|
||||
|
||||
@@ -28,8 +30,8 @@ export default function ModelSelector({
|
||||
value={value || ''}
|
||||
onChange={onChange}
|
||||
options={modelNames}
|
||||
placeholder={isLoading ? 'Loading models...' : (modelNames.length === 0 ? 'No models available' : 'Select model...')}
|
||||
searchPlaceholder={searchPlaceholder || 'Search models...'}
|
||||
placeholder={isLoading ? t('selector.loading') : (modelNames.length === 0 ? t('selector.noModels') : t('selector.selectModel'))}
|
||||
searchPlaceholder={searchPlaceholder || t('selector.searchPlaceholder')}
|
||||
disabled={isDisabled}
|
||||
className={className}
|
||||
style={style}
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import { useState, useEffect, useRef, useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
export default function SearchableSelect({
|
||||
value, onChange, options, placeholder = 'Select...',
|
||||
allOption, searchPlaceholder = 'Search...',
|
||||
disabled = false, style, className = '',
|
||||
}) {
|
||||
const { t } = useTranslation('common')
|
||||
const [open, setOpen] = useState(false)
|
||||
const [query, setQuery] = useState('')
|
||||
const [focusIndex, setFocusIndex] = useState(-1)
|
||||
@@ -226,7 +228,7 @@ export default function SearchableSelect({
|
||||
})}
|
||||
{filtered.length === 0 && !allOption && (
|
||||
<div style={{ padding: '6px 10px', fontSize: '0.8125rem', color: 'var(--color-text-muted)', fontStyle: 'italic' }}>
|
||||
No matches
|
||||
{t('forms.noMatch')}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -675,7 +675,7 @@ export default function Manage() {
|
||||
onClick: () => handleTogglePinned(model.id, model.pinned),
|
||||
disabled: pinningModels.has(model.id) || !!model.disabled },
|
||||
{ key: 'edit', icon: 'fa-pen-to-square', label: 'Edit configuration',
|
||||
onClick: () => navigate(`/app/model-editor/${encodeURIComponent(model.id)}`, { state: fromState(location, 'Manage') }) },
|
||||
onClick: () => navigate(`/app/model-editor/${encodeURIComponent(model.id)}`, { state: fromState(location, t('manage.title')) }) },
|
||||
{ key: 'logs', icon: 'fa-terminal', label: 'Backend logs',
|
||||
onClick: () => navigate(`/app/backend-logs/${encodeURIComponent(model.id)}`) },
|
||||
{ divider: true },
|
||||
|
||||
@@ -12,6 +12,7 @@ import ConfigFieldRenderer from '../components/ConfigFieldRenderer'
|
||||
import { FormContextProvider } from '../contexts/FormContext'
|
||||
import TemplateSelector from '../components/TemplateSelector'
|
||||
import MODEL_TEMPLATES from '../utils/modelTemplates'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
const SECTION_ICONS = {
|
||||
general: 'fa-cog', llm: 'fa-microchip', parameters: 'fa-sliders',
|
||||
@@ -70,6 +71,7 @@ function defaultForType(uiType) {
|
||||
}
|
||||
|
||||
export default function ModelEditor() {
|
||||
const { t } = useTranslation('modelEditor')
|
||||
const { name } = useParams()
|
||||
const [searchParams] = useSearchParams()
|
||||
const navigate = useNavigate()
|
||||
@@ -397,6 +399,10 @@ export default function ModelEditor() {
|
||||
if (loading) return <div className="page page--medium" style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}><LoadingSpinner size="lg" /></div>
|
||||
if (metaError) return <div className="page page--medium"><div className="empty-state"><p className="empty-state-text">Failed to load config metadata: {metaError}</p></div></div>
|
||||
|
||||
const backPage = isCreateMode && selectedTemplate ? t('actions.templates')
|
||||
: backState ? backState.fromLabel
|
||||
: isCreateMode ? t('actions.models') : t('actions.system')
|
||||
|
||||
return (
|
||||
<FormContextProvider formData={values}>
|
||||
<div className="page page--medium" style={{ padding: 0 }}>
|
||||
@@ -406,10 +412,10 @@ export default function ModelEditor() {
|
||||
padding: 'var(--spacing-lg) var(--spacing-lg) var(--spacing-md)',
|
||||
}}>
|
||||
<div>
|
||||
<h1 className="page-title">{isCreateMode ? 'Add Model' : 'Model Editor'}</h1>
|
||||
<h1 className="page-title">{isCreateMode ? t('title.add') : t('title.edit')}</h1>
|
||||
<p className="page-subtitle">
|
||||
{isCreateMode
|
||||
? (showTemplateSelector ? 'Choose a model type to get started' : `New model${selectedTemplate ? ` — ${selectedTemplate.label}` : ''}`)
|
||||
? (showTemplateSelector ? t('subtitle.chooseModelType') : `${t('subtitle.newModel')}${selectedTemplate ? ` — ${selectedTemplate.label}` : ''}`)
|
||||
: decodeURIComponent(name)}
|
||||
</p>
|
||||
</div>
|
||||
@@ -419,20 +425,16 @@ export default function ModelEditor() {
|
||||
else if (backState) navigate(backState.from)
|
||||
else navigate(isCreateMode ? '/app/models' : '/app/manage')
|
||||
}}>
|
||||
<i className="fas fa-arrow-left" /> Back to {
|
||||
isCreateMode && selectedTemplate ? 'Templates'
|
||||
: backState ? backState.fromLabel
|
||||
: isCreateMode ? 'Models' : 'Manage'
|
||||
}
|
||||
<i className="fas fa-arrow-left" /> {t('actions.backTo', {page: backPage})}
|
||||
</button>
|
||||
{!showTemplateSelector && tab === 'interactive' && (
|
||||
<button className={`btn ${isDirty ? 'btn-primary' : 'btn-secondary'}`} onClick={handleInteractiveSave} disabled={saving || !isDirty}>
|
||||
{saving ? <><LoadingSpinner size="sm" /> Saving...</> : <><i className="fas fa-save" /> {isCreateMode ? 'Create Model' : (isDirty ? 'Save Changes' : 'Saved')}</>}
|
||||
{saving ? <><LoadingSpinner size="sm" /> {t('actions.saving')}</> : <><i className="fas fa-save" /> {isCreateMode ? t('actions.createModel') : (isDirty ? t('actions.saveChanges') : t('actions.saved'))}</>}
|
||||
</button>
|
||||
)}
|
||||
{!showTemplateSelector && tab === 'yaml' && (
|
||||
<button className={`btn ${isDirty ? 'btn-primary' : 'btn-secondary'}`} onClick={handleYamlSave} disabled={saving || !isDirty}>
|
||||
{saving ? <><LoadingSpinner size="sm" /> Saving...</> : <><i className="fas fa-save" /> {isCreateMode ? 'Create Model' : (isDirty ? 'Save Changes' : 'Saved')}</>}
|
||||
{saving ? <><LoadingSpinner size="sm" /> {t('actions.saving')}</> : <><i className="fas fa-save" /> {isCreateMode ? t('actions.createModel') : (isDirty ? t('actions.saveChanges') : t('actions.saved'))}</>}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
@@ -448,17 +450,17 @@ export default function ModelEditor() {
|
||||
display: 'flex', gap: 0, padding: '0 var(--spacing-lg)',
|
||||
borderBottom: '1px solid var(--color-border)',
|
||||
}}>
|
||||
{['interactive', 'yaml'].map(t => {
|
||||
const active = tab === t
|
||||
{['interactive', 'yaml'].map(tb => {
|
||||
const active = tab === tb
|
||||
const blocked = !active && isDirty
|
||||
return (
|
||||
<button
|
||||
key={t}
|
||||
key={tb}
|
||||
onClick={() => {
|
||||
if (active) return
|
||||
if (blocked) { setTabSwitchWarning(true); return }
|
||||
setTabSwitchWarning(false)
|
||||
setTab(t)
|
||||
setTab(tb)
|
||||
}}
|
||||
style={{
|
||||
padding: 'var(--spacing-sm) var(--spacing-md)', border: 'none',
|
||||
@@ -471,8 +473,8 @@ export default function ModelEditor() {
|
||||
transition: 'all 150ms',
|
||||
}}
|
||||
>
|
||||
<i className={`fas ${t === 'interactive' ? 'fa-sliders' : 'fa-code'}`} style={{ marginRight: 6 }} />
|
||||
{t === 'interactive' ? 'Interactive' : 'YAML'}
|
||||
<i className={`fas ${tb === 'interactive' ? 'fa-sliders' : 'fa-code'}`} style={{ marginRight: 6 }} />
|
||||
{tb === 'interactive' ? t('tabs.interactive') : t('tabs.yaml')}
|
||||
</button>
|
||||
)
|
||||
})}
|
||||
@@ -485,7 +487,7 @@ export default function ModelEditor() {
|
||||
background: 'var(--color-warning-light, rgba(245, 158, 11, 0.08))',
|
||||
}}>
|
||||
<i className="fas fa-exclamation-triangle" />
|
||||
<span>Save or discard changes before switching tabs.</span>
|
||||
<span>{t('actions.switchWarning')}</span>
|
||||
<button
|
||||
className="btn btn-secondary"
|
||||
style={{ marginLeft: 'auto', padding: '2px 10px', fontSize: '0.75rem' }}
|
||||
@@ -500,7 +502,7 @@ export default function ModelEditor() {
|
||||
setTab(tab === 'yaml' ? 'interactive' : 'yaml')
|
||||
}}
|
||||
>
|
||||
Discard & Switch
|
||||
{t('actions.discardAndSwitch')}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
@@ -512,7 +514,7 @@ export default function ModelEditor() {
|
||||
<div style={{ padding: '0 var(--spacing-lg) var(--spacing-lg)' }}>
|
||||
{isCreateMode && (
|
||||
<p style={{ fontSize: '0.8125rem', color: 'var(--color-text-secondary)', marginBottom: 'var(--spacing-sm)' }}>
|
||||
Edit the YAML directly. The model name must be set in the YAML for create to work.
|
||||
{t('tabs.yamlDescription')}
|
||||
</p>
|
||||
)}
|
||||
<CodeEditor
|
||||
@@ -533,18 +535,18 @@ export default function ModelEditor() {
|
||||
<div className="card" style={{ padding: 'var(--spacing-md)' }}>
|
||||
<label className="form-label" style={{ fontWeight: 600 }}>
|
||||
<i className="fas fa-tag" style={{ marginRight: '6px', color: 'var(--color-primary)' }} />
|
||||
Model Name
|
||||
{t('forms.modelName.label')}
|
||||
</label>
|
||||
<input
|
||||
className="input"
|
||||
type="text"
|
||||
value={values['name'] || ''}
|
||||
onChange={e => handleFieldChange('name', e.target.value)}
|
||||
placeholder="my-model-name"
|
||||
placeholder={t('forms.modelName.placeholder')}
|
||||
style={{ maxWidth: 400 }}
|
||||
/>
|
||||
<p style={{ marginTop: 'var(--spacing-xs)', fontSize: '0.75rem', color: 'var(--color-text-muted)' }}>
|
||||
Use letters, numbers, hyphens, underscores, and dots only.
|
||||
{t('forms.modelName.hint')}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -596,7 +598,7 @@ export default function ModelEditor() {
|
||||
))}
|
||||
{activeSections.length === 0 && (
|
||||
<div style={{ padding: '12px', fontSize: '0.8125rem', color: 'var(--color-text-muted)' }}>
|
||||
Use the search bar above to add fields
|
||||
{t('forms.empty.nav')}
|
||||
</div>
|
||||
)}
|
||||
</nav>
|
||||
@@ -610,9 +612,9 @@ export default function ModelEditor() {
|
||||
{activeSections.length === 0 && (
|
||||
<div className="card" style={{ padding: 'var(--spacing-xl)', textAlign: 'center' }}>
|
||||
<i className="fas fa-sliders" style={{ fontSize: '2rem', color: 'var(--color-text-muted)', marginBottom: 'var(--spacing-md)' }} />
|
||||
<h3 style={{ marginBottom: 'var(--spacing-sm)' }}>No fields configured</h3>
|
||||
<h3 style={{ marginBottom: 'var(--spacing-sm)' }}>{t('forms.empty.title')}</h3>
|
||||
<p style={{ color: 'var(--color-text-secondary)', fontSize: '0.875rem' }}>
|
||||
Use the search bar above to find and add configuration fields.
|
||||
{t('forms.empty.text')}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -288,7 +288,7 @@ export default function Models() {
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<button className="btn btn-primary btn-sm" onClick={() => navigate('/app/model-editor', { state: fromState(location, 'Models') })}>
|
||||
<button className="btn btn-primary btn-sm" onClick={() => navigate('/app/model-editor', { state: fromState(location, t('models')) })}>
|
||||
<i className="fas fa-plus" /> {t('actions.addModel')}
|
||||
</button>
|
||||
<button className="btn btn-secondary btn-sm" onClick={() => navigate('/app/import-model')}>
|
||||
|
||||
@@ -294,7 +294,7 @@ export default function Settings() {
|
||||
</h3>
|
||||
<div className="card">
|
||||
<SettingRow label="Enable Watchdog" description="Automatically monitor and manage backend processes">
|
||||
<Toggle checked={settings.watchdog_idle_enabled || settings.watchdog_busy_enabled} onChange={(v) => { update('watchdog_idle_enabled', v); update('watchdog_busy_enabled', v) }} />
|
||||
<Toggle checked={settings.watchdog_idle_enabled || settings.watchdog_busy_enabled} onChange={(v) => { update('watchdog_idle_enabled', v); update('watchdog_busy_enabled', v); update('watchdog_enabled', v) }} />
|
||||
</SettingRow>
|
||||
<SettingRow label="Enable Idle Check" description="Automatically stop backends that have been idle too long">
|
||||
<Toggle checked={settings.watchdog_idle_enabled} onChange={(v) => update('watchdog_idle_enabled', v)} disabled={!watchdogEnabled} />
|
||||
|
||||
@@ -98,6 +98,12 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DETECTION)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DetectionRequest) }))
|
||||
|
||||
depthHandler := localai.DepthEndpoint(cl, ml, appConfig)
|
||||
router.POST("/v1/depth",
|
||||
depthHandler,
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DEPTH)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DepthRequest) }))
|
||||
|
||||
// Face recognition endpoints
|
||||
faceMw := []echo.MiddlewareFunc{
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_FACE_RECOGNITION)),
|
||||
|
||||
@@ -181,6 +181,40 @@ type Detection struct {
|
||||
Mask string `json:"mask,omitempty"` // base64-encoded PNG segmentation mask
|
||||
}
|
||||
|
||||
// DepthRequest is the request body for the /v1/depth endpoint. It exposes the
|
||||
// full Depth Anything 3 output surface; the include_* flags and exports let a
|
||||
// caller ask for less work (e.g. depth only, or depth+pose without the point
|
||||
// cloud).
|
||||
type DepthRequest struct {
|
||||
BasicModelRequest
|
||||
Image string `json:"image"` // URL or base64-encoded image to analyze
|
||||
Dst string `json:"dst,omitempty"` // optional output directory for exports (glb/colmap)
|
||||
IncludeDepth bool `json:"include_depth,omitempty"` // return the per-pixel depth map
|
||||
IncludeConfidence bool `json:"include_confidence,omitempty"` // return the per-pixel confidence map (DualDPT)
|
||||
IncludePose bool `json:"include_pose,omitempty"` // return camera extrinsics/intrinsics (DualDPT)
|
||||
IncludeSky bool `json:"include_sky,omitempty"` // return the per-pixel sky map (mono models)
|
||||
IncludePoints bool `json:"include_points,omitempty"` // back-project to a 3D point cloud (DualDPT)
|
||||
PointsConfThresh float32 `json:"points_conf_thresh,omitempty"` // keep points with confidence >= this threshold
|
||||
Exports []string `json:"exports,omitempty"` // requested exports: "glb", "colmap"
|
||||
}
|
||||
|
||||
// DepthResponse is the JSON response for the /v1/depth endpoint, mirroring the
|
||||
// DepthResponse proto.
|
||||
type DepthResponse struct {
|
||||
Width int32 `json:"width"`
|
||||
Height int32 `json:"height"`
|
||||
Depth []float32 `json:"depth,omitempty"` // width*height row-major metric depth
|
||||
Confidence []float32 `json:"confidence,omitempty"` // width*height row-major confidence (DualDPT)
|
||||
Sky []float32 `json:"sky,omitempty"` // width*height row-major sky map (mono)
|
||||
Extrinsics []float32 `json:"extrinsics,omitempty"` // 12 floats, 3x4 row-major (world-to-camera)
|
||||
Intrinsics []float32 `json:"intrinsics,omitempty"` // 9 floats, 3x3 row-major
|
||||
NumPoints int32 `json:"num_points,omitempty"` // number of 3D points
|
||||
Points []float32 `json:"points,omitempty"` // num_points*3 xyz, world space
|
||||
PointColors string `json:"point_colors,omitempty"` // base64-encoded num_points*3 uint8 rgb
|
||||
ExportPaths []string `json:"export_paths,omitempty"` // paths written for the requested exports
|
||||
IsMetric bool `json:"is_metric"` // depth is in metric units
|
||||
}
|
||||
|
||||
// ─── Face recognition ──────────────────────────────────────────────
|
||||
//
|
||||
// FacialArea describes a bounding box for a detected face.
|
||||
|
||||
@@ -169,6 +169,9 @@ func (c *fakeBackendClient) SoundGeneration(_ context.Context, _ *pb.SoundGenera
|
||||
func (c *fakeBackendClient) Detect(_ context.Context, _ *pb.DetectOptions, _ ...ggrpc.CallOption) (*pb.DetectResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (c *fakeBackendClient) Depth(_ context.Context, _ *pb.DepthRequest, _ ...ggrpc.CallOption) (*pb.DepthResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (c *fakeBackendClient) FaceVerify(_ context.Context, _ *pb.FaceVerifyRequest, _ ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -152,6 +152,12 @@ func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOption
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Depth(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Rerank(ctx, in, opts...)
|
||||
|
||||
@@ -100,6 +100,10 @@ func (f *fakeGRPCBackend) Detect(_ context.Context, _ *pb.DetectOptions, _ ...gg
|
||||
return &pb.DetectResponse{}, nil
|
||||
}
|
||||
|
||||
func (f *fakeGRPCBackend) Depth(_ context.Context, _ *pb.DepthRequest, _ ...ggrpc.CallOption) (*pb.DepthResponse, error) {
|
||||
return &pb.DepthResponse{}, nil
|
||||
}
|
||||
|
||||
func (f *fakeGRPCBackend) FaceVerify(_ context.Context, _ *pb.FaceVerifyRequest, _ ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
|
||||
return &pb.FaceVerifyResponse{}, nil
|
||||
}
|
||||
|
||||
@@ -908,6 +908,17 @@ func (r *SmartRouter) stageModelFiles(ctx context.Context, node *BackendNode, op
|
||||
frontendModelsDir = filepath.Clean(strings.TrimSuffix(opts.ModelFile, opts.Model))
|
||||
}
|
||||
|
||||
// Local model directory, captured before the ModelFile field is rewritten to
|
||||
// its remote path below. Companion assets declared as option paths (e.g.
|
||||
// sherpa-onnx's tokens.txt / espeak-ng-data) live beside the model, so option
|
||||
// values are resolved relative to this dir as well as frontendModelsDir —
|
||||
// letting a shared config declare them with bare names regardless of whether
|
||||
// Model includes a subdirectory.
|
||||
localModelDir := ""
|
||||
if opts.ModelFile != "" {
|
||||
localModelDir = filepath.Dir(opts.ModelFile)
|
||||
}
|
||||
|
||||
// keyMapper generates storage keys namespaced under trackingKey, preserving
|
||||
// subdirectory structure relative to frontendModelsDir. This ensures:
|
||||
// 1. All files for a model land in one directory on the worker for clean deletion
|
||||
@@ -1079,8 +1090,8 @@ func (r *SmartRouter) stageModelFiles(ctx context.Context, node *BackendNode, op
|
||||
|
||||
// Stage file paths referenced in generic Options (key:value pairs where values
|
||||
// are file paths). Options stay as relative paths — backends resolve them via ModelPath.
|
||||
r.stageGenericOptions(ctx, node, opts.Options, frontendModelsDir, keyMapper.Key)
|
||||
r.stageGenericOptions(ctx, node, opts.Overrides, frontendModelsDir, keyMapper.Key)
|
||||
r.stageGenericOptions(ctx, node, opts.Options, frontendModelsDir, localModelDir, keyMapper.Key)
|
||||
r.stageGenericOptions(ctx, node, opts.Overrides, frontendModelsDir, localModelDir, keyMapper.Key)
|
||||
|
||||
return opts, nil
|
||||
}
|
||||
@@ -1196,36 +1207,86 @@ func (r *SmartRouter) stageCompanionFiles(ctx context.Context, node *BackendNode
|
||||
}
|
||||
|
||||
// stageGenericOptions iterates key:value option strings and stages any values
|
||||
// that resolve to existing files relative to the frontend models directory.
|
||||
// Option values are NOT rewritten — backends resolve them via ModelPath.
|
||||
// keyFn generates the namespaced storage key for each file path.
|
||||
func (r *SmartRouter) stageGenericOptions(ctx context.Context, node *BackendNode, options []string, frontendModelsDir string, keyFn func(string) string) {
|
||||
// that resolve to existing files relative to the frontend models directory or
|
||||
// the model's own directory. Option values are NOT rewritten — backends resolve
|
||||
// them via ModelPath. keyFn generates the namespaced storage key for each file.
|
||||
func (r *SmartRouter) stageGenericOptions(ctx context.Context, node *BackendNode, options []string, frontendModelsDir, modelDir string, keyFn func(string) string) {
|
||||
for _, opt := range options {
|
||||
optKey, val, ok := strings.Cut(opt, ":")
|
||||
if !ok || val == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if value is an existing file path (absolute or relative to frontend models dir)
|
||||
absPath := val
|
||||
if !filepath.IsAbs(val) && frontendModelsDir != "" {
|
||||
absPath = filepath.Join(frontendModelsDir, val)
|
||||
// Resolve the value to an existing path: absolute as-is, otherwise
|
||||
// relative to frontendModelsDir first, then the model's own directory
|
||||
// (where backends like sherpa-onnx keep companion assets such as
|
||||
// tokens.txt and espeak-ng-data).
|
||||
absPath, ok := resolveOptionPath(val, frontendModelsDir, modelDir)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, err := os.Stat(absPath); os.IsNotExist(err) {
|
||||
info, err := os.Stat(absPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// A directory option value (e.g. sherpa-onnx's espeak-ng-data) is staged
|
||||
// file-by-file so the whole tree is recreated beside the model on the
|
||||
// worker; a single file is staged directly. Values are never rewritten —
|
||||
// backends resolve relative paths via ModelPath.
|
||||
if err == nil && info.IsDir() {
|
||||
r.stageOptionDir(ctx, node, absPath, keyFn)
|
||||
xlog.Debug("Staged option directory", "option", optKey, "localPath", absPath)
|
||||
continue
|
||||
}
|
||||
|
||||
// Stage the file to the worker using the namespaced key
|
||||
key := keyFn(absPath)
|
||||
if _, err := r.fileStager.EnsureRemote(ctx, node.ID, absPath, key); err != nil {
|
||||
xlog.Warn("Failed to stage option file, skipping", "option", opt, "path", absPath, "error", err)
|
||||
continue
|
||||
}
|
||||
// Leave option value unchanged — backend resolves relative paths via ModelPath
|
||||
xlog.Debug("Staged option file", "option", optKey, "localPath", absPath)
|
||||
}
|
||||
}
|
||||
|
||||
// resolveOptionPath finds an existing local path for an option value: an
|
||||
// absolute path as-is, otherwise relative to frontendModelsDir, then to the
|
||||
// model's own directory. Returns false when none exists.
|
||||
func resolveOptionPath(val, frontendModelsDir, modelDir string) (string, bool) {
|
||||
if filepath.IsAbs(val) {
|
||||
if _, err := os.Stat(val); err == nil {
|
||||
return val, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
for _, base := range []string{frontendModelsDir, modelDir} {
|
||||
if base == "" {
|
||||
continue
|
||||
}
|
||||
p := filepath.Join(base, val)
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
return p, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// stageOptionDir stages every regular file under an option-declared directory
|
||||
// (e.g. sherpa-onnx's espeak-ng-data) using the structure-preserving key, so the
|
||||
// tree is recreated beside the model on the worker. Per-file errors are logged
|
||||
// and skipped; the option value itself is not rewritten.
|
||||
func (r *SmartRouter) stageOptionDir(ctx context.Context, node *BackendNode, dir string, keyFn func(string) string) {
|
||||
_ = filepath.WalkDir(dir, func(path string, d fs.DirEntry, walkErr error) error {
|
||||
if walkErr != nil || d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
if _, err := r.fileStager.EnsureRemote(ctx, node.ID, path, keyFn(path)); err != nil {
|
||||
xlog.Warn("Failed to stage option directory file, skipping", "path", path, "error", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// probeHealth checks whether a backend process on the given node/addr is alive
|
||||
// via a gRPC health check with a 2-second timeout. The client is closed after
|
||||
// the check.
|
||||
|
||||
77
core/services/nodes/router_optionstage_test.go
Normal file
77
core/services/nodes/router_optionstage_test.go
Normal file
@@ -0,0 +1,77 @@
|
||||
package nodes
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
// These tests cover staging of companion assets declared as option file paths
|
||||
// (the "vae_path:..." convention). Backends like sherpa-onnx keep a single-file
|
||||
// ModelFile (the .onnx) but resolve sibling assets — tokens.txt and the
|
||||
// espeak-ng-data directory — relative to the model dir. Those siblings must be
|
||||
// shipped to remote workers too, including directory-valued options expanded
|
||||
// file-by-file.
|
||||
var _ = Describe("stageGenericOptions companion assets", func() {
|
||||
var (
|
||||
stager *fakeFileStager
|
||||
router *SmartRouter
|
||||
node *BackendNode
|
||||
tmp string
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
stager = &fakeFileStager{}
|
||||
router = &SmartRouter{
|
||||
fileStager: stager,
|
||||
stagingTracker: NewStagingTracker(),
|
||||
}
|
||||
node = &BackendNode{ID: "node-1", Name: "node-1", Address: "10.0.0.1:50051"}
|
||||
tmp = GinkgoT().TempDir()
|
||||
})
|
||||
|
||||
It("stages option-declared sibling files and expands directory options", func() {
|
||||
modelRel := "vits-piper-it_IT-paola-medium"
|
||||
modelDir := filepath.Join(tmp, "models", modelRel)
|
||||
dataDir := filepath.Join(modelDir, "espeak-ng-data")
|
||||
Expect(os.MkdirAll(filepath.Join(dataDir, "lang"), 0o755)).To(Succeed())
|
||||
|
||||
onnx := filepath.Join(modelDir, "it_IT-paola-medium.onnx")
|
||||
tokens := filepath.Join(modelDir, "tokens.txt")
|
||||
phontab := filepath.Join(dataDir, "phontab")
|
||||
langIt := filepath.Join(dataDir, "lang", "it")
|
||||
for _, f := range []string{onnx, tokens, phontab, langIt} {
|
||||
Expect(os.WriteFile(f, []byte("x"), 0o644)).To(Succeed())
|
||||
}
|
||||
|
||||
opts := &pb.ModelOptions{
|
||||
Model: filepath.Join(modelRel, "it_IT-paola-medium.onnx"),
|
||||
ModelFile: onnx,
|
||||
// Bare names: not found under the models root (Model includes a
|
||||
// subdir), so they must resolve relative to the model's own dir.
|
||||
Options: []string{
|
||||
"tts.noise_scale=0.667", // not a path; ignored by staging
|
||||
"tokens:tokens.txt",
|
||||
"data_dir:espeak-ng-data",
|
||||
},
|
||||
}
|
||||
|
||||
_, err := router.stageModelFiles(context.Background(), node, opts, "track-key")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
staged := make([]string, 0, len(stager.ensureCalls))
|
||||
for _, c := range stager.ensureCalls {
|
||||
staged = append(staged, c.localPath)
|
||||
}
|
||||
// The .onnx (ModelFile), the tokens.txt file option, and every file under
|
||||
// the espeak-ng-data directory option are staged; the directory path
|
||||
// itself is never handed to the stager.
|
||||
Expect(staged).To(ContainElements(onnx, tokens, phontab, langIt))
|
||||
Expect(staged).ToNot(ContainElement(dataDir))
|
||||
})
|
||||
})
|
||||
@@ -25,6 +25,7 @@ const (
|
||||
BackendTraceRerank BackendTraceType = "rerank"
|
||||
BackendTraceTokenize BackendTraceType = "tokenize"
|
||||
BackendTraceDetection BackendTraceType = "detection"
|
||||
BackendTraceDepth BackendTraceType = "depth"
|
||||
BackendTraceFaceVerify BackendTraceType = "face_verify"
|
||||
BackendTraceFaceAnalyze BackendTraceType = "face_analyze"
|
||||
BackendTraceVoiceVerify BackendTraceType = "voice_verify"
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.4.2"
|
||||
"version": "v4.4.3"
|
||||
}
|
||||
|
||||
1123
gallery/index.yaml
1123
gallery/index.yaml
File diff suppressed because it is too large
Load Diff
@@ -12,3 +12,17 @@ config_file: |
|
||||
# Speech rate multiplier. Applied at every TTS / TTSStream call
|
||||
# since the TTSRequest proto has no speed field.
|
||||
- tts.speed=1.0
|
||||
# Companion assets that sherpa-onnx TTS voices load from beside the .onnx
|
||||
# (tokens, lexicons, espeak-ng phonemization data, Kokoro voices bank / jieba
|
||||
# dict). Declared as option paths so distributed inference stages them to
|
||||
# remote worker nodes too; the backend ignores these keys and resolves the
|
||||
# files relative to the model dir. Bare names resolve against the model's own
|
||||
# directory; any that a given voice doesn't ship are skipped during staging.
|
||||
- tokens:tokens.txt
|
||||
- lexicon:lexicon.txt
|
||||
- data_dir:espeak-ng-data
|
||||
- voices:voices.bin
|
||||
- dict_dir:dict
|
||||
- lexicon_us:lexicon-us-en.txt
|
||||
- lexicon_gb:lexicon-gb-en.txt
|
||||
- lexicon_zh:lexicon-zh.txt
|
||||
|
||||
19
gallery/supertonic.yaml
Normal file
19
gallery/supertonic.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
---
|
||||
name: "supertonic"
|
||||
|
||||
config_file: |
|
||||
backend: supertonic
|
||||
options:
|
||||
# Generation knobs read by the supertonic backend at TTS time.
|
||||
# steps = flow-matching denoising steps (quality); speed = rate;
|
||||
# silence = inter-chunk silence seconds for long inputs.
|
||||
- supertonic.steps=8
|
||||
- supertonic.speed=1.05
|
||||
- supertonic.silence=0.3
|
||||
# Voice style used when a request omits `voice`. The model ships
|
||||
# F1-F5 / M1-M5 under voice_styles/; override per request via the
|
||||
# OpenAI `voice` field.
|
||||
- supertonic.default_voice=F1
|
||||
# Default language tag when a request omits `language`. "na" is the
|
||||
# model's language-agnostic mode.
|
||||
- supertonic.default_lang=na
|
||||
3
go.mod
3
go.mod
@@ -65,6 +65,7 @@ require (
|
||||
github.com/testcontainers/testcontainers-go/modules/nats v0.42.0
|
||||
github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0
|
||||
github.com/timbutler/zxcvbn v1.0.4
|
||||
github.com/yalue/onnxruntime_go v1.11.0
|
||||
go.opentelemetry.io/otel v1.44.0
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.66.0
|
||||
go.opentelemetry.io/otel/metric v1.44.0
|
||||
@@ -497,7 +498,7 @@ require (
|
||||
golang.org/x/sync v0.20.0
|
||||
golang.org/x/sys v0.45.0 // indirect
|
||||
golang.org/x/term v0.43.0
|
||||
golang.org/x/text v0.37.0 // indirect
|
||||
golang.org/x/text v0.37.0
|
||||
golang.org/x/tools v0.45.0 // indirect
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
|
||||
|
||||
2
go.sum
2
go.sum
@@ -1377,6 +1377,8 @@ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavM
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
|
||||
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
|
||||
github.com/yalue/onnxruntime_go v1.11.0 h1:aKH4yPIbqfcB3SfnQWq/WxzLelkyolntHnffL3eMBHY=
|
||||
github.com/yalue/onnxruntime_go v1.11.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4=
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
|
||||
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
|
||||
@@ -54,6 +54,7 @@ type Backend interface {
|
||||
TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
|
||||
Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
|
||||
FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
|
||||
FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...grpc.CallOption) (*pb.FaceAnalyzeResponse, error)
|
||||
VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)
|
||||
|
||||
@@ -82,6 +82,10 @@ func (llm *Base) Detect(*pb.DetectOptions) (pb.DetectResponse, error) {
|
||||
return pb.DetectResponse{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) Depth(*pb.DepthRequest) (pb.DepthResponse, error) {
|
||||
return pb.DepthResponse{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) FaceVerify(*pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
|
||||
return pb.FaceVerifyResponse{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
@@ -634,6 +634,24 @@ func (c *Client) Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.
|
||||
return client.Detect(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := c.dial()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.Depth(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
|
||||
@@ -73,6 +73,10 @@ func (e *embedBackend) Detect(ctx context.Context, in *pb.DetectOptions, opts ..
|
||||
return e.s.Detect(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error) {
|
||||
return e.s.Depth(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error) {
|
||||
return e.s.FaceVerify(ctx, in)
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ type AIModel interface {
|
||||
GenerateImage(*pb.GenerateImageRequest) error
|
||||
GenerateVideo(*pb.GenerateVideoRequest) error
|
||||
Detect(*pb.DetectOptions) (pb.DetectResponse, error)
|
||||
Depth(*pb.DepthRequest) (pb.DepthResponse, error)
|
||||
FaceVerify(*pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error)
|
||||
FaceAnalyze(*pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error)
|
||||
VoiceVerify(*pb.VoiceVerifyRequest) (pb.VoiceVerifyResponse, error)
|
||||
|
||||
@@ -156,6 +156,18 @@ func (s *server) Detect(ctx context.Context, in *pb.DetectOptions) (*pb.DetectRe
|
||||
return &res, nil
|
||||
}
|
||||
|
||||
func (s *server) Depth(ctx context.Context, in *pb.DepthRequest) (*pb.DepthResponse, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
defer s.llm.Unlock()
|
||||
}
|
||||
res, err := s.llm.Depth(in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &res, nil
|
||||
}
|
||||
|
||||
func (s *server) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest) (*pb.FaceVerifyResponse, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
|
||||
@@ -108,6 +108,12 @@ func (c *ConnectionEvictingClient) Detect(ctx context.Context, in *pb.DetectOpti
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *ConnectionEvictingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
|
||||
result, err := c.Backend.Depth(ctx, in, opts...)
|
||||
c.checkErr(err)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *ConnectionEvictingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
|
||||
result, err := c.Backend.Rerank(ctx, in, opts...)
|
||||
c.checkErr(err)
|
||||
|
||||
@@ -5767,11 +5767,11 @@ const docTemplate = `{
|
||||
}
|
||||
},
|
||||
"redacted_preview": {
|
||||
"description": "RedactedPreview is the input with mask-action spans replaced\nby their placeholders. Identical to Text when no findings or\nwhen the strongest action is block/route_local (which don't\nrewrite content).",
|
||||
"description": "RedactedPreview is the input with mask-action spans replaced\nby their placeholders. Identical to Text when no findings or\nwhen the strongest action is block/allow (which don't rewrite\ncontent).",
|
||||
"type": "string"
|
||||
},
|
||||
"suggested_action": {
|
||||
"description": "SuggestedAction is the strongest action across all findings:\n\"block\", \"route_local\", \"mask\", or \"allow\" (no findings).",
|
||||
"description": "SuggestedAction is the strongest action across all findings:\n\"block\", \"mask\", or \"allow\" (no findings, or all findings\nresolved to the allow action).",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5764,11 +5764,11 @@
|
||||
}
|
||||
},
|
||||
"redacted_preview": {
|
||||
"description": "RedactedPreview is the input with mask-action spans replaced\nby their placeholders. Identical to Text when no findings or\nwhen the strongest action is block/route_local (which don't\nrewrite content).",
|
||||
"description": "RedactedPreview is the input with mask-action spans replaced\nby their placeholders. Identical to Text when no findings or\nwhen the strongest action is block/allow (which don't rewrite\ncontent).",
|
||||
"type": "string"
|
||||
},
|
||||
"suggested_action": {
|
||||
"description": "SuggestedAction is the strongest action across all findings:\n\"block\", \"route_local\", \"mask\", or \"allow\" (no findings).",
|
||||
"description": "SuggestedAction is the strongest action across all findings:\n\"block\", \"mask\", or \"allow\" (no findings, or all findings\nresolved to the allow action).",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1884,13 +1884,14 @@ definitions:
|
||||
description: |-
|
||||
RedactedPreview is the input with mask-action spans replaced
|
||||
by their placeholders. Identical to Text when no findings or
|
||||
when the strongest action is block/route_local (which don't
|
||||
rewrite content).
|
||||
when the strongest action is block/allow (which don't rewrite
|
||||
content).
|
||||
type: string
|
||||
suggested_action:
|
||||
description: |-
|
||||
SuggestedAction is the strongest action across all findings:
|
||||
"block", "route_local", "mask", or "allow" (no findings).
|
||||
"block", "mask", or "allow" (no findings, or all findings
|
||||
resolved to the allow action).
|
||||
type: string
|
||||
type: object
|
||||
schema.PIIFinding:
|
||||
|
||||
Reference in New Issue
Block a user