mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-28 10:27:30 -04:00
Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be1ae9338b | ||
|
|
923c47020d | ||
|
|
b7a1dec773 | ||
|
|
de2ec2f136 | ||
|
|
d3a26f961d | ||
|
|
13b1ae53bc | ||
|
|
e68ca109c5 | ||
|
|
6740e988d2 | ||
|
|
ade9cc9e37 | ||
|
|
471e38e4e7 | ||
|
|
f3d829e2ef | ||
|
|
91885c2c7e | ||
|
|
f1fcafb888 | ||
|
|
fdff114701 | ||
|
|
1154be5eea | ||
|
|
8aba4fdba3 | ||
|
|
d7d7721eae | ||
|
|
c548150f99 | ||
|
|
ec26b86dd4 | ||
|
|
d11b202dd2 | ||
|
|
e95018ef70 | ||
|
|
0258f8af55 | ||
|
|
14b29ebf4e | ||
|
|
f0d0bff232 | ||
|
|
64150ca7ab | ||
|
|
f98b0f1c1e | ||
|
|
2c96c2d08e | ||
|
|
f01a969f7b | ||
|
|
56600eec3e | ||
|
|
c4fa256cdf | ||
|
|
17c1fc74b2 | ||
|
|
068d397acf | ||
|
|
5b3572f8b8 | ||
|
|
6afe127cd4 |
307
.github/backend-matrix.yml
vendored
307
.github/backend-matrix.yml
vendored
@@ -3745,6 +3745,302 @@ include:
|
|||||||
dockerfile: "./backend/Dockerfile.golang"
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
context: "./"
|
context: "./"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
|
# voice-detect
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "8"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-12-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-13-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-cuda-13-arm64-voice-detect'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-voice-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f32-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f16-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-voice-detect'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2204'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-rocm-hipblas-voice-detect'
|
||||||
|
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
# face-detect
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "8"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-12-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-13-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-cuda-13-arm64-face-detect'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-face-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f32-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f16-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-face-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-face-detect'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2204'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-rocm-hipblas-face-detect'
|
||||||
|
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
# acestep-cpp
|
# acestep-cpp
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
cuda-major-version: ""
|
cuda-major-version: ""
|
||||||
@@ -4928,6 +5224,14 @@ includeDarwin:
|
|||||||
tag-suffix: "-metal-darwin-arm64-ced"
|
tag-suffix: "-metal-darwin-arm64-ced"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
lang: "go"
|
lang: "go"
|
||||||
|
- backend: "voice-detect"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-voice-detect"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
|
- backend: "face-detect"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-face-detect"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
- backend: "acestep-cpp"
|
- backend: "acestep-cpp"
|
||||||
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
@@ -4991,9 +5295,6 @@ includeDarwin:
|
|||||||
- backend: "qwen-tts"
|
- backend: "qwen-tts"
|
||||||
tag-suffix: "-metal-darwin-arm64-qwen-tts"
|
tag-suffix: "-metal-darwin-arm64-qwen-tts"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
- backend: "fish-speech"
|
|
||||||
tag-suffix: "-metal-darwin-arm64-fish-speech"
|
|
||||||
build-type: "mps"
|
|
||||||
- backend: "voxcpm"
|
- backend: "voxcpm"
|
||||||
tag-suffix: "-metal-darwin-arm64-voxcpm"
|
tag-suffix: "-metal-darwin-arm64-voxcpm"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
|
|||||||
11
.github/workflows/backend_build_darwin.yml
vendored
11
.github/workflows/backend_build_darwin.yml
vendored
@@ -99,6 +99,7 @@ jobs:
|
|||||||
/opt/homebrew/Cellar/xxhash
|
/opt/homebrew/Cellar/xxhash
|
||||||
/opt/homebrew/Cellar/zstd
|
/opt/homebrew/Cellar/zstd
|
||||||
/opt/homebrew/Cellar/nlohmann-json
|
/opt/homebrew/Cellar/nlohmann-json
|
||||||
|
/opt/homebrew/Cellar/opus
|
||||||
key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
|
key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -113,7 +114,12 @@ jobs:
|
|||||||
# nlohmann-json is header-only and required by the ds4 backend
|
# nlohmann-json is header-only and required by the ds4 backend
|
||||||
# (dsml_renderer.cpp includes <nlohmann/json.hpp>); on Linux it comes
|
# (dsml_renderer.cpp includes <nlohmann/json.hpp>); on Linux it comes
|
||||||
# from the apt-installed nlohmann-json3-dev in the build image.
|
# from the apt-installed nlohmann-json3-dev in the build image.
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json
|
# opus + pkg-config are required by the opus go backend: its
|
||||||
|
# Makefile/package.sh call `pkg-config --cflags/--libs opus` to build
|
||||||
|
# libopusshim.dylib and to locate libopus.dylib for bundling. brew's
|
||||||
|
# pkg-config defaults its search path to the Homebrew prefix so the
|
||||||
|
# opus.pc is found.
|
||||||
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json opus pkg-config
|
||||||
# Force-reinstall ccache so brew re-validates its full runtime-dep
|
# Force-reinstall ccache so brew re-validates its full runtime-dep
|
||||||
# closure on every run. This is the durable fix: when the upstream
|
# closure on every run. This is the durable fix: when the upstream
|
||||||
# ccache formula gains a new transitive dep (as it has multiple times
|
# ccache formula gains a new transitive dep (as it has multiple times
|
||||||
@@ -132,7 +138,7 @@ jobs:
|
|||||||
# and decides "already installed" without re-linking, so on a cache-
|
# and decides "already installed" without re-linking, so on a cache-
|
||||||
# hit run the formulas aren't on PATH. Force-link them; --overwrite
|
# hit run the formulas aren't on PATH. Force-link them; --overwrite
|
||||||
# tolerates pre-existing symlinks from earlier installs.
|
# tolerates pre-existing symlinks from earlier installs.
|
||||||
brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json 2>/dev/null || true
|
brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json opus pkg-config 2>/dev/null || true
|
||||||
|
|
||||||
- name: Save Homebrew cache
|
- name: Save Homebrew cache
|
||||||
if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
|
if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
|
||||||
@@ -153,6 +159,7 @@ jobs:
|
|||||||
/opt/homebrew/Cellar/xxhash
|
/opt/homebrew/Cellar/xxhash
|
||||||
/opt/homebrew/Cellar/zstd
|
/opt/homebrew/Cellar/zstd
|
||||||
/opt/homebrew/Cellar/nlohmann-json
|
/opt/homebrew/Cellar/nlohmann-json
|
||||||
|
/opt/homebrew/Cellar/opus
|
||||||
key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
|
key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
|
||||||
|
|
||||||
# ---- ccache for llama.cpp CMake builds ----
|
# ---- ccache for llama.cpp CMake builds ----
|
||||||
|
|||||||
8
.github/workflows/bump_deps.yaml
vendored
8
.github/workflows/bump_deps.yaml
vendored
@@ -46,6 +46,14 @@ jobs:
|
|||||||
variable: "CED_VERSION"
|
variable: "CED_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/ced/Makefile"
|
file: "backend/go/ced/Makefile"
|
||||||
|
- repository: "mudler/voice-detect.cpp"
|
||||||
|
variable: "VOICEDETECT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
file: "backend/go/voice-detect/Makefile"
|
||||||
|
- repository: "mudler/face-detect.cpp"
|
||||||
|
variable: "FACEDETECT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
file: "backend/go/face-detect/Makefile"
|
||||||
- repository: "mudler/depth-anything.cpp"
|
- repository: "mudler/depth-anything.cpp"
|
||||||
variable: "DEPTHANYTHING_VERSION"
|
variable: "DEPTHANYTHING_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
|||||||
21
.github/workflows/release.yaml
vendored
21
.github/workflows/release.yaml
vendored
@@ -24,6 +24,11 @@ jobs:
|
|||||||
args: release --clean
|
args: release --clean
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
MACOS_SIGN_P12: ${{ secrets.MACOS_CERTIFICATE }}
|
||||||
|
MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||||
|
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||||
|
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||||
|
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||||
launcher-build-darwin:
|
launcher-build-darwin:
|
||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -35,9 +40,19 @@ jobs:
|
|||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: 1.23
|
go-version: 1.23
|
||||||
- name: Build launcher for macOS ARM64
|
- name: Import signing certificate
|
||||||
run: |
|
env:
|
||||||
make build-launcher-darwin
|
MACOS_CERTIFICATE: ${{ secrets.MACOS_CERTIFICATE }}
|
||||||
|
MACOS_CERTIFICATE_PWD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||||
|
MACOS_CI_KEYCHAIN_PWD: ${{ secrets.MACOS_CI_KEYCHAIN_PWD }}
|
||||||
|
run: bash contrib/macos/sign-and-notarize.sh import-cert
|
||||||
|
- name: Build, sign and notarize the DMG
|
||||||
|
env:
|
||||||
|
MACOS_SIGN_IDENTITY: ${{ secrets.MACOS_SIGN_IDENTITY }}
|
||||||
|
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||||
|
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||||
|
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||||
|
run: make release-launcher-darwin
|
||||||
- name: Upload DMG to Release
|
- name: Upload DMG to Release
|
||||||
uses: softprops/action-gh-release@v3
|
uses: softprops/action-gh-release@v3
|
||||||
with:
|
with:
|
||||||
|
|||||||
6
.github/workflows/test-extra.yml
vendored
6
.github/workflows/test-extra.yml
vendored
@@ -1008,7 +1008,11 @@ jobs:
|
|||||||
# image + working dir.
|
# image + working dir.
|
||||||
tests-vibevoice-cpp-grpc-transcription:
|
tests-vibevoice-cpp-grpc-transcription:
|
||||||
needs: detect-changes
|
needs: detect-changes
|
||||||
if: needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
# Skip on release tag pushes: the ASR Q4_K model is ~10 GB and cannot be
|
||||||
|
# pulled from HF within the inner `go test -timeout 30m` budget on a CI
|
||||||
|
# runner, so every tag build hung and timed out. Still runs on PRs/branch
|
||||||
|
# pushes that touch vibevoice-cpp so regressions are caught off the release path.
|
||||||
|
if: (needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true') && !startsWith(github.ref, 'refs/tags/')
|
||||||
runs-on: bigger-runner
|
runs-on: bigger-runner
|
||||||
timeout-minutes: 150
|
timeout-minutes: 150
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@@ -121,3 +121,19 @@ jobs:
|
|||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
limit-access-to-actor: true
|
limit-access-to-actor: true
|
||||||
|
|
||||||
|
# Fast standalone unit tests for the backends' pure C++ helpers - currently the
|
||||||
|
# llama-cpp message reconstruction (backend/cpp/llama-cpp/message_content.h),
|
||||||
|
# which guards the OpenAI chat content normalization (mudler/LocalAI#10524,
|
||||||
|
# #7324, #7528). The runner discovers every *_test.cpp under backend/cpp/, so
|
||||||
|
# new pure-C++ unit tests are picked up with no CI changes. These need only the
|
||||||
|
# C++ stdlib + nlohmann/json, so they run on every PR without the full
|
||||||
|
# llama.cpp + gRPC backend build. (The same suite is also wired as an opt-in
|
||||||
|
# CMake/ctest target, -DLLAMA_GRPC_BUILD_TESTS=ON, for in-backend-build runs.)
|
||||||
|
tests-backend-cpp:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v7
|
||||||
|
- name: Run backend C++ unit tests
|
||||||
|
run: make test-backend-cpp
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -94,3 +94,6 @@ core/http/react-ui/test-results/
|
|||||||
|
|
||||||
# SDD / brainstorm scratch (agent-driven development)
|
# SDD / brainstorm scratch (agent-driven development)
|
||||||
.superpowers/
|
.superpowers/
|
||||||
|
|
||||||
|
# Local Apple signing material (never commit)
|
||||||
|
.certs/
|
||||||
|
|||||||
@@ -9,7 +9,8 @@ source:
|
|||||||
enabled: true
|
enabled: true
|
||||||
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
||||||
builds:
|
builds:
|
||||||
- main: ./cmd/local-ai
|
- id: local-ai
|
||||||
|
main: ./cmd/local-ai
|
||||||
env:
|
env:
|
||||||
- CGO_ENABLED=0
|
- CGO_ENABLED=0
|
||||||
ldflags:
|
ldflags:
|
||||||
@@ -35,3 +36,19 @@ snapshot:
|
|||||||
version_template: "{{ .Tag }}-next"
|
version_template: "{{ .Tag }}-next"
|
||||||
changelog:
|
changelog:
|
||||||
use: github-native
|
use: github-native
|
||||||
|
# Sign + notarize the macOS server binary via the quill backend (runs on Linux,
|
||||||
|
# no macOS runner needed). Disabled automatically when MACOS_SIGN_P12 is unset
|
||||||
|
# (forks / PRs), so those builds stay unsigned and green.
|
||||||
|
notarize:
|
||||||
|
macos:
|
||||||
|
- enabled: '{{ isEnvSet "MACOS_SIGN_P12" }}'
|
||||||
|
ids:
|
||||||
|
- local-ai
|
||||||
|
sign:
|
||||||
|
certificate: "{{.Env.MACOS_SIGN_P12}}"
|
||||||
|
password: "{{.Env.MACOS_SIGN_PASSWORD}}"
|
||||||
|
notarize:
|
||||||
|
issuer_id: "{{.Env.MACOS_NOTARY_ISSUER_ID}}"
|
||||||
|
key_id: "{{.Env.MACOS_NOTARY_KEY_ID}}"
|
||||||
|
key: "{{.Env.MACOS_NOTARY_KEY}}"
|
||||||
|
wait: true
|
||||||
|
|||||||
44
Makefile
44
Makefile
@@ -103,7 +103,7 @@ COVERAGE_E2E_LABELS?=!real-models
|
|||||||
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
||||||
|
|
||||||
|
|
||||||
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-backend-cpp test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
@@ -201,6 +201,13 @@ test: prepare-test
|
|||||||
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
|
## Compiles and runs the standalone C++ unit tests for the backends (pure
|
||||||
|
## helpers that depend only on the stdlib + nlohmann/json, no full backend
|
||||||
|
## build). Discovers every *_test.cpp under backend/cpp/ - see
|
||||||
|
## backend/cpp/run-unit-tests.sh. Set NLOHMANN_INCLUDE to skip the header fetch.
|
||||||
|
test-backend-cpp:
|
||||||
|
bash backend/cpp/run-unit-tests.sh
|
||||||
|
|
||||||
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
||||||
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
||||||
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
||||||
@@ -1453,13 +1460,32 @@ docs: docs/static/gallery.html
|
|||||||
########################################################
|
########################################################
|
||||||
|
|
||||||
## fyne cross-platform build
|
## fyne cross-platform build
|
||||||
build-launcher-darwin: build-launcher
|
# Build LocalAI.app from the launcher via fyne (metadata read from cmd/launcher/FyneApp.toml).
|
||||||
go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
|
# Signing happens via contrib/macos/sign-and-notarize.sh, which is a no-op when the signing
|
||||||
--appName "LocalAI" \
|
# secrets are unset, so unsigned local/fork builds keep working.
|
||||||
--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
|
build-launcher-darwin:
|
||||||
--bundleIdentifier "com.localai.launcher" \
|
rm -rf dist/LocalAI.app cmd/launcher/LocalAI.app
|
||||||
--iconPath "core/http/static/logo.png" \
|
mkdir -p dist
|
||||||
--outputDir "dist/"
|
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os darwin -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)
|
||||||
|
mv cmd/launcher/LocalAI.app dist/LocalAI.app
|
||||||
|
bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.app
|
||||||
|
|
||||||
|
# Wrap the (signed) app into a drag-to-Applications DMG via hdiutil, then sign the DMG.
|
||||||
|
dmg-launcher-darwin: build-launcher-darwin
|
||||||
|
rm -rf dist/dmg dist/LocalAI.dmg
|
||||||
|
mkdir -p dist/dmg
|
||||||
|
cp -R dist/LocalAI.app dist/dmg/LocalAI.app
|
||||||
|
ln -s /Applications dist/dmg/Applications
|
||||||
|
hdiutil create -volname "LocalAI" -srcfolder dist/dmg -ov -format UDZO dist/LocalAI.dmg
|
||||||
|
bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.dmg
|
||||||
|
|
||||||
|
# Submit the DMG to Apple notarization and staple the ticket (no-op without notary secrets).
|
||||||
|
notarize-launcher-darwin: dmg-launcher-darwin
|
||||||
|
bash contrib/macos/sign-and-notarize.sh notarize dist/LocalAI.dmg
|
||||||
|
|
||||||
|
# Single entrypoint for CI: build -> sign app -> dmg -> sign dmg -> notarize -> staple.
|
||||||
|
release-launcher-darwin: notarize-launcher-darwin
|
||||||
|
@echo "dist/LocalAI.dmg is ready"
|
||||||
|
|
||||||
build-launcher-linux:
|
build-launcher-linux:
|
||||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv LocalAI.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
||||||
|
|||||||
@@ -177,6 +177,7 @@ For more details, see the [Getting Started guide](https://localai.io/basics/gett
|
|||||||
|
|
||||||
## Latest News
|
## Latest News
|
||||||
|
|
||||||
|
- **June 2026**: New native biometric backends from the LocalAI team: [voice-detect.cpp](https://github.com/mudler/voice-detect.cpp) for speaker recognition and voice analysis (ECAPA-TDNN, WeSpeaker, ERes2Net, CAM++, wav2vec2 age/gender/emotion) and [face-detect.cpp](https://github.com/mudler/face-detect.cpp) for face detection, recognition, demographics and anti-spoofing (SCRFD/ArcFace, YuNet/SFace). Both are from-scratch C++/ggml engines with no Python or onnxruntime at inference, self-contained GGUF weights, bit-exact parity with the reference, and GPU cuDNN parity, replacing the heavier Python `insightface` and `speaker-recognition` backends ([PR #10441](https://github.com/mudler/LocalAI/pull/10441)).
|
||||||
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
||||||
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
||||||
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ RUN <<EOT bash
|
|||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||||
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} libcudnn9-dev-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||||
fi
|
fi
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
@@ -1,15 +1,6 @@
|
|||||||
## Clip/LLaVA library for multimodal support — built locally from copied sources
|
## Multimodal support is provided by the in-tree `mtmd` library target
|
||||||
set(TARGET myclip)
|
## (examples/mtmd/), which the grpc-server links and includes below. clip/llava
|
||||||
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
|
## were pruned upstream; the high-level mtmd_* / mtmd_helper_* API is used instead.
|
||||||
install(TARGETS ${TARGET} LIBRARY)
|
|
||||||
target_include_directories(myclip PUBLIC .)
|
|
||||||
target_include_directories(myclip PUBLIC ../..)
|
|
||||||
target_include_directories(myclip PUBLIC ../../common)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
|
||||||
if (NOT MSVC)
|
|
||||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(TARGET grpc-server)
|
set(TARGET grpc-server)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
@@ -67,12 +58,16 @@ add_library(hw_grpc_proto
|
|||||||
${hw_proto_hdrs} )
|
${hw_proto_hdrs} )
|
||||||
|
|
||||||
add_executable(${TARGET} grpc-server.cpp json.hpp)
|
add_executable(${TARGET} grpc-server.cpp json.hpp)
|
||||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
# mtmd public headers (mtmd.h / mtmd-helper.h) live in examples/mtmd/.
|
||||||
|
# Linking the mtmd target also propagates this include dir, but we add it
|
||||||
|
# explicitly for clarity.
|
||||||
|
target_include_directories(${TARGET} PRIVATE ../mtmd)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||||
absl::flags_parse
|
absl::flags_parse
|
||||||
gRPC::${_REFLECTION}
|
gRPC::${_REFLECTION}
|
||||||
gRPC::${_GRPC_GRPCPP}
|
gRPC::${_GRPC_GRPCPP}
|
||||||
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
if(TARGET BUILD_INFO)
|
if(TARGET BUILD_INFO)
|
||||||
add_dependencies(${TARGET} BUILD_INFO)
|
add_dependencies(${TARGET} BUILD_INFO)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
IK_LLAMA_VERSION?=b84902d2ad27c34f989f23947200c4b91b1568fd
|
IK_LLAMA_VERSION?=f96eaddba8bed6a9a5e628bbf6a566775c70b49c
|
||||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -11,8 +11,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "clip.h"
|
#include "mtmd.h"
|
||||||
#include "llava.h"
|
#include "mtmd-helper.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
@@ -45,7 +45,9 @@ using backend::HealthMessage;
|
|||||||
|
|
||||||
///// LLAMA.CPP server code below
|
///// LLAMA.CPP server code below
|
||||||
|
|
||||||
using json = nlohmann::json;
|
// Match mtmd.h and ik_llama's server/common headers, which all use
|
||||||
|
// nlohmann::ordered_json; a plain nlohmann::json alias collides at global scope.
|
||||||
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
struct server_params
|
struct server_params
|
||||||
{
|
{
|
||||||
@@ -219,6 +221,11 @@ struct llama_client_slot
|
|||||||
|
|
||||||
// multimodal
|
// multimodal
|
||||||
std::vector<slot_image> images;
|
std::vector<slot_image> images;
|
||||||
|
// Full prompt with mtmd media markers (mtmd_default_marker()) substituted in
|
||||||
|
// place of the legacy [img-N] tags, covering the text up to and including the
|
||||||
|
// last image. The text after the last image is kept in params.input_suffix and
|
||||||
|
// decoded through the normal token path so the sampling loop is unchanged.
|
||||||
|
std::string mtmd_prompt;
|
||||||
|
|
||||||
// stats
|
// stats
|
||||||
size_t sent_count = 0;
|
size_t sent_count = 0;
|
||||||
@@ -252,14 +259,14 @@ struct llama_client_slot
|
|||||||
|
|
||||||
for (slot_image & img : images)
|
for (slot_image & img : images)
|
||||||
{
|
{
|
||||||
free(img.image_embedding);
|
if (img.bitmap) {
|
||||||
if (img.img_data) {
|
mtmd_bitmap_free(img.bitmap);
|
||||||
clip_image_u8_free(img.img_data);
|
img.bitmap = nullptr;
|
||||||
}
|
}
|
||||||
img.prefix_prompt = "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
images.clear();
|
images.clear();
|
||||||
|
mtmd_prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_budget(gpt_params &global_params) {
|
bool has_budget(gpt_params &global_params) {
|
||||||
@@ -396,46 +403,13 @@ struct llama_metrics {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct llava_embd_batch {
|
|
||||||
std::vector<llama_pos> pos;
|
|
||||||
std::vector<int32_t> n_seq_id;
|
|
||||||
std::vector<llama_seq_id> seq_id_0;
|
|
||||||
std::vector<llama_seq_id *> seq_ids;
|
|
||||||
std::vector<int8_t> logits;
|
|
||||||
llama_batch batch;
|
|
||||||
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
|
|
||||||
pos .resize(n_tokens);
|
|
||||||
n_seq_id.resize(n_tokens);
|
|
||||||
seq_ids .resize(n_tokens + 1);
|
|
||||||
logits .resize(n_tokens);
|
|
||||||
seq_id_0.resize(1);
|
|
||||||
seq_id_0[0] = seq_id;
|
|
||||||
seq_ids [n_tokens] = nullptr;
|
|
||||||
batch = {
|
|
||||||
/*n_tokens =*/ n_tokens,
|
|
||||||
/*tokens =*/ nullptr,
|
|
||||||
/*embd =*/ embd,
|
|
||||||
/*pos =*/ pos.data(),
|
|
||||||
/*n_seq_id =*/ n_seq_id.data(),
|
|
||||||
/*seq_id =*/ seq_ids.data(),
|
|
||||||
/*logits =*/ logits.data(),
|
|
||||||
};
|
|
||||||
for (int i = 0; i < n_tokens; i++) {
|
|
||||||
batch.pos [i] = pos_0 + i;
|
|
||||||
batch.n_seq_id[i] = 1;
|
|
||||||
batch.seq_id [i] = seq_id_0.data();
|
|
||||||
batch.logits [i] = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_server_context
|
struct llama_server_context
|
||||||
{
|
{
|
||||||
llama_model *model = nullptr;
|
llama_model *model = nullptr;
|
||||||
llama_context *ctx = nullptr;
|
llama_context *ctx = nullptr;
|
||||||
const llama_vocab * vocab = nullptr;
|
const llama_vocab * vocab = nullptr;
|
||||||
|
|
||||||
clip_ctx *clp_ctx = nullptr;
|
mtmd_context *mctx = nullptr;
|
||||||
|
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
||||||
@@ -491,11 +465,6 @@ struct llama_server_context
|
|||||||
if (!params.mmproj.path.empty()) {
|
if (!params.mmproj.path.empty()) {
|
||||||
multimodal = true;
|
multimodal = true;
|
||||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||||
clp_ctx = clip_model_load(params.mmproj.path.c_str(), /*verbosity=*/ 1);
|
|
||||||
if(clp_ctx == nullptr) {
|
|
||||||
LOG_ERR("unable to load clip model: %s", params.mmproj.path.c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (params.n_ctx < 2048) { // request larger context for the image embedding
|
if (params.n_ctx < 2048) { // request larger context for the image embedding
|
||||||
params.n_ctx = 2048;
|
params.n_ctx = 2048;
|
||||||
@@ -512,10 +481,24 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (multimodal) {
|
if (multimodal) {
|
||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
// mtmd_init_from_file requires the already-loaded text model, so it must
|
||||||
const int n_embd_llm = llama_model_n_embd(model);
|
// run AFTER llama_init_from_gpt_params. It validates the projector
|
||||||
if (n_embd_clip != n_embd_llm) {
|
// against the model internally and returns nullptr on dim mismatch, so
|
||||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
// the explicit clip_n_mmproj_embd check is no longer needed.
|
||||||
|
mtmd_context_params mparams = mtmd_context_params_default();
|
||||||
|
mparams.use_gpu = params.mmproj_use_gpu;
|
||||||
|
mparams.print_timings = false;
|
||||||
|
mparams.n_threads = params.n_threads_mtmd != -1 ? params.n_threads_mtmd
|
||||||
|
: params.n_threads_batch != -1 ? params.n_threads_batch
|
||||||
|
: params.n_threads;
|
||||||
|
mparams.verbosity = GGML_LOG_LEVEL_INFO;
|
||||||
|
mparams.flash_attn_type = params.flash_attn ? LLAMA_FLASH_ATTN_TYPE_ENABLED
|
||||||
|
: LLAMA_FLASH_ATTN_TYPE_DISABLED;
|
||||||
|
mparams.image_min_tokens = params.image_min_tokens;
|
||||||
|
mparams.image_max_tokens = params.image_max_tokens;
|
||||||
|
mctx = mtmd_init_from_file(params.mmproj.path.c_str(), model, mparams);
|
||||||
|
if (mctx == nullptr) {
|
||||||
|
LOG_ERR("unable to load multimodal projector: %s", params.mmproj.path.c_str());
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
return false;
|
return false;
|
||||||
@@ -865,8 +848,8 @@ struct llama_server_context
|
|||||||
|
|
||||||
slot_image img_sl;
|
slot_image img_sl;
|
||||||
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
|
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
|
||||||
img_sl.img_data = clip_image_u8_init();
|
img_sl.bitmap = mtmd_helper_bitmap_init_from_buf(mctx, image_buffer.data(), image_buffer.size());
|
||||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
if (img_sl.bitmap == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
||||||
__func__,
|
__func__,
|
||||||
@@ -879,50 +862,74 @@ struct llama_server_context
|
|||||||
{"slot_id", slot->id},
|
{"slot_id", slot->id},
|
||||||
{"img_sl_id", img_sl.id}
|
{"img_sl_id", img_sl.id}
|
||||||
});
|
});
|
||||||
img_sl.request_encode_image = true;
|
|
||||||
slot->images.push_back(img_sl);
|
slot->images.push_back(img_sl);
|
||||||
}
|
}
|
||||||
// process prompt
|
// Translate the legacy [img-N] tags into mtmd media markers, in
|
||||||
// example: system prompt [img-102] user [img-103] describe [img-134] -> [{id: 102, prefix: 'system prompt '}, {id: 103, prefix: ' user '}, {id: 134, prefix: ' describe '}]}
|
// order, and collect the matching bitmaps in marker order so they
|
||||||
|
// line up with the markers passed to mtmd_tokenize(). The text after
|
||||||
|
// the last image stays in input_suffix and is decoded through the
|
||||||
|
// normal token path, so the sampling loop is unchanged.
|
||||||
|
// example: system prompt [img-102] user [img-103] describe [img-134]
|
||||||
if (slot->images.size() > 0 && !slot->prompt.is_array())
|
if (slot->images.size() > 0 && !slot->prompt.is_array())
|
||||||
{
|
{
|
||||||
|
const std::string marker = mtmd_default_marker();
|
||||||
std::string prompt = slot->prompt.get<std::string>();
|
std::string prompt = slot->prompt.get<std::string>();
|
||||||
size_t pos = 0, begin_prefix = 0;
|
std::string built_prompt;
|
||||||
|
std::vector<slot_image> ordered;
|
||||||
|
size_t pos = 0, copy_from = 0;
|
||||||
std::string pattern = "[img-";
|
std::string pattern = "[img-";
|
||||||
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
|
||||||
size_t end_prefix = pos;
|
auto free_images = [&]() {
|
||||||
pos += pattern.length();
|
for (slot_image &img : slot->images) {
|
||||||
size_t end_pos = prompt.find(']', pos);
|
if (img.bitmap) {
|
||||||
if (end_pos != std::string::npos)
|
mtmd_bitmap_free(img.bitmap);
|
||||||
{
|
img.bitmap = nullptr;
|
||||||
std::string image_id = prompt.substr(pos, end_pos - pos);
|
|
||||||
try
|
|
||||||
{
|
|
||||||
int img_id = std::stoi(image_id);
|
|
||||||
bool found = false;
|
|
||||||
for (slot_image &img : slot->images)
|
|
||||||
{
|
|
||||||
if (img.id == img_id) {
|
|
||||||
found = true;
|
|
||||||
img.prefix_prompt = prompt.substr(begin_prefix, end_prefix - begin_prefix);
|
|
||||||
begin_prefix = end_pos + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!found) {
|
|
||||||
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
|
||||||
slot->images.clear();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} catch (const std::invalid_argument& e) {
|
|
||||||
LOG("Invalid image number id in prompt\n");
|
|
||||||
slot->images.clear();
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
slot->images.clear();
|
||||||
|
};
|
||||||
|
|
||||||
|
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
||||||
|
size_t tag_begin = pos;
|
||||||
|
pos += pattern.length();
|
||||||
|
size_t end_pos = prompt.find(']', pos);
|
||||||
|
if (end_pos == std::string::npos) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
std::string image_id = prompt.substr(pos, end_pos - pos);
|
||||||
|
try
|
||||||
|
{
|
||||||
|
int img_id = std::stoi(image_id);
|
||||||
|
bool found = false;
|
||||||
|
for (slot_image &img : slot->images)
|
||||||
|
{
|
||||||
|
if (img.id == img_id) {
|
||||||
|
found = true;
|
||||||
|
// text before this tag, then the media marker
|
||||||
|
built_prompt += prompt.substr(copy_from, tag_begin - copy_from);
|
||||||
|
built_prompt += marker;
|
||||||
|
copy_from = end_pos + 1;
|
||||||
|
ordered.push_back(img);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||||
|
free_images();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (const std::invalid_argument& e) {
|
||||||
|
LOG("Invalid image number id in prompt\n");
|
||||||
|
free_images();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pos = end_pos + 1;
|
||||||
}
|
}
|
||||||
|
// bitmaps are consumed in marker order by mtmd_tokenize()
|
||||||
|
slot->images = ordered;
|
||||||
|
slot->mtmd_prompt = built_prompt;
|
||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
slot->params.input_suffix = prompt.substr(begin_prefix);
|
slot->params.input_suffix = prompt.substr(copy_from);
|
||||||
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
|
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1176,21 +1183,10 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool process_images(llama_client_slot &slot) const
|
bool process_images(llama_client_slot &slot) const
|
||||||
{
|
{
|
||||||
for (slot_image &img : slot.images)
|
// With the mtmd pipeline, image encoding is no longer eager: the bitmaps
|
||||||
{
|
// are tokenized and encoded together with the surrounding text inside
|
||||||
if (!img.request_encode_image)
|
// ingest_images() via mtmd_tokenize() + mtmd_helper_eval_chunks(). This
|
||||||
{
|
// just reports whether the slot carries any images to process.
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
|
||||||
LOG("Error processing the given image");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
img.request_encode_image = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return slot.images.size() > 0;
|
return slot.images.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1435,69 +1431,70 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// for multiple images processing
|
// Tokenize the multimodal prompt (text interleaved with media markers) together
|
||||||
|
// with the slot's bitmaps, then decode the resulting chunks into the llama
|
||||||
|
// context via the high-level mtmd helper. The helper runs llama_decode() on the
|
||||||
|
// text chunks and mtmd_encode() + llama_decode() on the image chunks, handling
|
||||||
|
// batching and any pre/post decode setup (e.g. non-causal attention for gemma3).
|
||||||
|
// Advances slot.n_past by the number of positions consumed, then leaves the
|
||||||
|
// post-image suffix tokens in `batch` so the normal decode + sampling loop
|
||||||
|
// produces the first generated token.
|
||||||
bool ingest_images(llama_client_slot &slot, int n_batch)
|
bool ingest_images(llama_client_slot &slot, int n_batch)
|
||||||
{
|
{
|
||||||
int image_idx = 0;
|
if (mctx == nullptr)
|
||||||
|
|
||||||
while (image_idx < (int) slot.images.size())
|
|
||||||
{
|
{
|
||||||
slot_image &img = slot.images[image_idx];
|
LOG("%s : multimodal context is not initialized\n", __func__);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// process prefix prompt
|
// bitmaps stay owned by slot.images (freed on reset()); pass non-owning ptrs
|
||||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
|
std::vector<const mtmd_bitmap *> bitmaps;
|
||||||
{
|
bitmaps.reserve(slot.images.size());
|
||||||
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
|
for (const slot_image &img : slot.images)
|
||||||
llama_batch batch_view = {
|
{
|
||||||
n_tokens,
|
bitmaps.push_back(img.bitmap);
|
||||||
batch.token + i,
|
}
|
||||||
nullptr,
|
|
||||||
batch.pos + i,
|
|
||||||
batch.n_seq_id + i,
|
|
||||||
batch.seq_id + i,
|
|
||||||
batch.logits + i,
|
|
||||||
};
|
|
||||||
if (llama_decode(ctx, batch_view))
|
|
||||||
{
|
|
||||||
LOG("%s : failed to eval\n", __func__);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// process image with llm
|
mtmd_input_text inp_txt;
|
||||||
for (int i = 0; i < img.image_tokens; i += n_batch)
|
inp_txt.text = slot.mtmd_prompt.c_str();
|
||||||
{
|
inp_txt.add_special = add_bos_token;
|
||||||
int n_eval = img.image_tokens - i;
|
inp_txt.parse_special = true;
|
||||||
if (n_eval > n_batch)
|
|
||||||
{
|
|
||||||
n_eval = n_batch;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int n_embd = llama_model_n_embd(model);
|
mtmd::input_chunks chunks(mtmd_input_chunks_init());
|
||||||
float * embd = img.image_embedding + i * n_embd;
|
int32_t res = mtmd_tokenize(mctx,
|
||||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
chunks.ptr.get(),
|
||||||
if (llama_decode(ctx, llava_batch.batch))
|
&inp_txt,
|
||||||
{
|
bitmaps.data(),
|
||||||
LOG("%s : failed to eval image\n", __func__);
|
bitmaps.size());
|
||||||
return false;
|
if (res != 0)
|
||||||
}
|
{
|
||||||
slot.n_past += n_eval;
|
LOG("%s : failed to tokenize multimodal prompt, res = %d\n", __func__, res);
|
||||||
}
|
return false;
|
||||||
image_idx++;
|
}
|
||||||
|
|
||||||
common_batch_clear(batch);
|
const llama_pos start_pos = (llama_pos) system_tokens.size() + slot.n_past;
|
||||||
|
llama_pos new_n_past = start_pos;
|
||||||
|
if (mtmd_helper_eval_chunks(mctx,
|
||||||
|
ctx,
|
||||||
|
chunks.ptr.get(),
|
||||||
|
start_pos,
|
||||||
|
slot.id,
|
||||||
|
n_batch,
|
||||||
|
/*logits_last=*/ false,
|
||||||
|
&new_n_past) != 0)
|
||||||
|
{
|
||||||
|
LOG("%s : failed to eval multimodal chunks\n", __func__);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
slot.n_past += (int32_t) (new_n_past - start_pos);
|
||||||
|
|
||||||
// append prefix of next image
|
// queue the post-image suffix text for the normal decode + sampling path
|
||||||
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
|
common_batch_clear(batch);
|
||||||
slot.params.input_suffix : // no more images, then process suffix prompt
|
std::vector<llama_token> suffix_tokens = tokenize(slot.params.input_suffix, false);
|
||||||
(json)(slot.images[image_idx].prefix_prompt);
|
for (llama_token tok : suffix_tokens)
|
||||||
|
{
|
||||||
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
common_batch_add(batch, tok, system_tokens.size() + slot.n_past, { slot.id }, false);
|
||||||
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
slot.n_past += 1;
|
||||||
{
|
|
||||||
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
|
||||||
slot.n_past += 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -1884,8 +1881,11 @@ struct llama_server_context
|
|||||||
|
|
||||||
const bool has_images = process_images(slot);
|
const bool has_images = process_images(slot);
|
||||||
|
|
||||||
// process the prefix of first image
|
// For the multimodal path the whole pre-image / inter-image text is
|
||||||
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
|
// tokenized and decoded inside ingest_images() via mtmd, so no prefix
|
||||||
|
// tokens are queued here; the post-image suffix is appended by
|
||||||
|
// ingest_images() for the normal decode + sampling loop.
|
||||||
|
std::vector<llama_token> prefix_tokens = has_images ? std::vector<llama_token>() : prompt_tokens;
|
||||||
|
|
||||||
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
|
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +0,0 @@
|
|||||||
--- a/examples/llava/clip.cpp
|
|
||||||
+++ b/examples/llava/clip.cpp
|
|
||||||
@@ -2494,7 +2494,7 @@
|
|
||||||
}
|
|
||||||
new_data = work.data();
|
|
||||||
|
|
||||||
- new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
|
|
||||||
+ new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr, nullptr);
|
|
||||||
} else {
|
|
||||||
new_type = cur->type;
|
|
||||||
new_data = cur->data;
|
|
||||||
@@ -17,28 +17,9 @@ cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
|||||||
cp -r utils.hpp llama.cpp/examples/grpc-server/
|
cp -r utils.hpp llama.cpp/examples/grpc-server/
|
||||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/examples/grpc-server/
|
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
|
||||||
## Copy clip/llava files for multimodal support (built as myclip library)
|
## Multimodal support is provided by the `mtmd` library target (examples/mtmd/),
|
||||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
## which the grpc-server links and includes directly. No source copy is needed:
|
||||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
## clip/llava were pruned upstream and the high-level mtmd_* API is used instead.
|
||||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
|
||||||
# Prepend llama.h include to llava.h
|
|
||||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
|
||||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
|
||||||
# Copy clip-impl.h if it exists
|
|
||||||
if [ -f llama.cpp/examples/llava/clip-impl.h ]; then
|
|
||||||
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
|
|
||||||
fi
|
|
||||||
# Copy stb_image.h
|
|
||||||
if [ -f llama.cpp/vendor/stb/stb_image.h ]; then
|
|
||||||
cp -rfv llama.cpp/vendor/stb/stb_image.h llama.cpp/examples/grpc-server/stb_image.h
|
|
||||||
elif [ -f llama.cpp/common/stb_image.h ]; then
|
|
||||||
cp -rfv llama.cpp/common/stb_image.h llama.cpp/examples/grpc-server/stb_image.h
|
|
||||||
fi
|
|
||||||
|
|
||||||
## Fix API compatibility in llava.cpp (llama_n_embd -> llama_model_n_embd)
|
|
||||||
if [ -f llama.cpp/examples/grpc-server/llava.cpp ]; then
|
|
||||||
sed -i 's/llama_n_embd(/llama_model_n_embd(/g' llama.cpp/examples/grpc-server/llava.cpp
|
|
||||||
fi
|
|
||||||
|
|
||||||
set +e
|
set +e
|
||||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
||||||
|
|||||||
@@ -11,9 +11,12 @@
|
|||||||
|
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
|
|
||||||
#include "clip.h"
|
#include "mtmd.h"
|
||||||
|
|
||||||
using json = nlohmann::json;
|
// mtmd.h and ik_llama's entire server/common stack (chat.h, server-common.h,
|
||||||
|
// server-task.h, ...) declare `using json = nlohmann::ordered_json`, so match it
|
||||||
|
// here: a plain `nlohmann::json` alias collides with mtmd.h's at global scope.
|
||||||
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
extern bool server_verbose;
|
extern bool server_verbose;
|
||||||
|
|
||||||
@@ -111,13 +114,12 @@ struct slot_image
|
|||||||
{
|
{
|
||||||
int32_t id;
|
int32_t id;
|
||||||
|
|
||||||
bool request_encode_image = false;
|
// mtmd bitmap (image/audio) decoded from the request buffer. Owned by the
|
||||||
float * image_embedding = nullptr;
|
// slot; freed via mtmd_bitmap_free() on reset. The high-level mtmd pipeline
|
||||||
int32_t image_tokens = 0;
|
// (mtmd_tokenize + mtmd_helper_eval_chunks) consumes these directly, so the
|
||||||
|
// legacy eager-encode fields (embedding/tokens) and per-image prefix prompt
|
||||||
clip_image_u8 * img_data;
|
// are no longer needed.
|
||||||
|
mtmd_bitmap * bitmap = nullptr;
|
||||||
std::string prefix_prompt; // before of this image
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// completion token output with probabilities
|
// completion token output with probabilities
|
||||||
|
|||||||
@@ -87,3 +87,18 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
|||||||
if(TARGET BUILD_INFO)
|
if(TARGET BUILD_INFO)
|
||||||
add_dependencies(${TARGET} BUILD_INFO)
|
add_dependencies(${TARGET} BUILD_INFO)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Unit test for the message-content normalization helper (message_content.h).
|
||||||
|
# Off by default so the normal backend build is untouched; enable with
|
||||||
|
# -DLLAMA_GRPC_BUILD_TESTS=ON and run via ctest. It reuses llama.cpp's vendored
|
||||||
|
# <nlohmann/json.hpp> (propagated by the common helpers library) so it has no
|
||||||
|
# extra dependency beyond what the backend already builds against.
|
||||||
|
option(LLAMA_GRPC_BUILD_TESTS "Build grpc-server unit tests" OFF)
|
||||||
|
if(LLAMA_GRPC_BUILD_TESTS)
|
||||||
|
enable_testing()
|
||||||
|
add_executable(message_content_test message_content_test.cpp message_content.h)
|
||||||
|
target_include_directories(message_content_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET})
|
||||||
|
target_compile_features(message_content_test PRIVATE cxx_std_17)
|
||||||
|
add_test(NAME message_content_test COMMAND message_content_test)
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=9d5d882d8cd0f0a9283d87ed5e6fe3ee0d925fb1
|
LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
@@ -156,11 +156,11 @@ llama-cpp-grpc: llama.cpp
|
|||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target ggml-rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
||||||
|
|
||||||
llama-cpp-rpc-server: llama-cpp-grpc
|
llama-cpp-rpc-server: llama-cpp-grpc
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/ggml-rpc-server llama-cpp-rpc-server
|
||||||
|
|
||||||
llama.cpp:
|
llama.cpp:
|
||||||
mkdir -p llama.cpp
|
mkdir -p llama.cpp
|
||||||
|
|||||||
@@ -30,6 +30,19 @@
|
|||||||
#define LOCALAI_HAS_SERVER_SCHEMA 1
|
#define LOCALAI_HAS_SERVER_SCHEMA 1
|
||||||
#include "server-schema.cpp"
|
#include "server-schema.cpp"
|
||||||
#endif
|
#endif
|
||||||
|
// server-stream.cpp exists only in llama.cpp after the upstream refactor that
|
||||||
|
// added the SSE stream-resumption layer (stream_session/stream_pipe_producer).
|
||||||
|
// server-context.cpp calls into it (spipe->cleanup(), stream_aware_should_stop,
|
||||||
|
// stream_session_attach_pipe), so its definitions must be part of this
|
||||||
|
// translation unit or the link fails with "undefined reference to
|
||||||
|
// stream_pipe_producer::cleanup()". The file is self-contained (its only
|
||||||
|
// external symbols come from server-common, already pulled in above) and the
|
||||||
|
// http route-handler factories it also defines are unused here but harmless.
|
||||||
|
// __has_include keeps the source compatible with older pins/forks that predate
|
||||||
|
// the split.
|
||||||
|
#if __has_include("server-stream.cpp")
|
||||||
|
#include "server-stream.cpp"
|
||||||
|
#endif
|
||||||
#include "server-context.cpp"
|
#include "server-context.cpp"
|
||||||
|
|
||||||
// LocalAI
|
// LocalAI
|
||||||
@@ -39,6 +52,7 @@
|
|||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "arg.h"
|
#include "arg.h"
|
||||||
#include "chat-auto-parser.h"
|
#include "chat-auto-parser.h"
|
||||||
|
#include "message_content.h"
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||||
#include <grpcpp/grpcpp.h>
|
#include <grpcpp/grpcpp.h>
|
||||||
@@ -1616,242 +1630,20 @@ public:
|
|||||||
|
|
||||||
for (int i = 0; i < request->messages_size(); i++) {
|
for (int i = 0; i < request->messages_size(); i++) {
|
||||||
const auto& msg = request->messages(i);
|
const auto& msg = request->messages(i);
|
||||||
json msg_json;
|
llama_grpc::ReconstructedMessageInput rin;
|
||||||
msg_json["role"] = msg.role();
|
rin.role = msg.role();
|
||||||
|
rin.content = msg.content();
|
||||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
rin.name = msg.name();
|
||||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
rin.tool_call_id = msg.tool_call_id();
|
||||||
|
rin.reasoning_content = msg.reasoning_content();
|
||||||
// Handle content - can be string, null, or array
|
rin.tool_calls = msg.tool_calls();
|
||||||
// For multimodal content, we'll embed images/audio from separate fields
|
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||||
if (!msg.content().empty()) {
|
if (rin.is_last_user_msg) {
|
||||||
// Try to parse content as JSON to see if it's already an array
|
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||||
json content_val;
|
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||||
try {
|
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
// Handle null values - convert to empty string to avoid template errors
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
content_val = "";
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
content_val = msg.content();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is an object (e.g., from tool call failures), convert to string
|
|
||||||
if (content_val.is_object()) {
|
|
||||||
content_val = content_val.dump();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is a string and this is the last user message with images/audio, combine them
|
|
||||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
|
||||||
json content_array = json::array();
|
|
||||||
// Add text first
|
|
||||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
|
||||||
// Add images
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add audios
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
} else {
|
|
||||||
// Use content as-is (already array or not last user message)
|
|
||||||
// Ensure null values are converted to empty string
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
} else {
|
|
||||||
msg_json["content"] = content_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (is_last_user_msg && has_images_or_audio) {
|
|
||||||
// If no content but this is the last user message with images/audio, create content array
|
|
||||||
json content_array = json::array();
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
} else if (msg.role() == "tool") {
|
|
||||||
// Tool role messages must have content field set, even if empty
|
|
||||||
// Jinja templates expect content to be a string, not null or object
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
|
||||||
if (msg.content().empty()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
|
|
||||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
|
||||||
// Content exists, parse and ensure it's a string
|
|
||||||
json content_val;
|
|
||||||
try {
|
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
|
|
||||||
i, content_val.is_null() ? "null" :
|
|
||||||
content_val.is_object() ? "object" :
|
|
||||||
content_val.is_string() ? "string" :
|
|
||||||
content_val.is_array() ? "array" : "other");
|
|
||||||
// Handle null values - Jinja templates expect content to be a string, not null
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
|
|
||||||
} else if (content_val.is_object()) {
|
|
||||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
|
|
||||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
|
||||||
} else if (content_val.is_string()) {
|
|
||||||
msg_json["content"] = content_val.get<std::string>();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
|
|
||||||
} else {
|
|
||||||
// For arrays or other types, convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
|
|
||||||
i, content_val.is_array() ? "array" : "other type");
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
msg_json["content"] = msg.content();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
|
||||||
// Jinja templates expect content to be present, default to empty string if not set
|
|
||||||
if (!msg_json.contains("content")) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
|
|
||||||
i, msg.role().c_str());
|
|
||||||
msg_json["content"] = "";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||||
// Add optional fields for OpenAI-compatible message format
|
|
||||||
if (!msg.name().empty()) {
|
|
||||||
msg_json["name"] = msg.name();
|
|
||||||
}
|
|
||||||
if (!msg.tool_call_id().empty()) {
|
|
||||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
|
||||||
}
|
|
||||||
if (!msg.reasoning_content().empty()) {
|
|
||||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
|
||||||
}
|
|
||||||
if (!msg.tool_calls().empty()) {
|
|
||||||
// Parse tool_calls JSON string and add to message
|
|
||||||
try {
|
|
||||||
json tool_calls = json::parse(msg.tool_calls());
|
|
||||||
msg_json["tool_calls"] = tool_calls;
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
|
||||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
|
||||||
// set content to space " " instead of empty string "", because llama.cpp's
|
|
||||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
|
||||||
// which causes template errors when accessing message.content[:tool_start_length]
|
|
||||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
|
|
||||||
msg_json["content"] = " ";
|
|
||||||
}
|
|
||||||
// Log each tool call with name and arguments
|
|
||||||
if (tool_calls.is_array()) {
|
|
||||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
|
||||||
const auto& tc = tool_calls[tc_idx];
|
|
||||||
std::string tool_name = "unknown";
|
|
||||||
std::string tool_args = "{}";
|
|
||||||
if (tc.contains("function")) {
|
|
||||||
const auto& func = tc["function"];
|
|
||||||
if (func.contains("name")) {
|
|
||||||
tool_name = func["name"].get<std::string>();
|
|
||||||
}
|
|
||||||
if (func.contains("arguments")) {
|
|
||||||
tool_args = func["arguments"].is_string() ?
|
|
||||||
func["arguments"].get<std::string>() :
|
|
||||||
func["arguments"].dump();
|
|
||||||
}
|
|
||||||
} else if (tc.contains("name")) {
|
|
||||||
tool_name = tc["name"].get<std::string>();
|
|
||||||
if (tc.contains("arguments")) {
|
|
||||||
tool_args = tc["arguments"].is_string() ?
|
|
||||||
tc["arguments"].get<std::string>() :
|
|
||||||
tc["arguments"].dump();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
|
||||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error& e) {
|
|
||||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug: Log final content state before adding to array
|
|
||||||
if (msg_json.contains("content")) {
|
|
||||||
if (msg_json["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
|
||||||
i, msg_json["content"].is_string() ? "string" :
|
|
||||||
msg_json["content"].is_array() ? "array" :
|
|
||||||
msg_json["content"].is_object() ? "object" : "other",
|
|
||||||
msg_json["content"].is_null() ? 0 : 1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
messages_json.push_back(msg_json);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||||
@@ -2072,36 +1864,7 @@ public:
|
|||||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||||
auto& msg = body_json["messages"][idx];
|
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
|
||||||
if (msg.contains("content")) {
|
|
||||||
if (msg["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Fix null content
|
|
||||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
|
||||||
// Tool messages must have string content, not array
|
|
||||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
|
||||||
// If content is object or other non-string type, convert to string for templates
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
|
||||||
if (msg["content"].is_object()) {
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else {
|
|
||||||
msg["content"] = "";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
|
||||||
idx, role_str.c_str(),
|
|
||||||
msg["content"].is_string() ? "string" :
|
|
||||||
msg["content"].is_array() ? "array" :
|
|
||||||
msg["content"].is_object() ? "object" : "other");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Add missing content
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2433,264 +2196,20 @@ public:
|
|||||||
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
||||||
for (int i = 0; i < request->messages_size(); i++) {
|
for (int i = 0; i < request->messages_size(); i++) {
|
||||||
const auto& msg = request->messages(i);
|
const auto& msg = request->messages(i);
|
||||||
json msg_json;
|
llama_grpc::ReconstructedMessageInput rin;
|
||||||
msg_json["role"] = msg.role();
|
rin.role = msg.role();
|
||||||
|
rin.content = msg.content();
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
|
rin.name = msg.name();
|
||||||
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
|
rin.tool_call_id = msg.tool_call_id();
|
||||||
if (!msg.content().empty()) {
|
rin.reasoning_content = msg.reasoning_content();
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
|
rin.tool_calls = msg.tool_calls();
|
||||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||||
|
if (rin.is_last_user_msg) {
|
||||||
|
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||||
|
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||||
|
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||||
}
|
}
|
||||||
|
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
|
||||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
|
||||||
|
|
||||||
// Handle content - can be string, null, or array
|
|
||||||
// For multimodal content, we'll embed images/audio from separate fields
|
|
||||||
if (!msg.content().empty()) {
|
|
||||||
// Try to parse content as JSON to see if it's already an array
|
|
||||||
json content_val;
|
|
||||||
try {
|
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
// Handle null values - convert to empty string to avoid template errors
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
|
|
||||||
content_val = "";
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
content_val = msg.content();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is an object (e.g., from tool call failures), convert to string
|
|
||||||
if (content_val.is_object()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
|
|
||||||
content_val = content_val.dump();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is a string and this is the last user message with images/audio, combine them
|
|
||||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
|
||||||
json content_array = json::array();
|
|
||||||
// Add text first
|
|
||||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
|
||||||
// Add images
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add audios
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
} else {
|
|
||||||
// Use content as-is (already array or not last user message)
|
|
||||||
// Ensure null values are converted to empty string
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
|
|
||||||
msg_json["content"] = "";
|
|
||||||
} else {
|
|
||||||
msg_json["content"] = content_val;
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
|
|
||||||
i, content_val.is_string() ? "string" :
|
|
||||||
content_val.is_array() ? "array" :
|
|
||||||
content_val.is_object() ? "object" : "other");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (is_last_user_msg && has_images_or_audio) {
|
|
||||||
// If no content but this is the last user message with images/audio, create content array
|
|
||||||
json content_array = json::array();
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
|
|
||||||
} else if (!msg.tool_calls().empty()) {
|
|
||||||
// Tool call messages may have null content, but templates expect string
|
|
||||||
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
|
|
||||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
|
||||||
// which causes template errors when accessing message.content[:tool_start_length]
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
|
|
||||||
msg_json["content"] = " ";
|
|
||||||
} else if (msg.role() == "tool") {
|
|
||||||
// Tool role messages must have content field set, even if empty
|
|
||||||
// Jinja templates expect content to be a string, not null or object
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
|
||||||
if (msg.content().empty()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
|
|
||||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
|
||||||
// Content exists, parse and ensure it's a string
|
|
||||||
json content_val;
|
|
||||||
try {
|
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
|
|
||||||
i, content_val.is_null() ? "null" :
|
|
||||||
content_val.is_object() ? "object" :
|
|
||||||
content_val.is_string() ? "string" :
|
|
||||||
content_val.is_array() ? "array" : "other");
|
|
||||||
// Handle null values - Jinja templates expect content to be a string, not null
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
|
|
||||||
} else if (content_val.is_object()) {
|
|
||||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
|
|
||||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
|
||||||
} else if (content_val.is_string()) {
|
|
||||||
msg_json["content"] = content_val.get<std::string>();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
|
|
||||||
} else {
|
|
||||||
// For arrays or other types, convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
|
|
||||||
i, content_val.is_array() ? "array" : "other type");
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
msg_json["content"] = msg.content();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
|
||||||
// Jinja templates expect content to be present, default to empty string if not set
|
|
||||||
if (!msg_json.contains("content")) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
|
|
||||||
i, msg.role().c_str());
|
|
||||||
msg_json["content"] = "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add optional fields for OpenAI-compatible message format
|
|
||||||
if (!msg.name().empty()) {
|
|
||||||
msg_json["name"] = msg.name();
|
|
||||||
}
|
|
||||||
if (!msg.tool_call_id().empty()) {
|
|
||||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
|
||||||
}
|
|
||||||
if (!msg.reasoning_content().empty()) {
|
|
||||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
|
||||||
}
|
|
||||||
if (!msg.tool_calls().empty()) {
|
|
||||||
// Parse tool_calls JSON string and add to message
|
|
||||||
try {
|
|
||||||
json tool_calls = json::parse(msg.tool_calls());
|
|
||||||
msg_json["tool_calls"] = tool_calls;
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
|
||||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
|
||||||
// set content to space " " instead of empty string "", because llama.cpp's
|
|
||||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
|
||||||
// which causes template errors when accessing message.content[:tool_start_length]
|
|
||||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
|
|
||||||
msg_json["content"] = " ";
|
|
||||||
}
|
|
||||||
// Log each tool call with name and arguments
|
|
||||||
if (tool_calls.is_array()) {
|
|
||||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
|
||||||
const auto& tc = tool_calls[tc_idx];
|
|
||||||
std::string tool_name = "unknown";
|
|
||||||
std::string tool_args = "{}";
|
|
||||||
if (tc.contains("function")) {
|
|
||||||
const auto& func = tc["function"];
|
|
||||||
if (func.contains("name")) {
|
|
||||||
tool_name = func["name"].get<std::string>();
|
|
||||||
}
|
|
||||||
if (func.contains("arguments")) {
|
|
||||||
tool_args = func["arguments"].is_string() ?
|
|
||||||
func["arguments"].get<std::string>() :
|
|
||||||
func["arguments"].dump();
|
|
||||||
}
|
|
||||||
} else if (tc.contains("name")) {
|
|
||||||
tool_name = tc["name"].get<std::string>();
|
|
||||||
if (tc.contains("arguments")) {
|
|
||||||
tool_args = tc["arguments"].is_string() ?
|
|
||||||
tc["arguments"].get<std::string>() :
|
|
||||||
tc["arguments"].dump();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
|
||||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error& e) {
|
|
||||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug: Log final content state before adding to array
|
|
||||||
if (msg_json.contains("content")) {
|
|
||||||
if (msg_json["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
|
||||||
i, msg_json["content"].is_string() ? "string" :
|
|
||||||
msg_json["content"].is_array() ? "array" :
|
|
||||||
msg_json["content"].is_object() ? "object" : "other",
|
|
||||||
msg_json["content"].is_null() ? 0 : 1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
messages_json.push_back(msg_json);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||||
@@ -2911,36 +2430,7 @@ public:
|
|||||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||||
auto& msg = body_json["messages"][idx];
|
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
|
||||||
if (msg.contains("content")) {
|
|
||||||
if (msg["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Fix null content
|
|
||||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
|
||||||
// Tool messages must have string content, not array
|
|
||||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
|
||||||
// If content is object or other non-string type, convert to string for templates
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
|
||||||
if (msg["content"].is_object()) {
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else {
|
|
||||||
msg["content"] = "";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
|
||||||
idx, role_str.c_str(),
|
|
||||||
msg["content"].is_string() ? "string" :
|
|
||||||
msg["content"].is_array() ? "array" :
|
|
||||||
msg["content"].is_object() ? "object" : "other");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Add missing content
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
192
backend/cpp/llama-cpp/message_content.h
Normal file
192
backend/cpp/llama-cpp/message_content.h
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
|
||||||
|
namespace llama_grpc {
|
||||||
|
|
||||||
|
// Normalizes a proto message's content string into the JSON value used when
|
||||||
|
// reconstructing OpenAI-format messages for the tokenizer (jinja) template.
|
||||||
|
//
|
||||||
|
// Shared by the streaming (PredictStream) and non-streaming (Predict) message
|
||||||
|
// reconstruction paths so the two cannot drift.
|
||||||
|
//
|
||||||
|
// LocalAI's Go layer (schema.Messages.ToProto) always sends content as a plain
|
||||||
|
// text string; multimodal media travels in separate proto fields, never inside
|
||||||
|
// content. So user/system/developer content is *only ever* opaque text and must
|
||||||
|
// NOT be JSON-sniffed: a prompt that merely looks like JSON (e.g. an ingredient
|
||||||
|
// list ["1/4 cup sugar", ...]) would otherwise be reinterpreted as structured
|
||||||
|
// content parts and rejected by oaicompat_chat_params_parse with
|
||||||
|
// "unsupported content[].type" (https://github.com/mudler/LocalAI/issues/10524).
|
||||||
|
// (developer is OpenAI's modern system alias - same "human-authored text" nature.)
|
||||||
|
//
|
||||||
|
// For assistant/tool messages we still collapse a literal JSON null/object
|
||||||
|
// (tool-call bookkeeping) to a string, but we never turn a plain string into an
|
||||||
|
// array/scalar. The array defense is therefore role-independent (arrays/scalars
|
||||||
|
// fall through for every role); the role gate only governs the null/object case.
|
||||||
|
inline nlohmann::ordered_json normalize_message_content(const std::string& role,
|
||||||
|
const std::string& content) {
|
||||||
|
nlohmann::ordered_json content_val = content;
|
||||||
|
if (role != "user" && role != "system" && role != "developer") {
|
||||||
|
try {
|
||||||
|
nlohmann::ordered_json parsed = nlohmann::ordered_json::parse(content);
|
||||||
|
if (parsed.is_null()) {
|
||||||
|
content_val = "";
|
||||||
|
} else if (parsed.is_object()) {
|
||||||
|
content_val = parsed.dump();
|
||||||
|
}
|
||||||
|
// arrays / scalars: keep the original plain-text string as-is
|
||||||
|
} catch (const nlohmann::ordered_json::parse_error&) {
|
||||||
|
// Not JSON, already the plain string
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return content_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final safety pass applied to each reconstructed OpenAI message right before it
|
||||||
|
// is handed to oaicompat_chat_params_parse (jinja templating). Jinja templates
|
||||||
|
// assume content is a string: a literal null breaks slicing such as
|
||||||
|
// message.content[:N] (#7324), and a tool message with array content is rejected
|
||||||
|
// (#7528). A multimodal user message legitimately carries a typed-part array
|
||||||
|
// ({type:text}, {type:image_url}, ...), which must be left intact. Shared by the
|
||||||
|
// streaming and non-streaming paths so this invariant cannot drift between them.
|
||||||
|
inline void normalize_template_message(nlohmann::ordered_json& msg) {
|
||||||
|
if (!msg.contains("content")) {
|
||||||
|
msg["content"] = ""; // templates expect the field to exist
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
nlohmann::ordered_json& content = msg["content"];
|
||||||
|
const std::string role = (msg.contains("role") && msg["role"].is_string())
|
||||||
|
? msg["role"].get<std::string>()
|
||||||
|
: std::string();
|
||||||
|
if (content.is_null()) {
|
||||||
|
content = ""; // #7324: null would crash content[:N] slicing
|
||||||
|
} else if (role == "tool" && content.is_array()) {
|
||||||
|
content = content.dump(); // #7528: tool messages must have string content
|
||||||
|
} else if (!content.is_string() && !content.is_array()) {
|
||||||
|
if (content.is_object()) {
|
||||||
|
content = content.dump(); // tool-call bookkeeping object -> string
|
||||||
|
} else {
|
||||||
|
content = ""; // other scalar (number/bool) -> empty
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// string, or a non-tool (multimodal) typed-part array: leave untouched
|
||||||
|
}
|
||||||
|
|
||||||
|
// Plain-type snapshot of a single proto message. Keeping it free of protobuf
// types lets the reconstruction logic be shared and unit-tested standalone; the
// streaming and non-streaming predict paths each fill one of these from
// proto::Message plus the request's media.
struct ReconstructedMessageInput {
    // Message role.
    std::string role;
    // proto.Message.content (always a plain string).
    std::string content;
    std::string name;
    std::string tool_call_id;
    std::string reasoning_content;
    // tool_calls as a JSON string, or empty.
    std::string tool_calls;
    // When true, the request media below is attached to this message.
    bool is_last_user_msg = false;
    // base64 (jpeg)
    std::vector<std::string> images;
    // base64 (wav)
    std::vector<std::string> audios;
    // base64
    std::vector<std::string> videos;
};
|
||||||
|
|
||||||
|
// Appends the request's media as OpenAI typed content parts. Imperative (not
|
||||||
|
// brace-init) to avoid nlohmann's object-vs-array initializer-list ambiguity.
|
||||||
|
inline void append_media_parts(nlohmann::ordered_json& content_array,
|
||||||
|
const std::vector<std::string>& images,
|
||||||
|
const std::vector<std::string>& audios,
|
||||||
|
const std::vector<std::string>& videos) {
|
||||||
|
for (const auto& img : images) {
|
||||||
|
nlohmann::ordered_json image_chunk;
|
||||||
|
image_chunk["type"] = "image_url";
|
||||||
|
nlohmann::ordered_json image_url;
|
||||||
|
image_url["url"] = "data:image/jpeg;base64," + img;
|
||||||
|
image_chunk["image_url"] = image_url;
|
||||||
|
content_array.push_back(image_chunk);
|
||||||
|
}
|
||||||
|
for (const auto& aud : audios) {
|
||||||
|
nlohmann::ordered_json audio_chunk;
|
||||||
|
audio_chunk["type"] = "input_audio";
|
||||||
|
nlohmann::ordered_json input_audio;
|
||||||
|
input_audio["data"] = aud;
|
||||||
|
input_audio["format"] = "wav"; // default; could be made configurable
|
||||||
|
audio_chunk["input_audio"] = input_audio;
|
||||||
|
content_array.push_back(audio_chunk);
|
||||||
|
}
|
||||||
|
for (const auto& vid : videos) {
|
||||||
|
nlohmann::ordered_json video_chunk;
|
||||||
|
video_chunk["type"] = "input_video";
|
||||||
|
nlohmann::ordered_json input_video;
|
||||||
|
input_video["data"] = vid;
|
||||||
|
video_chunk["input_video"] = input_video;
|
||||||
|
content_array.push_back(video_chunk);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reconstructs a single OpenAI-format message (the object fed to
|
||||||
|
// oaicompat_chat_params_parse) from a proto message. Shared by PredictStream and
|
||||||
|
// Predict so the content/multimodal/tool_calls handling cannot drift between the
|
||||||
|
// two stream modes (it previously lived as two ~150-line copies with a redundant
|
||||||
|
// Predict-only tool_calls->" " branch). Guarantees content is always a string or
|
||||||
|
// a typed-part array, never null/missing.
|
||||||
|
inline nlohmann::ordered_json build_reconstructed_message(const ReconstructedMessageInput& in) {
|
||||||
|
nlohmann::ordered_json msg_json;
|
||||||
|
msg_json["role"] = in.role;
|
||||||
|
const bool has_media = !in.images.empty() || !in.audios.empty() || !in.videos.empty();
|
||||||
|
|
||||||
|
if (!in.content.empty()) {
|
||||||
|
nlohmann::ordered_json content_val = normalize_message_content(in.role, in.content);
|
||||||
|
if (content_val.is_string() && in.is_last_user_msg && has_media) {
|
||||||
|
// Last user message + media: build a typed-part array (text first).
|
||||||
|
nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
|
||||||
|
nlohmann::ordered_json text_part;
|
||||||
|
text_part["type"] = "text";
|
||||||
|
text_part["text"] = content_val.get<std::string>();
|
||||||
|
content_array.push_back(text_part);
|
||||||
|
append_media_parts(content_array, in.images, in.audios, in.videos);
|
||||||
|
msg_json["content"] = content_array;
|
||||||
|
} else if (content_val.is_null()) {
|
||||||
|
msg_json["content"] = "";
|
||||||
|
} else {
|
||||||
|
msg_json["content"] = content_val;
|
||||||
|
}
|
||||||
|
} else if (in.is_last_user_msg && has_media) {
|
||||||
|
// No text but media on the last user message: media-only typed array.
|
||||||
|
nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
|
||||||
|
append_media_parts(content_array, in.images, in.audios, in.videos);
|
||||||
|
msg_json["content"] = content_array;
|
||||||
|
} else {
|
||||||
|
// Empty content (any role, incl. tool/assistant): templates need a string.
|
||||||
|
msg_json["content"] = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!in.name.empty()) {
|
||||||
|
msg_json["name"] = in.name;
|
||||||
|
}
|
||||||
|
if (!in.tool_call_id.empty()) {
|
||||||
|
msg_json["tool_call_id"] = in.tool_call_id;
|
||||||
|
}
|
||||||
|
if (!in.reasoning_content.empty()) {
|
||||||
|
msg_json["reasoning_content"] = in.reasoning_content;
|
||||||
|
}
|
||||||
|
if (!in.tool_calls.empty()) {
|
||||||
|
try {
|
||||||
|
nlohmann::ordered_json tool_calls = nlohmann::ordered_json::parse(in.tool_calls);
|
||||||
|
msg_json["tool_calls"] = tool_calls;
|
||||||
|
// tool_calls + empty/blank content: use " " not "", because llama.cpp's
|
||||||
|
// common_chat_msgs_to_json_oaicompat turns "" into null, which breaks
|
||||||
|
// templates that slice message.content[:tool_start_length] (#7324).
|
||||||
|
if (!msg_json.contains("content") ||
|
||||||
|
(msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||||
|
msg_json["content"] = " ";
|
||||||
|
}
|
||||||
|
} catch (const nlohmann::ordered_json::parse_error&) {
|
||||||
|
// Malformed tool_calls JSON: leave content as-is (prior behavior).
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return msg_json;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace llama_grpc
|
||||||
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
// Unit tests for the shared message-reconstruction helpers (message_content.h).
|
||||||
|
//
|
||||||
|
// Build & run standalone (nlohmann/json single header on the include path):
|
||||||
|
// g++ -std=c++17 -I<dir-with-nlohmann> message_content_test.cpp -o t && ./t
|
||||||
|
// or via CMake: -DLLAMA_GRPC_BUILD_TESTS=ON then ctest.
|
||||||
|
//
|
||||||
|
// Regression coverage for:
|
||||||
|
// #10524 - a user/system prompt that is itself a JSON-array string must stay
|
||||||
|
// plain text, never be reinterpreted as OpenAI structured parts.
|
||||||
|
// #7324 - assistant/tool null content -> "" (templates slice content[:N]);
|
||||||
|
// assistant+tool_calls+empty content -> " " (not "", which becomes null).
|
||||||
|
// #7528 - tool message array content must reach the template as a string.
|
||||||
|
// multimodal - last user message text + media -> typed-part array, media kept.
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "message_content.h"
|
||||||
|
|
||||||
|
using nlohmann::ordered_json;
|
||||||
|
using llama_grpc::normalize_message_content;
|
||||||
|
using llama_grpc::normalize_template_message;
|
||||||
|
using llama_grpc::build_reconstructed_message;
|
||||||
|
using llama_grpc::ReconstructedMessageInput;
|
||||||
|
|
||||||
|
static int failures = 0;
|
||||||
|
|
||||||
|
static void check(bool ok, const std::string& name, const std::string& detail = "") {
|
||||||
|
if (!ok) {
|
||||||
|
std::cerr << "FAIL " << name << (detail.empty() ? "" : ": " + detail) << "\n";
|
||||||
|
failures++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- normalize_message_content -------------------------------------------
|
||||||
|
|
||||||
|
static void expect_norm_string(const char* name, const std::string& role,
|
||||||
|
const std::string& content, const std::string& want) {
|
||||||
|
auto got = normalize_message_content(role, content);
|
||||||
|
if (!got.is_string()) {
|
||||||
|
check(false, name, "expected a JSON string, got " +
|
||||||
|
std::string(got.is_array() ? "array" : got.is_object() ? "object" : "other") +
|
||||||
|
" (" + got.dump() + ")");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
check(got.get<std::string>() == want, name, "expected \"" + want + "\", got \"" + got.get<std::string>() + "\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_normalize() {
|
||||||
|
const std::string ingredients = R"(["1/4 cup brown sugar, packed","1 pound ground beef"])";
|
||||||
|
|
||||||
|
// #10524 - JSON-array text must stay a string. Role-INDEPENDENT array defense.
|
||||||
|
for (const char* role : {"user", "system", "developer", "function", "assistant", "tool"}) {
|
||||||
|
expect_norm_string((std::string("json_array_stays_text:") + role).c_str(), role, ingredients, ingredients);
|
||||||
|
}
|
||||||
|
|
||||||
|
// #10524 - user/system/developer JSON-object text stays verbatim (NOT re-dumped).
|
||||||
|
expect_norm_string("user_json_object_verbatim", "user", R"({"a":1})", R"({"a":1})");
|
||||||
|
expect_norm_string("system_json_object_verbatim", "system", R"({"a":1})", R"({"a":1})");
|
||||||
|
expect_norm_string("developer_json_object_verbatim", "developer", R"({"a":1})", R"({"a":1})");
|
||||||
|
|
||||||
|
// Plain text unchanged for all roles.
|
||||||
|
expect_norm_string("user_plain_text", "user", "hello world", "hello world");
|
||||||
|
expect_norm_string("assistant_non_json_text_kept", "assistant", "hi [unclosed", "hi [unclosed");
|
||||||
|
|
||||||
|
// #7324 boundary - user/system/developer literal "null" preserved (never parsed).
|
||||||
|
expect_norm_string("user_literal_null_stays", "user", "null", "null");
|
||||||
|
expect_norm_string("system_literal_null_stays", "system", "null", "null");
|
||||||
|
expect_norm_string("developer_literal_null_stays", "developer", "null", "null");
|
||||||
|
|
||||||
|
// #7324 - assistant/tool literal null collapses to empty string.
|
||||||
|
expect_norm_string("assistant_null_to_empty", "assistant", "null", "");
|
||||||
|
expect_norm_string("tool_null_to_empty", "tool", "null", "");
|
||||||
|
|
||||||
|
// #7324/#7528 - assistant/tool object bookkeeping stringified (stays a string).
|
||||||
|
check(normalize_message_content("assistant", R"({"tool":"x"})").is_string(), "assistant_object_stringified");
|
||||||
|
check(normalize_message_content("tool", R"({"error":"boom"})").is_string(), "tool_object_stringified");
|
||||||
|
|
||||||
|
// #10524-family - a bare scalar that parses as a JSON number stays the string.
|
||||||
|
expect_norm_string("assistant_scalar_number_stays_string", "assistant", "42", "42");
|
||||||
|
|
||||||
|
// baseline - empty content stays empty.
|
||||||
|
expect_norm_string("user_empty_stays_empty", "user", "", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- normalize_template_message (BEFORE TEMPLATE sanitizer) ---------------
|
||||||
|
|
||||||
|
static void test_template_sanitizer() {
|
||||||
|
// #7528 - a tool message with an ACTUAL array becomes a string.
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "tool"}, {"content", ordered_json::array({{{"type", "text"}, {"text", "r"}}})}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_string(), "before_template_tool_array_to_string", "got " + msg["content"].dump());
|
||||||
|
}
|
||||||
|
// #7324 - null content -> "" for any role.
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "assistant"}, {"content", nullptr}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_string() && msg["content"] == "", "before_template_null_to_empty");
|
||||||
|
}
|
||||||
|
// object content -> dumped string (would otherwise throw at the template).
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "assistant"}, {"content", {{"x", 1}}}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_string(), "before_template_object_to_string", "got " + msg["content"].dump());
|
||||||
|
}
|
||||||
|
// missing content field -> "".
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "user"}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg.contains("content") && msg["content"] == "", "before_template_missing_to_empty");
|
||||||
|
}
|
||||||
|
// multimodal: a well-typed user array must be left UNTOUCHED (role!=tool).
|
||||||
|
{
|
||||||
|
ordered_json parts = ordered_json::array();
|
||||||
|
parts.push_back({{"type", "text"}, {"text", "x"}});
|
||||||
|
ordered_json img; img["type"] = "image_url"; img["image_url"] = {{"url", "data:..."}};
|
||||||
|
parts.push_back(img);
|
||||||
|
ordered_json msg = {{"role", "user"}, {"content", parts}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_array() && msg["content"].size() == 2, "before_template_user_typed_array_preserved",
|
||||||
|
"got " + msg["content"].dump());
|
||||||
|
}
|
||||||
|
// a plain string is left untouched.
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "user"}, {"content", "hello"}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"] == "hello", "before_template_string_untouched");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- build_reconstructed_message ----------------------------------------
|
||||||
|
|
||||||
|
static void test_reconstruction() {
|
||||||
|
const std::string ingredients = R"(["1/4 cup brown sugar","1 pound ground beef"])";
|
||||||
|
|
||||||
|
// #10524 end-state - user JSON-array text, no media -> string content.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "user"; in.content = ingredients;
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == ingredients, "recon_user_json_array_string",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
}
|
||||||
|
// multimodal - user text + one image on last user msg -> typed array, image kept.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "user"; in.content = ingredients; in.is_last_user_msg = true;
|
||||||
|
in.images.push_back("BASE64IMG");
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_array() && m["content"].size() == 2, "recon_multimodal_text_plus_image",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
check(m["content"][0]["type"] == "text" && m["content"][0]["text"] == ingredients, "recon_multimodal_text_first");
|
||||||
|
check(m["content"][1]["type"] == "image_url", "recon_multimodal_image_kept");
|
||||||
|
}
|
||||||
|
// multimodal media-only - empty text + image on last user msg.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "user"; in.content = ""; in.is_last_user_msg = true;
|
||||||
|
in.images.push_back("BASE64IMG");
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_array() && m["content"].size() == 1 && m["content"][0]["type"] == "image_url",
|
||||||
|
"recon_media_only", "got " + m["content"].dump());
|
||||||
|
}
|
||||||
|
// #7528 - tool array-string content stays a string.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "tool"; in.content = R"(["a","b"])"; in.tool_call_id = "call_1";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == R"(["a","b"])", "recon_tool_array_string",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
check(m["tool_call_id"] == "call_1", "recon_tool_call_id_set");
|
||||||
|
}
|
||||||
|
// tool empty content -> "".
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "tool"; in.content = "";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == "", "recon_tool_empty_to_string");
|
||||||
|
}
|
||||||
|
// #7324 - assistant + tool_calls + empty content -> " " (single space, not "").
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "";
|
||||||
|
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == " ", "recon_toolcalls_empty_content_space",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
check(m["tool_calls"].is_array() && m["tool_calls"].size() == 1, "recon_toolcalls_parsed");
|
||||||
|
}
|
||||||
|
// assistant + tool_calls + real content keeps the content.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "I'll call f";
|
||||||
|
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"] == "I'll call f", "recon_toolcalls_with_content_kept");
|
||||||
|
}
|
||||||
|
// assistant null content -> "".
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "null";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"] == "", "recon_assistant_null_to_empty");
|
||||||
|
}
|
||||||
|
// malformed tool_calls JSON must not throw; content preserved.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "hi"; in.tool_calls = "{not json";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"] == "hi" && !m.contains("tool_calls"), "recon_malformed_toolcalls_safe");
|
||||||
|
}
|
||||||
|
// optional fields: name + reasoning carried through.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "tool"; in.content = "result"; in.name = "get_weather"; in.reasoning_content = "thinking";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["name"] == "get_weather" && m["reasoning_content"] == "thinking", "recon_optional_fields");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs every test group and maps the failure count to the process exit code
// (0 = all passed, 1 = at least one check failed).
int main() {
    test_normalize();
    test_template_sanitizer();
    test_reconstruction();

    if (failures != 0) {
        std::cerr << failures << " test(s) failed\n";
        return 1;
    }

    std::cout << "OK: all message_content tests passed\n";
    return 0;
}
|
||||||
@@ -18,6 +18,10 @@ done
|
|||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||||
|
# Shared message-reconstruction helpers (included by grpc-server.cpp) and their
|
||||||
|
# unit test (compiled only when -DLLAMA_GRPC_BUILD_TESTS=ON).
|
||||||
|
cp -r message_content.h llama.cpp/tools/grpc-server/
|
||||||
|
cp -r message_content_test.cpp llama.cpp/tools/grpc-server/
|
||||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
||||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
||||||
|
|
||||||
|
|||||||
71
backend/cpp/run-unit-tests.sh
Executable file
71
backend/cpp/run-unit-tests.sh
Executable file
@@ -0,0 +1,71 @@
|
|||||||
|
#!/bin/bash
#
# Discovers and runs every standalone C++ unit test under backend/cpp/.
#
# A "standalone" unit test is a *_test.cpp that depends only on the C++ standard
# library and nlohmann/json (single header) - i.e. it exercises pure helpers and
# does not need the full llama.cpp + gRPC backend build. Tests that DO need the
# backend build use the CMake/ctest path (e.g. -DLLAMA_GRPC_BUILD_TESTS=ON)
# instead and are skipped here.
#
# This keeps CI generic: adding a new pure-C++ unit test file named *_test.cpp in
# an active backend source dir is picked up automatically, with no CI edits.
#
# Env:
#   NLOHMANN_INCLUDE  include dir that contains nlohmann/json.hpp. If unset, the
#                     nlohmann/json single header is fetched to a temp dir.
#   CXX               compiler (default: g++).
#   JSON_VERSION      nlohmann/json tag to fetch when NLOHMANN_INCLUDE is unset
#                     (default: v3.11.3).
#
# Exit status: 0 when every test compiled and passed (or none were found),
# 1 otherwise.
set -uo pipefail

ROOT="$(cd "$(dirname "$0")" && pwd)"
CXX="${CXX:-g++}"
JSON_VERSION="${JSON_VERSION:-v3.11.3}"

# One scratch dir holds the fetched header and every compiled test binary. It is
# removed on exit so repeated local/CI runs do not pile up temp directories.
WORK_DIR="$(mktemp -d)" || {
    echo "ERROR: failed to create a temporary work dir" >&2
    exit 1
}
trap 'rm -rf "$WORK_DIR"' EXIT

JSON_INC="${NLOHMANN_INCLUDE:-}"
if [ -z "$JSON_INC" ]; then
    JSON_INC="$WORK_DIR/include"
    mkdir -p "$JSON_INC/nlohmann"
    echo "Fetching nlohmann/json ${JSON_VERSION} single header..."
    if ! curl -L -sf \
        "https://raw.githubusercontent.com/nlohmann/json/${JSON_VERSION}/single_include/nlohmann/json.hpp" \
        -o "$JSON_INC/nlohmann/json.hpp"; then
        echo "ERROR: failed to fetch nlohmann/json header" >&2
        exit 1
    fi
fi

# Active source dirs only - exclude per-variant build copies, dev snapshots and
# the vendored upstream llama.cpp tree.
mapfile -t tests < <(find "$ROOT" -name '*_test.cpp' \
    -not -path '*/llama.cpp/*' \
    -not -path '*-build/*' \
    -not -path '*-dev/*' \
    -not -path '*fallback*' | sort)

if [ "${#tests[@]}" -eq 0 ]; then
    echo "No standalone C++ unit tests found under $ROOT"
    exit 0
fi

fail=0
for test_src in "${tests[@]}"; do
    name="$(basename "$test_src" .cpp)"
    # Per-test subdir: two tests with the same basename in different source
    # dirs must not overwrite each other's binary.
    if ! bin_dir="$(mktemp -d "$WORK_DIR/test.XXXXXX")"; then
        echo "COMPILE FAILED: $test_src" >&2
        fail=1
        continue
    fi
    bin="$bin_dir/$name"
    echo "==> $test_src"
    if ! "$CXX" -std=c++17 -Wall -Wextra \
        -I"$JSON_INC" -I"$(dirname "$test_src")" \
        "$test_src" -o "$bin"; then
        echo "COMPILE FAILED: $test_src" >&2
        fail=1
        continue
    fi
    if ! "$bin"; then
        echo "TEST FAILED: $test_src" >&2
        fail=1
    fi
done

echo "Ran ${#tests[@]} standalone C++ unit test file(s)"
exit "$fail"
|
||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# CrispASR version (release tag)
|
# CrispASR version (release tag)
|
||||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||||
CRISPASR_VERSION?=8f1218141b792b8868861c1af17ba1e361b05dc0
|
CRISPASR_VERSION?=6514c9da00b03a2f0f1b49a43fae4f3a01a41844
|
||||||
SO_TARGET?=libgocrispasr.so
|
SO_TARGET?=libgocrispasr.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
18
backend/go/face-detect/.gitignore
vendored
Normal file
18
backend/go/face-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Fetched upstream sources
|
||||||
|
sources/
|
||||||
|
|
||||||
|
# CMake build directories
|
||||||
|
build*/
|
||||||
|
|
||||||
|
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||||
|
# symlinked for local dev; the real sources live in face-detect.cpp upstream.
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
facedetect_capi.h
|
||||||
|
compile_commands.json
|
||||||
|
|
||||||
|
# Compiled backend binary
|
||||||
|
face-detect-grpc
|
||||||
|
|
||||||
|
# Packaging output
|
||||||
|
package/
|
||||||
110
backend/go/face-detect/Makefile
Normal file
110
backend/go/face-detect/Makefile
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
# face-detect backend Makefile.
|
||||||
|
#
|
||||||
|
# Upstream pin lives below as FACEDETECT_VERSION?=06914b0... (.github/bump_deps.sh
|
||||||
|
# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
|
||||||
|
# convention).
|
||||||
|
#
|
||||||
|
# Local dev shortcut: if you already have an out-of-tree face-detect.cpp build,
|
||||||
|
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||||
|
#
|
||||||
|
# ln -sf /path/to/face-detect.cpp/build-shared/libfacedetect.so .
|
||||||
|
# ln -sf /path/to/face-detect.cpp/include/facedetect_capi.h .
|
||||||
|
# go build -o face-detect-grpc .
|
||||||
|
#
|
||||||
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
|
# not need a side-checkout.
|
||||||
|
|
||||||
|
FACEDETECT_VERSION?=06914b077d52f90d5421299138e7be6bdd06b5e8
|
||||||
|
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
GO_TAGS?=
|
||||||
|
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
|
||||||
|
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||||
|
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||||
|
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||||
|
|
||||||
|
# Build ggml + the vendored libjpeg-turbo statically into libfacedetect.so (PIC)
|
||||||
|
# so the shared lib is self-contained: dlopen needs no libggml*.so alongside it,
|
||||||
|
# only system libs (libstdc++/libgomp/libc) the runtime image already provides.
|
||||||
|
# The vendored jpeg symbols are hidden via -Wl,--exclude-libs,ALL on the C++
|
||||||
|
# side, so only the facedetect_capi_* surface is exported.
|
||||||
|
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DFACEDETECT_SHARED=ON -DFACEDETECT_BUILD_CLI=OFF -DFACEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||||
|
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# face-detect.cpp gates its GGML backends behind FACEDETECT_GGML_* options and
|
||||||
|
# does set(GGML_CUDA ${FACEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||||
|
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the FACEDETECT_GGML_*
|
||||||
|
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDA=ON
|
||||||
|
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, SCRFD 2.3x
|
||||||
|
# vs torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||||
|
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||||
|
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||||
|
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||||
|
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDNN=ON
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_HIP=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_VULKAN=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_METAL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: face-detect-grpc package build clean purge test all
|
||||||
|
|
||||||
|
all: face-detect-grpc
|
||||||
|
|
||||||
|
# Clone the upstream face-detect.cpp source at the pinned commit. Directory acts
|
||||||
|
# as the target so make only re-clones when missing. After a FACEDETECT_VERSION
|
||||||
|
# bump, run 'make purge && make' to refetch.
|
||||||
|
sources/face-detect.cpp:
|
||||||
|
mkdir -p sources/face-detect.cpp
|
||||||
|
cd sources/face-detect.cpp && \
|
||||||
|
git init -q && \
|
||||||
|
git remote add origin $(FACEDETECT_REPO) && \
|
||||||
|
git fetch --depth 1 origin $(FACEDETECT_VERSION) && \
|
||||||
|
git checkout FETCH_HEAD && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||||
|
# sources so purego.Dlopen("libfacedetect.so") and the cgo-less build both pick
|
||||||
|
# them up.
|
||||||
|
libfacedetect.so: sources/face-detect.cpp
|
||||||
|
cmake -B sources/face-detect.cpp/build-shared -S sources/face-detect.cpp $(CMAKE_ARGS)
|
||||||
|
cmake --build sources/face-detect.cpp/build-shared --config Release -j$(JOBS) --target facedetect
|
||||||
|
cp -fv sources/face-detect.cpp/build-shared/libfacedetect.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/face-detect.cpp/include/facedetect_capi.h ./
|
||||||
|
|
||||||
|
face-detect-grpc: libfacedetect.so main.go gofacedetect.go options.go
|
||||||
|
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o face-detect-grpc .
|
||||||
|
|
||||||
|
package: face-detect-grpc
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
build: package
|
||||||
|
|
||||||
|
# Test target. The embed/detect/verify/analyze smoke specs are gated on
|
||||||
|
# FACEDETECT_BACKEND_TEST_MODEL + FACEDETECT_BACKEND_TEST_IMAGE; without them the
|
||||||
|
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||||
|
test:
|
||||||
|
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf libfacedetect.so* facedetect_capi.h package face-detect-grpc
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf sources/face-detect.cpp
|
||||||
431
backend/go/face-detect/gofacedetect.go
Normal file
431
backend/go/face-detect/gofacedetect.go
Normal file
@@ -0,0 +1,431 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Function variables that purego binds to the flat C-API exported by
// libfacedetect.so. Each one corresponds to a facedetect_capi_* symbol declared
// in facedetect_capi.h, so `nm libfacedetect.so | grep facedetect_capi` is
// enough to spot drift between the two sides.
//
// Opaque contexts and malloc'd char*/float* results are typed as uintptr on
// purpose: that keeps the raw C pointer available so it can be handed back to
// the matching capi free function. Letting purego convert those results to a Go
// string or []float32 would copy the data and drop the pointer, leaking the
// C-owned buffer on every call.
var (
	// Library version probe and context lifecycle.
	CppAbiVersion func() int32
	CppLoad       func(ggufPath string) uintptr
	CppFree       func(ctx uintptr)
	CppLastError  func(ctx uintptr) string

	// Releasers for the C-owned buffers returned by the calls below.
	CppFreeString func(s uintptr)
	CppFreeVec    func(v uintptr)

	// Embedding entry points; outVec/outDim are out-parameters filled by C.
	CppEmbedPath func(ctx uintptr, imagePath string, outVec, outDim unsafe.Pointer) int32
	CppEmbedRGB  func(ctx uintptr, rgb []byte, width, height int32, outVec, outDim unsafe.Pointer) int32

	// Detection, verification and analysis entry points. The *JSON variants
	// return a C string that must be released with CppFreeString.
	CppDetectJSON  func(ctx uintptr, imagePath string) uintptr
	CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, antiSpoof int32, outDistance, outVerified unsafe.Pointer) int32
	CppAnalyzeJSON func(ctx uintptr, imagePath string) uintptr
)
|
||||||
|
|
||||||
|
// FaceDetect implements the face-recognition (biometric) subset of the Backend
|
||||||
|
// gRPC service over libfacedetect.so. The C side keeps a single loaded model
|
||||||
|
// pack plus a per-ctx last-error buffer and is not reentrant, so
|
||||||
|
// base.SingleThread serializes every call.
|
||||||
|
type FaceDetect struct {
|
||||||
|
base.SingleThread
|
||||||
|
opts loadOptions
|
||||||
|
ctxPtr uintptr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) Load(opts *pb.ModelOptions) error {
|
||||||
|
model := opts.ModelFile
|
||||||
|
if model == "" {
|
||||||
|
model = opts.ModelPath
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||||
|
model = filepath.Join(opts.ModelPath, model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
return errors.New("face-detect: ModelFile is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
f.opts = parseOptions(opts.Options)
|
||||||
|
if f.opts.modelName == "" {
|
||||||
|
f.opts.modelName = filepath.Base(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||||
|
// one backend process per model and serves requests concurrently, so the
|
||||||
|
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||||
|
// FACEDETECT_THREADS is read by the engine at backend construction, so it
|
||||||
|
// must be set before the capi load. A non-positive Threads means "unset":
|
||||||
|
// leave the env alone so the engine keeps its sane default.
|
||||||
|
threads := opts.Threads
|
||||||
|
if threads > 0 {
|
||||||
|
if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||||
|
return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err)
|
||||||
|
}
|
||||||
|
xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Info("face-detect: loading model", "model", model,
|
||||||
|
"verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||||
|
|
||||||
|
ctx := CppLoad(model)
|
||||||
|
if ctx == 0 {
|
||||||
|
// The last-error buffer lives on the ctx that was never returned, so
|
||||||
|
// surface the path the operator tried to load instead.
|
||||||
|
return fmt.Errorf("face-detect: facedetect_capi_load failed for %q", model)
|
||||||
|
}
|
||||||
|
f.ctxPtr = ctx
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Embeddings returns the L2-normalized ArcFace embedding of the primary face in
|
||||||
|
// the supplied image. Mirroring the Python face backend, the image is read from
|
||||||
|
// Images[0] as a base64 payload; materializeImage decodes it to a temp file so
|
||||||
|
// the path-based C-API can run its own decode (cv2.imread parity). The gRPC
|
||||||
|
// server wraps the returned slice in an EmbeddingResult.
|
||||||
|
func (f *FaceDetect) Embeddings(req *pb.PredictOptions) ([]float32, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return nil, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if len(req.Images) == 0 || req.Images[0] == "" {
|
||||||
|
return nil, errors.New("face-detect: Embedding requires Images[0] to be a base64 image")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Images[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
return f.embedPath(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) embedPath(path string) ([]float32, error) {
|
||||||
|
var vec uintptr
|
||||||
|
var dim int32
|
||||||
|
rc := CppEmbedPath(f.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||||
|
if rc != 0 || vec == 0 || dim <= 0 {
|
||||||
|
return nil, f.lastErr("embed", path)
|
||||||
|
}
|
||||||
|
defer CppFreeVec(vec)
|
||||||
|
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||||
|
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||||
|
// nor moves this buffer and we copy immediately.
|
||||||
|
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||||
|
out := make([]float32, int(dim))
|
||||||
|
copy(out, src)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect runs SCRFD over the image and returns one Detection per face. The
|
||||||
|
// C-API emits a box as [x1,y1,x2,y2] in pixels; the proto carries x/y plus
|
||||||
|
// width/height, so the corners are converted. The 5 facial landmarks the engine
|
||||||
|
// also returns are dropped: the Detection message has no field for them.
|
||||||
|
func (f *FaceDetect) Detect(req *pb.DetectOptions) (pb.DetectResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.DetectResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Src == "" {
|
||||||
|
return pb.DetectResponse{}, errors.New("face-detect: src image is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Src)
|
||||||
|
if err != nil {
|
||||||
|
return pb.DetectResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
faces, err := f.detectFaces(path)
|
||||||
|
if err != nil {
|
||||||
|
return pb.DetectResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
dets := make([]*pb.Detection, 0, len(faces))
|
||||||
|
for _, fc := range faces {
|
||||||
|
if req.Threshold > 0 && fc.Score < req.Threshold {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x, y, w, h := fc.xywh()
|
||||||
|
dets = append(dets, &pb.Detection{
|
||||||
|
X: x,
|
||||||
|
Y: y,
|
||||||
|
Width: w,
|
||||||
|
Height: h,
|
||||||
|
Confidence: fc.Score,
|
||||||
|
ClassName: "face",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return pb.DetectResponse{Detections: dets}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaceVerify embeds the primary face in each image and reports whether they are
|
||||||
|
// the same identity by cosine distance against a threshold. A request threshold
|
||||||
|
// <= 0 falls back to the model-configured default (verify_threshold option,
|
||||||
|
// 0.35 if unset). When anti_spoofing is set, the C-API applies a MiniFASNet
|
||||||
|
// veto internally (verified forced false on a spoof); the per-image liveness
|
||||||
|
// scores are not exposed by the verify entry point, so img*_is_real /
|
||||||
|
// img*_antispoof_score stay at their zero values.
|
||||||
|
func (f *FaceDetect) FaceVerify(req *pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.FaceVerifyResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Img1 == "" || req.Img2 == "" {
|
||||||
|
return pb.FaceVerifyResponse{}, errors.New("face-detect: img1 and img2 are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path1, cleanup1, err := materializeImage(req.Img1)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceVerifyResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup1()
|
||||||
|
path2, cleanup2, err := materializeImage(req.Img2)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceVerifyResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup2()
|
||||||
|
|
||||||
|
threshold := req.Threshold
|
||||||
|
if threshold <= 0 {
|
||||||
|
threshold = f.opts.verifyThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
antiSpoof := int32(0)
|
||||||
|
if req.AntiSpoofing {
|
||||||
|
antiSpoof = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now()
|
||||||
|
var distance float32
|
||||||
|
var verified int32
|
||||||
|
rc := CppVerifyPaths(f.ctxPtr, path1, path2, threshold, antiSpoof,
|
||||||
|
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||||
|
if rc != 0 {
|
||||||
|
return pb.FaceVerifyResponse{}, f.lastErr("verify", req.Img1[:min(8, len(req.Img1))]+"...")
|
||||||
|
}
|
||||||
|
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||||
|
|
||||||
|
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||||
|
// matching the Python face backend's reporting.
|
||||||
|
confidence := float32(0)
|
||||||
|
if threshold > 0 {
|
||||||
|
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.FaceVerifyResponse{
|
||||||
|
Verified: verified != 0,
|
||||||
|
Distance: distance,
|
||||||
|
Threshold: threshold,
|
||||||
|
Confidence: confidence,
|
||||||
|
Model: f.opts.modelName,
|
||||||
|
Img1Area: f.bestArea(path1),
|
||||||
|
Img2Area: f.bestArea(path2),
|
||||||
|
ProcessingTimeMs: elapsedMs,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaceAnalyze runs the genderage head on every detected face. The C-API returns
|
||||||
|
// "M"/"F" gender labels and a rounded age; the labels are normalized to the
|
||||||
|
// "Man"/"Woman" values the proto documents.
|
||||||
|
func (f *FaceDetect) FaceAnalyze(req *pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Img == "" {
|
||||||
|
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: img is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Img)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceAnalyzeResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
ptr := CppAnalyzeJSON(f.ctxPtr, path)
|
||||||
|
if ptr == 0 {
|
||||||
|
return pb.FaceAnalyzeResponse{}, f.lastErr("analyze", path)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
faces, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceAnalyzeResponse{}, fmt.Errorf("face-detect: analyze JSON: %w", err)
|
||||||
|
}
|
||||||
|
return pb.FaceAnalyzeResponse{Faces: faces}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// faceBox is one entry of the detect/analyze JSON documents the engine emits.
|
||||||
|
type faceBox struct {
|
||||||
|
Score float32 `json:"score"`
|
||||||
|
Box []float32 `json:"box"`
|
||||||
|
Age float32 `json:"age"`
|
||||||
|
Gender string `json:"gender"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// xywh converts the engine's [x1,y1,x2,y2] box into the x/y/width/height the
|
||||||
|
// proto carries. A short or missing box yields zeros.
|
||||||
|
func (b faceBox) xywh() (x, y, w, h float32) {
|
||||||
|
if len(b.Box) < 4 {
|
||||||
|
return 0, 0, 0, 0
|
||||||
|
}
|
||||||
|
return b.Box[0], b.Box[1], b.Box[2] - b.Box[0], b.Box[3] - b.Box[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
type facesJSON struct {
|
||||||
|
Faces []faceBox `json:"faces"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) detectFaces(path string) ([]faceBox, error) {
|
||||||
|
ptr := CppDetectJSON(f.ctxPtr, path)
|
||||||
|
if ptr == 0 {
|
||||||
|
return nil, f.lastErr("detect", path)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
var doc facesJSON
|
||||||
|
if err := json.Unmarshal([]byte(goStringFromCPtr(ptr)), &doc); err != nil {
|
||||||
|
return nil, fmt.Errorf("face-detect: detect JSON: %w", err)
|
||||||
|
}
|
||||||
|
return doc.Faces, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bestArea returns the FacialArea of the highest-scoring face in an image, or an
|
||||||
|
// empty area when detection fails or finds nothing. Best-effort: verify already
|
||||||
|
// succeeded, so a missing region must not turn a valid match into an error.
|
||||||
|
func (f *FaceDetect) bestArea(path string) *pb.FacialArea {
|
||||||
|
faces, err := f.detectFaces(path)
|
||||||
|
if err != nil || len(faces) == 0 {
|
||||||
|
return &pb.FacialArea{}
|
||||||
|
}
|
||||||
|
best := faces[0]
|
||||||
|
for _, fc := range faces[1:] {
|
||||||
|
if fc.Score > best.Score {
|
||||||
|
best = fc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x, y, w, h := best.xywh()
|
||||||
|
return &pb.FacialArea{X: x, Y: y, W: w, H: h}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseAnalyzeJSON maps the engine's analyze document onto FaceAnalysis entries.
|
||||||
|
// The engine reports gender as "M"/"F"; both the dominant label and the score
|
||||||
|
// map are filled with the "Man"/"Woman" form the proto documents.
|
||||||
|
func parseAnalyzeJSON(doc string) ([]*pb.FaceAnalysis, error) {
|
||||||
|
var parsed facesJSON
|
||||||
|
if err := json.Unmarshal([]byte(doc), &parsed); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]*pb.FaceAnalysis, 0, len(parsed.Faces))
|
||||||
|
for _, fc := range parsed.Faces {
|
||||||
|
x, y, w, h := fc.xywh()
|
||||||
|
fa := &pb.FaceAnalysis{
|
||||||
|
Region: &pb.FacialArea{X: x, Y: y, W: w, H: h},
|
||||||
|
FaceConfidence: fc.Score,
|
||||||
|
Age: fc.Age,
|
||||||
|
}
|
||||||
|
if label := normalizeGender(fc.Gender); label != "" {
|
||||||
|
fa.DominantGender = label
|
||||||
|
fa.Gender = map[string]float32{label: 1.0}
|
||||||
|
}
|
||||||
|
out = append(out, fa)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeGender translates the engine's "M"/"F" gender code into the
// "Man"/"Woman" labels the proto documents. Matching ignores case and
// surrounding whitespace. Blank input yields "", and any other value is handed
// back exactly as it was passed in.
func normalizeGender(g string) string {
	code := strings.ToUpper(strings.TrimSpace(g))
	if code == "" {
		return ""
	}
	if code == "M" {
		return "Man"
	}
	if code == "F" {
		return "Woman"
	}
	return g
}
|
||||||
|
|
||||||
|
// materializeImage makes an image available as a file on disk and returns its
// path together with a cleanup func the caller must invoke when done.
//
// Two input forms are accepted:
//   - an existing filesystem path (a convenience for callers such as test
//     fixtures): it is returned as-is with a no-op cleanup. Directories do not
//     count as an image path and fall through to base64 decoding.
//   - a base64 payload, optionally wrapped in a data: URI whose prefix (up to
//     and including the first comma) is stripped: it is decoded into a temp
//     file and cleanup removes that file.
//
// An error is returned for empty input, for input that is neither an existing
// file nor valid non-empty base64, and for failures creating or writing the
// temp file. On error the returned cleanup is a no-op and any temp file that
// was created has already been removed.
func materializeImage(src string) (path string, cleanup func(), err error) {
	noop := func() {}
	if src == "" {
		return "", noop, errors.New("face-detect: empty image input")
	}
	// A directory can never be decoded as an image, so only non-directory
	// entries take the path shortcut.
	if info, statErr := os.Stat(src); statErr == nil && !info.IsDir() {
		return src, noop, nil
	}

	payload := src
	if strings.HasPrefix(payload, "data:") {
		if _, encoded, found := strings.Cut(payload, ","); found {
			payload = encoded
		}
	}
	data, decErr := base64.StdEncoding.DecodeString(strings.TrimSpace(payload))
	if decErr != nil || len(data) == 0 {
		return "", noop, errors.New("face-detect: image is neither an existing path nor valid base64")
	}

	tmp, createErr := os.CreateTemp("", "face-detect-*.img")
	if createErr != nil {
		return "", noop, fmt.Errorf("face-detect: create temp image: %w", createErr)
	}
	tmpPath := tmp.Name()
	// Removal is best-effort: there is nothing useful to do if it fails.
	remove := func() { _ = os.Remove(tmpPath) }

	if _, wErr := tmp.Write(data); wErr != nil {
		_ = tmp.Close() // the write error is the one worth reporting
		remove()
		return "", noop, fmt.Errorf("face-detect: write temp image: %w", wErr)
	}
	if cErr := tmp.Close(); cErr != nil {
		remove()
		return "", noop, fmt.Errorf("face-detect: close temp image: %w", cErr)
	}
	return tmpPath, remove, nil
}
|
||||||
|
|
||||||
|
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||||
|
func (f *FaceDetect) lastErr(op, subject string) error {
|
||||||
|
msg := strings.TrimSpace(CppLastError(f.ctxPtr))
|
||||||
|
if msg == "" {
|
||||||
|
msg = "no error detail"
|
||||||
|
}
|
||||||
|
return fmt.Errorf("face-detect: %s failed for %q: %s", op, subject, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||||
|
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||||
|
//
|
||||||
|
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||||
|
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||||
|
func goStringFromCPtr(cptr uintptr) string {
|
||||||
|
if cptr == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||||
|
n := 0
|
||||||
|
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return string(unsafe.Slice((*byte)(p), n))
|
||||||
|
}
|
||||||
230
backend/go/face-detect/gofacedetect_test.go
Normal file
230
backend/go/face-detect/gofacedetect_test.go
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFaceDetect(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "face-detect Backend Suite")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
libLoadOnce sync.Once
|
||||||
|
libLoadErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||||
|
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||||
|
// specs skip themselves) when libfacedetect.so is not loadable from cwd
|
||||||
|
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||||
|
func ensureLibLoaded() error {
|
||||||
|
libLoadOnce.Do(func() {
|
||||||
|
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libfacedetect.so"
|
||||||
|
}
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
libLoadErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
purego.RegisterLibFunc(&CppAbiVersion, lib, "facedetect_capi_abi_version")
|
||||||
|
purego.RegisterLibFunc(&CppLoad, lib, "facedetect_capi_load")
|
||||||
|
purego.RegisterLibFunc(&CppFree, lib, "facedetect_capi_free")
|
||||||
|
purego.RegisterLibFunc(&CppLastError, lib, "facedetect_capi_last_error")
|
||||||
|
purego.RegisterLibFunc(&CppFreeString, lib, "facedetect_capi_free_string")
|
||||||
|
purego.RegisterLibFunc(&CppFreeVec, lib, "facedetect_capi_free_vec")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPath, lib, "facedetect_capi_embed_path")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedRGB, lib, "facedetect_capi_embed_rgb")
|
||||||
|
purego.RegisterLibFunc(&CppDetectJSON, lib, "facedetect_capi_detect_path_json")
|
||||||
|
purego.RegisterLibFunc(&CppVerifyPaths, lib, "facedetect_capi_verify_paths")
|
||||||
|
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "facedetect_capi_analyze_path_json")
|
||||||
|
})
|
||||||
|
return libLoadErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("parseOptions", func() {
|
||||||
|
It("defaults verify_threshold to 0.35", func() {
|
||||||
|
o := parseOptions(nil)
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||||
|
Expect(o.modelName).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0.4", "model_name:buffalo_l", "unknown:x"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||||
|
Expect(o.modelName).To(Equal("buffalo_l"))
|
||||||
|
|
||||||
|
o2 := parseOptions([]string{"threshold:0.3"})
|
||||||
|
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores non-positive thresholds and keeps the default", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("normalizeGender", func() {
|
||||||
|
It("maps M/F codes to Man/Woman", func() {
|
||||||
|
Expect(normalizeGender("M")).To(Equal("Man"))
|
||||||
|
Expect(normalizeGender("f")).To(Equal("Woman"))
|
||||||
|
Expect(normalizeGender(" m ")).To(Equal("Man"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("passes empty and unknown codes through", func() {
|
||||||
|
Expect(normalizeGender("")).To(Equal(""))
|
||||||
|
Expect(normalizeGender("nonbinary")).To(Equal("nonbinary"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("faceBox.xywh", func() {
|
||||||
|
It("converts an [x1,y1,x2,y2] box to x/y/width/height", func() {
|
||||||
|
b := faceBox{Box: []float32{10, 20, 50, 80}}
|
||||||
|
x, y, w, h := b.xywh()
|
||||||
|
Expect(x).To(Equal(float32(10)))
|
||||||
|
Expect(y).To(Equal(float32(20)))
|
||||||
|
Expect(w).To(Equal(float32(40)))
|
||||||
|
Expect(h).To(Equal(float32(60)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns zeros for a short box", func() {
|
||||||
|
x, y, w, h := faceBox{Box: []float32{1, 2}}.xywh()
|
||||||
|
Expect([]float32{x, y, w, h}).To(Equal([]float32{0, 0, 0, 0}))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("parseAnalyzeJSON", func() {
|
||||||
|
It("maps region, age and gender for each face", func() {
|
||||||
|
doc := `{"faces":[
|
||||||
|
{"score":0.997,"box":[10,20,50,80],"age":31,"gender":"M"},
|
||||||
|
{"score":0.81,"box":[0,0,40,40],"age":24,"gender":"F"}]}`
|
||||||
|
faces, err := parseAnalyzeJSON(doc)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(HaveLen(2))
|
||||||
|
|
||||||
|
Expect(faces[0].FaceConfidence).To(BeNumerically("~", 0.997, 1e-4))
|
||||||
|
Expect(faces[0].Age).To(BeNumerically("~", 31, 1e-4))
|
||||||
|
Expect(faces[0].DominantGender).To(Equal("Man"))
|
||||||
|
Expect(faces[0].Gender).To(HaveKeyWithValue("Man", float32(1.0)))
|
||||||
|
Expect(faces[0].Region.W).To(Equal(float32(40)))
|
||||||
|
Expect(faces[0].Region.H).To(Equal(float32(60)))
|
||||||
|
|
||||||
|
Expect(faces[1].DominantGender).To(Equal("Woman"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("tolerates a missing gender field", func() {
|
||||||
|
faces, err := parseAnalyzeJSON(`{"faces":[{"score":0.5,"box":[0,0,10,10],"age":40}]}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(HaveLen(1))
|
||||||
|
Expect(faces[0].DominantGender).To(Equal(""))
|
||||||
|
Expect(faces[0].Gender).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns no faces for an empty document", func() {
|
||||||
|
faces, err := parseAnalyzeJSON(`{"faces":[]}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error on malformed JSON", func() {
|
||||||
|
_, err := parseAnalyzeJSON(`{not-json`)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("materializeImage", func() {
|
||||||
|
It("decodes a base64 payload to a temp file", func() {
|
||||||
|
payload := base64.StdEncoding.EncodeToString([]byte("\xff\xd8\xff\xe0fake-jpeg"))
|
||||||
|
path, cleanup, err := materializeImage(payload)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
data, rerr := os.ReadFile(path)
|
||||||
|
Expect(rerr).ToNot(HaveOccurred())
|
||||||
|
Expect(data).To(Equal([]byte("\xff\xd8\xff\xe0fake-jpeg")))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("strips a data: URI prefix before decoding", func() {
|
||||||
|
payload := "data:image/png;base64," + base64.StdEncoding.EncodeToString([]byte("hello"))
|
||||||
|
path, cleanup, err := materializeImage(payload)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
data, rerr := os.ReadFile(path)
|
||||||
|
Expect(rerr).ToNot(HaveOccurred())
|
||||||
|
Expect(data).To(Equal([]byte("hello")))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("uses an existing path as-is", func() {
|
||||||
|
tmp, err := os.CreateTemp("", "face-detect-fixture-*.bin")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer func() { _ = os.Remove(tmp.Name()) }()
|
||||||
|
Expect(tmp.Close()).To(Succeed())
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(tmp.Name())
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
Expect(path).To(Equal(tmp.Name()))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("errors on input that is neither a path nor base64", func() {
|
||||||
|
_, _, err := materializeImage("not base64!!!")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// The specs below exercise the real C-API end to end. They run only when both a
|
||||||
|
// model GGUF and a test image are provided, and skip cleanly otherwise so the
|
||||||
|
// suite stays green without large assets.
|
||||||
|
var _ = Describe("FaceDetect end-to-end", Ordered, func() {
|
||||||
|
var (
|
||||||
|
f *FaceDetect
|
||||||
|
modelPath = os.Getenv("FACEDETECT_BACKEND_TEST_MODEL")
|
||||||
|
imagePath = os.Getenv("FACEDETECT_BACKEND_TEST_IMAGE")
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeAll(func() {
|
||||||
|
if modelPath == "" || imagePath == "" {
|
||||||
|
Skip("set FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE to run the e2e specs")
|
||||||
|
}
|
||||||
|
if err := ensureLibLoaded(); err != nil {
|
||||||
|
Skip("libfacedetect.so not loadable: " + err.Error())
|
||||||
|
}
|
||||||
|
f = &FaceDetect{}
|
||||||
|
Expect(f.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("embeds the primary face in an image", func() {
|
||||||
|
emb, err := f.Embeddings(&pb.PredictOptions{Images: []string{imagePath}})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(emb).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("detects at least one face", func() {
|
||||||
|
resp, err := f.Detect(&pb.DetectOptions{Src: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Detections).ToNot(BeEmpty())
|
||||||
|
Expect(resp.Detections[0].ClassName).To(Equal("face"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("verifies an image against itself as the same identity", func() {
|
||||||
|
resp, err := f.FaceVerify(&pb.FaceVerifyRequest{Img1: imagePath, Img2: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Verified).To(BeTrue())
|
||||||
|
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("analyzes age/gender for each face", func() {
|
||||||
|
resp, err := f.FaceAnalyze(&pb.FaceAnalyzeRequest{Img: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Faces).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
})
|
||||||
65
backend/go/face-detect/main.go
Normal file
65
backend/go/face-detect/main.go
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
|
//
|
||||||
|
// Loads libfacedetect.so via purego and registers the flat C-API entry points
|
||||||
|
// declared in facedetect_capi.h. The library name can be overridden with
|
||||||
|
// FACEDETECT_LIBRARY (mirrors the VOICEDETECT_LIBRARY / PARAKEET_LIBRARY
|
||||||
|
// convention in the sibling backends); the default looks for the .so next to
|
||||||
|
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
type LibFuncs struct {
|
||||||
|
FuncPtr any
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libfacedetect.so"
|
||||||
|
}
|
||||||
|
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("face-detect: dlopen %q: %w", libName, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bound 1:1 to facedetect_capi.h. char*/float* returns are registered as
|
||||||
|
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppAbiVersion, "facedetect_capi_abi_version"},
|
||||||
|
{&CppLoad, "facedetect_capi_load"},
|
||||||
|
{&CppFree, "facedetect_capi_free"},
|
||||||
|
{&CppLastError, "facedetect_capi_last_error"},
|
||||||
|
{&CppFreeString, "facedetect_capi_free_string"},
|
||||||
|
{&CppFreeVec, "facedetect_capi_free_vec"},
|
||||||
|
{&CppEmbedPath, "facedetect_capi_embed_path"},
|
||||||
|
{&CppEmbedRGB, "facedetect_capi_embed_rgb"},
|
||||||
|
{&CppDetectJSON, "facedetect_capi_detect_path_json"},
|
||||||
|
{&CppVerifyPaths, "facedetect_capi_verify_paths"},
|
||||||
|
{&CppAnalyzeJSON, "facedetect_capi_analyze_path_json"},
|
||||||
|
}
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "[face-detect] ABI=%d\n", CppAbiVersion())
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &FaceDetect{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
47
backend/go/face-detect/options.go
Normal file
47
backend/go/face-detect/options.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultVerifyThreshold is the cosine-distance cutoff used when a request does
|
||||||
|
// not set one. Matches the insightface buffalo_l ArcFace R50 default the Python
|
||||||
|
// face backend ships with so the two implementations agree on verdicts out of
|
||||||
|
// the box.
|
||||||
|
const defaultVerifyThreshold float32 = 0.35
|
||||||
|
|
||||||
|
// loadOptions holds the parsed model-level options for face-detect.
|
||||||
|
type loadOptions struct {
|
||||||
|
verifyThreshold float32
|
||||||
|
modelName string
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitOption breaks a single "key:value" backend option at its first colon.
// Both halves are returned with surrounding whitespace removed. ok is false
// (and key/value are empty) when the option contains no colon at all.
func splitOption(o string) (key, value string, ok bool) {
	rawKey, rawValue, found := strings.Cut(o, ":")
	if !found {
		return "", "", false
	}
	return strings.TrimSpace(rawKey), strings.TrimSpace(rawValue), true
}
|
||||||
|
|
||||||
|
// parseOptions reads the backend "key:value" option slice. Unknown keys are
|
||||||
|
// ignored. Defaults: verify_threshold 0.35, model_name derived from the file.
|
||||||
|
func parseOptions(opts []string) loadOptions {
|
||||||
|
o := loadOptions{verifyThreshold: defaultVerifyThreshold}
|
||||||
|
for _, oo := range opts {
|
||||||
|
key, value, ok := splitOption(oo)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch key {
|
||||||
|
case "verify_threshold", "threshold":
|
||||||
|
if f, err := strconv.ParseFloat(value, 32); err == nil && f > 0 {
|
||||||
|
o.verifyThreshold = float32(f)
|
||||||
|
}
|
||||||
|
case "model_name":
|
||||||
|
o.modelName = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
68
backend/go/face-detect/package.sh
Normal file
68
backend/go/face-detect/package.sh
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
#
# Assemble a self-contained package/ directory for the face-detect backend:
# the face-detect-grpc binary, run.sh, libfacedetect.so, the core runtime libs
# (libc/libstdc++/libgomp + ld.so) and the GPU runtime for the active
# BUILD_TYPE. Mirrors backend/go/voice-detect/package.sh; run.sh launches the
# (CGO_ENABLED=0) binary through lib/ld.so so the packaged libc is used
# instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/face-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libfacedetect.so plus any soname symlinks. purego.Dlopen finds it through
# LD_LIBRARY_PATH, which run.sh points at lib/.
cp -avf "$CURDIR"/libfacedetect.so* "$CURDIR/package/lib/" 2>/dev/null || {
    echo "ERROR: libfacedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
}

# copy_runtime_libs LOADER LIBDIR
# Copies the dynamic loader to lib/ld.so and then the core runtime libraries
# from LIBDIR into lib/, dereferencing symlinks. Any failed copy aborts the
# script through 'set -e'.
copy_runtime_libs() {
    local loader="$1"
    local libdir="$2"
    local lib

    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 \
               libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$libdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

# The architecture is detected from which dynamic loader exists on the build
# host; the matching loader ends up as lib/ld.so.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_runtime_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_runtime_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) are bundled
# according to BUILD_TYPE so the backend can reach the GPU even when the
# runtime base image ships none of those drivers.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||||
16
backend/go/face-detect/run.sh
Normal file
16
backend/go/face-detect/run.sh
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Launcher for the packaged face-detect gRPC backend. All arguments are
# forwarded unchanged to face-detect-grpc.
set -e

CURDIR=$(dirname "$(realpath "$0")")

# Put the packaged lib/ directory (and the package root) first on the search
# path. The caller's LD_LIBRARY_PATH is appended only when it is non-empty:
# an empty element (e.g. a trailing ':') makes the dynamic loader search the
# current working directory, which must not be on the library path.
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If a self-contained ld.so was packaged, route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the voice-detect /
# whisper / parakeet backends' runtime layout).
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/face-detect-grpc" "$@"
fi

exec "$CURDIR/face-detect-grpc" "$@"
|
||||||
15
backend/go/face-detect/test.sh
Normal file
15
backend/go/face-detect/test.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
# Runs the face-detect backend Go test suite from this directory.
set -e

CURDIR=$(dirname "$(realpath "$0")")
cd "$CURDIR"

echo "Running face-detect backend tests..."

# The pure-Go parsing specs always run. The embed/detect/verify/analyze smoke
# specs run only when a model + image are provided via
# FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE; otherwise they
# auto-skip.
#
# The caller's LD_LIBRARY_PATH is appended only when non-empty so the search
# path never ends in an empty element (which the dynamic loader would treat as
# the current working directory).
LD_LIBRARY_PATH="$CURDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" go test -v -timeout 1200s .

echo "face-detect tests completed."
|
||||||
@@ -1,13 +1,30 @@
|
|||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
|
|
||||||
|
# The opus shim is a small C wrapper around libopus' variadic
|
||||||
|
# opus_encoder_ctl (see csrc/opus_shim.c). It is built as a shared library
|
||||||
|
# and dlopen'd at runtime by the Go backend (codec.go). The extension is
|
||||||
|
# OS-specific: Linux uses .so, macOS uses .dylib. OS is exported by the root
|
||||||
|
# Makefile (`export OS := $(shell uname -s)`).
|
||||||
|
SHIM_EXT=so
|
||||||
|
|
||||||
OPUS_CFLAGS := $(shell pkg-config --cflags opus)
|
OPUS_CFLAGS := $(shell pkg-config --cflags opus)
|
||||||
OPUS_LIBS := $(shell pkg-config --libs opus)
|
OPUS_LIBS := $(shell pkg-config --libs opus)
|
||||||
|
SHIM_LDFLAGS := $(OPUS_LIBS)
|
||||||
|
|
||||||
libopusshim.so: csrc/opus_shim.c
|
ifeq ($(OS),Darwin)
|
||||||
$(CC) -shared -fPIC -o $@ $< $(OPUS_CFLAGS) $(OPUS_LIBS)
|
SHIM_EXT=dylib
|
||||||
|
# Resolve libopus symbols lazily from the already globally-loaded
|
||||||
|
# libopus (codec.go dlopens it RTLD_GLOBAL before the shim) rather than
|
||||||
|
# recording an absolute Homebrew path in the dylib. This keeps the
|
||||||
|
# packaged shim relocatable on machines that have no Homebrew.
|
||||||
|
SHIM_LDFLAGS := -undefined dynamic_lookup
|
||||||
|
endif
|
||||||
|
|
||||||
opus: libopusshim.so
|
libopusshim.$(SHIM_EXT): csrc/opus_shim.c
|
||||||
|
$(CC) -shared -fPIC -o $@ $< $(OPUS_CFLAGS) $(SHIM_LDFLAGS)
|
||||||
|
|
||||||
|
opus: libopusshim.$(SHIM_EXT)
|
||||||
$(GOCMD) build -tags "$(GO_TAGS)" -o opus ./
|
$(GOCMD) build -tags "$(GO_TAGS)" -o opus ./
|
||||||
|
|
||||||
package: opus
|
package: opus
|
||||||
@@ -16,4 +33,7 @@ package: opus
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f opus libopusshim.so
|
rm -f opus libopusshim.$(SHIM_EXT)
|
||||||
|
rm -rf package
|
||||||
|
|
||||||
|
.PHONY: build package clean
|
||||||
|
|||||||
@@ -8,13 +8,23 @@ mkdir -p $CURDIR/package/lib
|
|||||||
cp -avf $CURDIR/opus $CURDIR/package/
|
cp -avf $CURDIR/opus $CURDIR/package/
|
||||||
cp -avf $CURDIR/run.sh $CURDIR/package/
|
cp -avf $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Copy the opus shim library
|
# The shim extension is OS-specific (.so on Linux, .dylib on macOS).
|
||||||
cp -avf $CURDIR/libopusshim.so $CURDIR/package/lib/
|
SHIM_EXT=so
|
||||||
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
SHIM_EXT=dylib
|
||||||
|
fi
|
||||||
|
|
||||||
# Copy system libopus
|
# Copy the opus shim library
|
||||||
|
cp -avf $CURDIR/libopusshim.$SHIM_EXT $CURDIR/package/lib/
|
||||||
|
|
||||||
|
# Copy system libopus so the backend is self-contained: the runtime base
|
||||||
|
# image has neither libopus-dev (Linux) nor Homebrew (macOS), so codec.go's
|
||||||
|
# dlopen would otherwise fail. Both name patterns are attempted; only the
|
||||||
|
# host's matching one exists.
|
||||||
if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists opus; then
|
if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists opus; then
|
||||||
LIBOPUS_DIR=$(pkg-config --variable=libdir opus)
|
LIBOPUS_DIR=$(pkg-config --variable=libdir opus)
|
||||||
cp -avfL $LIBOPUS_DIR/libopus.so* $CURDIR/package/lib/ 2>/dev/null || true
|
cp -avf $LIBOPUS_DIR/libopus.so* $CURDIR/package/lib/ 2>/dev/null || true
|
||||||
|
cp -avf $LIBOPUS_DIR/libopus*.dylib $CURDIR/package/lib/ 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
@@ -38,6 +48,8 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|||||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
||||||
|
elif [ "$(uname -s)" = "Darwin" ]; then
|
||||||
|
echo "Detected Darwin — system libraries linked dynamically, no bundled loader needed"
|
||||||
else
|
else
|
||||||
echo "Warning: Could not detect architecture for system library bundling"
|
echo "Warning: Could not detect architecture for system library bundling"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -3,8 +3,13 @@ set -ex
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.so
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
|
export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.dylib
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||||
|
export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.so
|
||||||
|
fi
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# parakeet-cpp backend Makefile.
|
# parakeet-cpp backend Makefile.
|
||||||
#
|
#
|
||||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
# Upstream pin lives below as PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||||
# (.github/bump_deps.sh) can find and update it - matches the
|
# (.github/bump_deps.sh) can find and update it - matches the
|
||||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||||
#
|
#
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
# That's what the L0 smoke test uses. The default target below does the
|
# That's what the L0 smoke test uses. The default target below does the
|
||||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||||
|
|
||||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||||
|
|
||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
|
|||||||
@@ -16,7 +16,15 @@ cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|||||||
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS has no glibc loader to bundle. The piper binary links its bundled
|
||||||
|
# libs (libucd, libespeak-ng, libpiper_phonemize, libonnxruntime) via
|
||||||
|
# @rpath but ships with no LC_RPATH, so dyld aborts at launch with
|
||||||
|
# "Library not loaded: @rpath/libucd.dylib ... no LC_RPATH's found".
|
||||||
|
# Add an @loader_path/lib rpath so @rpath resolves to package/lib/.
|
||||||
|
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||||
|
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/piper"
|
||||||
|
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||||
# x86_64 architecture
|
# x86_64 architecture
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||||
|
|||||||
@@ -4,7 +4,12 @@ set -ex
|
|||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
|
export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
|
||||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
|
||||||
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||||
|
|||||||
@@ -15,7 +15,14 @@ cp -avf $CURDIR/run.sh $CURDIR/package/
|
|||||||
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
|
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS has no glibc loader to bundle. silero-vad links its bundled
|
||||||
|
# libonnxruntime via @rpath but ships with no LC_RPATH, so dyld can't find
|
||||||
|
# it at runtime. Add an @loader_path/lib rpath so @rpath resolves to
|
||||||
|
# package/lib/ (matching the piper darwin fix, #10525).
|
||||||
|
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||||
|
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/silero-vad"
|
||||||
|
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||||
# x86_64 architecture
|
# x86_64 architecture
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||||
|
|||||||
@@ -3,7 +3,11 @@ set -ex
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=8caa3f908ae6d4a4bef531e73b9a969f266a3d1f
|
STABLEDIFFUSION_GGML_VERSION?=9956436c925a367daeab097598b1ea1f32d3503f
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
|
|||||||
18
backend/go/voice-detect/.gitignore
vendored
Normal file
18
backend/go/voice-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Fetched upstream sources
|
||||||
|
sources/
|
||||||
|
|
||||||
|
# CMake build directories
|
||||||
|
build*/
|
||||||
|
|
||||||
|
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||||
|
# symlinked for local dev; the real sources live in voice-detect.cpp upstream.
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
voicedetect_capi.h
|
||||||
|
compile_commands.json
|
||||||
|
|
||||||
|
# Compiled backend binary
|
||||||
|
voice-detect-grpc
|
||||||
|
|
||||||
|
# Packaging output
|
||||||
|
package/
|
||||||
107
backend/go/voice-detect/Makefile
Normal file
107
backend/go/voice-detect/Makefile
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
# voice-detect backend Makefile.
|
||||||
|
#
|
||||||
|
# Upstream pin lives below as VOICEDETECT_VERSION?=3d51077... (.github/bump_deps.sh
|
||||||
|
# can find and update it - matches the parakeet.cpp / whisper.cpp / ds4 convention).
|
||||||
|
#
|
||||||
|
# Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
|
||||||
|
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||||
|
#
|
||||||
|
# ln -sf /path/to/voice-detect.cpp/build-shared/libvoicedetect.so .
|
||||||
|
# ln -sf /path/to/voice-detect.cpp/include/voicedetect_capi.h .
|
||||||
|
# go build -o voice-detect-grpc .
|
||||||
|
#
|
||||||
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
|
# not need a side-checkout.
|
||||||
|
|
||||||
|
VOICEDETECT_VERSION?=3d510772357538c5182808ac7de2278b84824e24
|
||||||
|
VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
GO_TAGS?=
|
||||||
|
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
|
||||||
|
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||||
|
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||||
|
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||||
|
|
||||||
|
# Build ggml statically into libvoicedetect.so (PIC) so the shared lib is
|
||||||
|
# self-contained: dlopen needs no libggml*.so alongside it, only system libs
|
||||||
|
# (libstdc++/libgomp/libc) that the runtime image already provides.
|
||||||
|
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DVOICEDETECT_SHARED=ON -DVOICEDETECT_BUILD_CLI=OFF -DVOICEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||||
|
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# voice-detect.cpp gates its GGML backends behind VOICEDETECT_GGML_* options and
|
||||||
|
# does set(GGML_CUDA ${VOICEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||||
|
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the VOICEDETECT_GGML_*
|
||||||
|
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_CUDA=ON
|
||||||
|
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, reaches
|
||||||
|
# torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||||
|
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||||
|
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||||
|
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||||
|
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_CUDNN=ON
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_HIP=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_VULKAN=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_METAL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: voice-detect-grpc package build clean purge test all
|
||||||
|
|
||||||
|
all: voice-detect-grpc
|
||||||
|
|
||||||
|
# Clone the upstream voice-detect.cpp source at the pinned commit. Directory acts
|
||||||
|
# as the target so make only re-clones when missing. After a VOICEDETECT_VERSION
|
||||||
|
# bump, run 'make purge && make' to refetch.
|
||||||
|
sources/voice-detect.cpp:
|
||||||
|
mkdir -p sources/voice-detect.cpp
|
||||||
|
cd sources/voice-detect.cpp && \
|
||||||
|
git init -q && \
|
||||||
|
git remote add origin $(VOICEDETECT_REPO) && \
|
||||||
|
git fetch --depth 1 origin $(VOICEDETECT_VERSION) && \
|
||||||
|
git checkout FETCH_HEAD && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||||
|
# sources so purego.Dlopen("libvoicedetect.so") and the cgo-less build both pick
|
||||||
|
# them up.
|
||||||
|
libvoicedetect.so: sources/voice-detect.cpp
|
||||||
|
cmake -B sources/voice-detect.cpp/build-shared -S sources/voice-detect.cpp $(CMAKE_ARGS)
|
||||||
|
cmake --build sources/voice-detect.cpp/build-shared --config Release -j$(JOBS) --target voicedetect
|
||||||
|
cp -fv sources/voice-detect.cpp/build-shared/libvoicedetect.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/voice-detect.cpp/include/voicedetect_capi.h ./
|
||||||
|
|
||||||
|
voice-detect-grpc: libvoicedetect.so main.go govoicedetect.go options.go
|
||||||
|
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o voice-detect-grpc .
|
||||||
|
|
||||||
|
package: voice-detect-grpc
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
build: package
|
||||||
|
|
||||||
|
# Test target. The embed/verify/analyze smoke specs are gated on
|
||||||
|
# VOICEDETECT_BACKEND_TEST_MODEL + VOICEDETECT_BACKEND_TEST_WAV; without them the
|
||||||
|
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||||
|
test:
|
||||||
|
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf libvoicedetect.so* voicedetect_capi.h package voice-detect-grpc
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf sources/voice-detect.cpp
|
||||||
273
backend/go/voice-detect/govoicedetect.go
Normal file
273
backend/go/voice-detect/govoicedetect.go
Normal file
@@ -0,0 +1,273 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// purego-bound entry points from libvoicedetect.so. Names match
|
||||||
|
// voicedetect_capi.h exactly so a `nm libvoicedetect.so | grep voicedetect_capi`
|
||||||
|
// is enough to spot drift.
|
||||||
|
//
|
||||||
|
// The opaque ctx and the malloc'd char*/float* return values are declared as
|
||||||
|
// uintptr so we get the raw pointer back and can release it via the matching
|
||||||
|
// capi free function. purego's native string/[]float32 returns would copy and
|
||||||
|
// forget the original pointer, leaking the C-owned buffer on every call.
|
||||||
|
var (
|
||||||
|
CppAbiVersion func() int32
|
||||||
|
CppLoad func(ggufPath string) uintptr
|
||||||
|
CppFree func(ctx uintptr)
|
||||||
|
CppLastError func(ctx uintptr) string
|
||||||
|
CppFreeString func(s uintptr)
|
||||||
|
CppFreeVec func(v uintptr)
|
||||||
|
CppEmbedPath func(ctx uintptr, wavPath string, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppEmbedPCM func(ctx uintptr, pcm []float32, nSamples, sampleRate int32, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, outDistance, outVerified unsafe.Pointer) int32
|
||||||
|
CppAnalyzeJSON func(ctx uintptr, wavPath string) uintptr
|
||||||
|
)
|
||||||
|
|
||||||
|
// VoiceDetect implements the speaker-recognition voice subset of the Backend
|
||||||
|
// gRPC service over libvoicedetect.so. The C side keeps a single loaded model
|
||||||
|
// plus a per-ctx last-error buffer and is not reentrant, so base.SingleThread
|
||||||
|
// serializes every call.
|
||||||
|
type VoiceDetect struct {
|
||||||
|
base.SingleThread
|
||||||
|
opts loadOptions
|
||||||
|
ctxPtr uintptr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *VoiceDetect) Load(opts *pb.ModelOptions) error {
|
||||||
|
model := opts.ModelFile
|
||||||
|
if model == "" {
|
||||||
|
model = opts.ModelPath
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||||
|
model = filepath.Join(opts.ModelPath, model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
return errors.New("voice-detect: ModelFile is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
v.opts = parseOptions(opts.Options)
|
||||||
|
if v.opts.modelName == "" {
|
||||||
|
v.opts.modelName = filepath.Base(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||||
|
// one backend process per model and serves requests concurrently, so the
|
||||||
|
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||||
|
// VOICEDETECT_THREADS is read by the engine at backend construction, so it
|
||||||
|
// must be set before the capi load. A non-positive Threads means "unset":
|
||||||
|
// leave the env alone so the engine keeps its sane default.
|
||||||
|
threads := opts.Threads
|
||||||
|
if threads > 0 {
|
||||||
|
if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||||
|
return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err)
|
||||||
|
}
|
||||||
|
xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Info("voice-detect: loading model", "model", model,
|
||||||
|
"verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||||
|
|
||||||
|
ctx := CppLoad(model)
|
||||||
|
if ctx == 0 {
|
||||||
|
// The last-error buffer lives on the ctx that was never returned, so
|
||||||
|
// surface the path the operator tried to load instead.
|
||||||
|
return fmt.Errorf("voice-detect: voicedetect_capi_load failed for %q", model)
|
||||||
|
}
|
||||||
|
v.ctxPtr = ctx
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceEmbed returns the L2-normalized speaker embedding for an audio clip.
|
||||||
|
// The request carries a filesystem PATH; the HTTP layer materializes
|
||||||
|
// base64/URL/data-URI inputs to a temp file before the gRPC call.
|
||||||
|
func (v *VoiceDetect) VoiceEmbed(req *pb.VoiceEmbedRequest) (pb.VoiceEmbedResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceEmbedResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio == "" {
|
||||||
|
return pb.VoiceEmbedResponse{}, errors.New("voice-detect: audio path is required")
|
||||||
|
}
|
||||||
|
emb, err := v.embedPath(req.Audio)
|
||||||
|
if err != nil {
|
||||||
|
return pb.VoiceEmbedResponse{}, err
|
||||||
|
}
|
||||||
|
return pb.VoiceEmbedResponse{Embedding: emb, Model: v.opts.modelName}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *VoiceDetect) embedPath(path string) ([]float32, error) {
|
||||||
|
var vec uintptr
|
||||||
|
var dim int32
|
||||||
|
rc := CppEmbedPath(v.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||||
|
if rc != 0 || vec == 0 || dim <= 0 {
|
||||||
|
return nil, v.lastErr("embed", path)
|
||||||
|
}
|
||||||
|
defer CppFreeVec(vec)
|
||||||
|
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||||
|
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||||
|
// nor moves this buffer and we copy immediately.
|
||||||
|
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||||
|
out := make([]float32, int(dim))
|
||||||
|
copy(out, src)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceVerify embeds two clips and reports whether they are the same speaker by
|
||||||
|
// cosine distance against a threshold. A request threshold <= 0 falls back to
|
||||||
|
// the model-configured default (verify_threshold option, 0.25 if unset).
|
||||||
|
func (v *VoiceDetect) VoiceVerify(req *pb.VoiceVerifyRequest) (pb.VoiceVerifyResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceVerifyResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio1 == "" || req.Audio2 == "" {
|
||||||
|
return pb.VoiceVerifyResponse{}, errors.New("voice-detect: audio1 and audio2 are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold := req.Threshold
|
||||||
|
if threshold <= 0 {
|
||||||
|
threshold = v.opts.verifyThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now()
|
||||||
|
var distance float32
|
||||||
|
var verified int32
|
||||||
|
rc := CppVerifyPaths(v.ctxPtr, req.Audio1, req.Audio2, threshold,
|
||||||
|
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||||
|
if rc != 0 {
|
||||||
|
return pb.VoiceVerifyResponse{}, v.lastErr("verify", req.Audio1+","+req.Audio2)
|
||||||
|
}
|
||||||
|
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||||
|
|
||||||
|
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||||
|
// matching the Python speaker-recognition backend's reporting.
|
||||||
|
confidence := float32(0)
|
||||||
|
if threshold > 0 {
|
||||||
|
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.VoiceVerifyResponse{
|
||||||
|
Verified: verified != 0,
|
||||||
|
Distance: distance,
|
||||||
|
Threshold: threshold,
|
||||||
|
Confidence: confidence,
|
||||||
|
Model: v.opts.modelName,
|
||||||
|
ProcessingTimeMs: elapsedMs,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceAnalyze runs the age/gender/emotion heads on a single clip. The C-API
|
||||||
|
// always evaluates every supported head, so the request's actions filter is
|
||||||
|
// advisory and the full analysis is returned as a single segment (the engine
|
||||||
|
// does not produce time-bounded segments).
|
||||||
|
func (v *VoiceDetect) VoiceAnalyze(req *pb.VoiceAnalyzeRequest) (pb.VoiceAnalyzeResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio == "" {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, errors.New("voice-detect: audio path is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr := CppAnalyzeJSON(v.ctxPtr, req.Audio)
|
||||||
|
if ptr == 0 {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, v.lastErr("analyze", req.Audio)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
seg, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||||
|
if err != nil {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, fmt.Errorf("voice-detect: analyze JSON for %q: %w", req.Audio, err)
|
||||||
|
}
|
||||||
|
return pb.VoiceAnalyzeResponse{Segments: []*pb.VoiceAnalysis{seg}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// analyzeEmotionJSON is the "emotion" object of the analyze document: the
// winning label plus a score per emotion class.
type analyzeEmotionJSON struct {
	Label  string             `json:"label"`
	Scores map[string]float32 `json:"scores"`
}

// analyzeJSON is the decoding target for the document produced by
// voicedetect_capi_analyze_path_json, which looks like:
//
//	{"age":42.0,
//	 "gender":{"label":"female","female":0.88,"male":0.12},
//	 "emotion":{"label":"neutral","scores":{"neutral":0.7, ...}}}
//
// The gender object mixes a string ("label") with per-class float scores under
// the same map, so its values are kept as raw JSON here and separated later by
// parseAnalyzeJSON.
type analyzeJSON struct {
	Age     float32                    `json:"age"`
	Gender  map[string]json.RawMessage `json:"gender"`
	Emotion analyzeEmotionJSON         `json:"emotion"`
}
|
||||||
|
|
||||||
|
// parseAnalyzeJSON maps the engine's analyze document onto a VoiceAnalysis.
|
||||||
|
// start/end stay 0: the model emits a single whole-utterance result, not
|
||||||
|
// time-bounded segments.
|
||||||
|
func parseAnalyzeJSON(doc string) (*pb.VoiceAnalysis, error) {
|
||||||
|
var a analyzeJSON
|
||||||
|
if err := json.Unmarshal([]byte(doc), &a); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
seg := &pb.VoiceAnalysis{
|
||||||
|
Age: a.Age,
|
||||||
|
DominantEmotion: a.Emotion.Label,
|
||||||
|
Emotion: a.Emotion.Scores,
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(a.Gender) > 0 {
|
||||||
|
gender := make(map[string]float32, len(a.Gender))
|
||||||
|
for k, raw := range a.Gender {
|
||||||
|
if k == "label" {
|
||||||
|
_ = json.Unmarshal(raw, &seg.DominantGender)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var score float32
|
||||||
|
if err := json.Unmarshal(raw, &score); err == nil {
|
||||||
|
gender[k] = score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seg.Gender = gender
|
||||||
|
}
|
||||||
|
|
||||||
|
return seg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||||
|
func (v *VoiceDetect) lastErr(op, subject string) error {
|
||||||
|
msg := strings.TrimSpace(CppLastError(v.ctxPtr))
|
||||||
|
if msg == "" {
|
||||||
|
msg = "no error detail"
|
||||||
|
}
|
||||||
|
return fmt.Errorf("voice-detect: %s failed for %q: %s", op, subject, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||||
|
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||||
|
//
|
||||||
|
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||||
|
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||||
|
func goStringFromCPtr(cptr uintptr) string {
|
||||||
|
if cptr == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||||
|
n := 0
|
||||||
|
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return string(unsafe.Slice((*byte)(p), n))
|
||||||
|
}
|
||||||
144
backend/go/voice-detect/govoicedetect_test.go
Normal file
144
backend/go/voice-detect/govoicedetect_test.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVoiceDetect(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "voice-detect Backend Suite")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
libLoadOnce sync.Once
|
||||||
|
libLoadErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||||
|
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||||
|
// specs skip themselves) when libvoicedetect.so is not loadable from cwd
|
||||||
|
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||||
|
func ensureLibLoaded() error {
|
||||||
|
libLoadOnce.Do(func() {
|
||||||
|
libName := os.Getenv("VOICEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libvoicedetect.so"
|
||||||
|
}
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
libLoadErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
purego.RegisterLibFunc(&CppAbiVersion, lib, "voicedetect_capi_abi_version")
|
||||||
|
purego.RegisterLibFunc(&CppLoad, lib, "voicedetect_capi_load")
|
||||||
|
purego.RegisterLibFunc(&CppFree, lib, "voicedetect_capi_free")
|
||||||
|
purego.RegisterLibFunc(&CppLastError, lib, "voicedetect_capi_last_error")
|
||||||
|
purego.RegisterLibFunc(&CppFreeString, lib, "voicedetect_capi_free_string")
|
||||||
|
purego.RegisterLibFunc(&CppFreeVec, lib, "voicedetect_capi_free_vec")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPath, lib, "voicedetect_capi_embed_path")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPCM, lib, "voicedetect_capi_embed_pcm")
|
||||||
|
purego.RegisterLibFunc(&CppVerifyPaths, lib, "voicedetect_capi_verify_paths")
|
||||||
|
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "voicedetect_capi_analyze_path_json")
|
||||||
|
})
|
||||||
|
return libLoadErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("parseOptions", func() {
|
||||||
|
It("defaults verify_threshold to 0.25", func() {
|
||||||
|
o := parseOptions(nil)
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.25)))
|
||||||
|
Expect(o.modelName).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0.4", "model_name:ecapa", "unknown:x"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||||
|
Expect(o.modelName).To(Equal("ecapa"))
|
||||||
|
|
||||||
|
o2 := parseOptions([]string{"threshold:0.3"})
|
||||||
|
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores non-positive thresholds and keeps the default", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.25)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("parseAnalyzeJSON", func() {
|
||||||
|
It("maps age, gender label+scores and emotion label+scores", func() {
|
||||||
|
doc := `{"age":42.0,
|
||||||
|
"gender":{"label":"female","female":0.88,"male":0.12},
|
||||||
|
"emotion":{"label":"neutral","scores":{"neutral":0.7,"happy":0.2,"sad":0.1}}}`
|
||||||
|
seg, err := parseAnalyzeJSON(doc)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(seg.Age).To(BeNumerically("~", 42.0, 1e-4))
|
||||||
|
Expect(seg.Start).To(Equal(float32(0)))
|
||||||
|
Expect(seg.End).To(Equal(float32(0)))
|
||||||
|
|
||||||
|
Expect(seg.DominantGender).To(Equal("female"))
|
||||||
|
Expect(seg.Gender).To(HaveKeyWithValue("female", BeNumerically("~", 0.88, 1e-4)))
|
||||||
|
Expect(seg.Gender).To(HaveKeyWithValue("male", BeNumerically("~", 0.12, 1e-4)))
|
||||||
|
// The "label" entry is consumed into DominantGender, not the score map.
|
||||||
|
Expect(seg.Gender).ToNot(HaveKey("label"))
|
||||||
|
|
||||||
|
Expect(seg.DominantEmotion).To(Equal("neutral"))
|
||||||
|
Expect(seg.Emotion).To(HaveKeyWithValue("neutral", BeNumerically("~", 0.7, 1e-4)))
|
||||||
|
Expect(seg.Emotion).To(HaveKeyWithValue("happy", BeNumerically("~", 0.2, 1e-4)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("tolerates a missing gender block", func() {
|
||||||
|
seg, err := parseAnalyzeJSON(`{"age":30.0,"emotion":{"label":"happy","scores":{"happy":1.0}}}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(seg.DominantGender).To(Equal(""))
|
||||||
|
Expect(seg.DominantEmotion).To(Equal("happy"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error on malformed JSON", func() {
|
||||||
|
_, err := parseAnalyzeJSON(`{not-json`)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// The specs below exercise the real C-API end to end. They run only when both a
|
||||||
|
// model GGUF and a test WAV are provided, and skip cleanly otherwise so the
|
||||||
|
// suite stays green without large assets.
|
||||||
|
var _ = Describe("VoiceDetect end-to-end", Ordered, func() {
|
||||||
|
var (
|
||||||
|
v *VoiceDetect
|
||||||
|
modelPath = os.Getenv("VOICEDETECT_BACKEND_TEST_MODEL")
|
||||||
|
wavPath = os.Getenv("VOICEDETECT_BACKEND_TEST_WAV")
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeAll(func() {
|
||||||
|
if modelPath == "" || wavPath == "" {
|
||||||
|
Skip("set VOICEDETECT_BACKEND_TEST_MODEL and VOICEDETECT_BACKEND_TEST_WAV to run the e2e specs")
|
||||||
|
}
|
||||||
|
if err := ensureLibLoaded(); err != nil {
|
||||||
|
Skip("libvoicedetect.so not loadable: " + err.Error())
|
||||||
|
}
|
||||||
|
v = &VoiceDetect{}
|
||||||
|
Expect(v.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("embeds an audio clip", func() {
|
||||||
|
resp, err := v.VoiceEmbed(&pb.VoiceEmbedRequest{Audio: wavPath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Embedding).ToNot(BeEmpty())
|
||||||
|
Expect(resp.Model).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("verifies a clip against itself as the same speaker", func() {
|
||||||
|
resp, err := v.VoiceVerify(&pb.VoiceVerifyRequest{Audio1: wavPath, Audio2: wavPath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Verified).To(BeTrue())
|
||||||
|
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||||
|
})
|
||||||
|
})
|
||||||
64
backend/go/voice-detect/main.go
Normal file
64
backend/go/voice-detect/main.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
|
//
|
||||||
|
// Loads libvoicedetect.so via purego and registers the flat C-API entry points
|
||||||
|
// declared in voicedetect_capi.h. The library name can be overridden with
|
||||||
|
// VOICEDETECT_LIBRARY (mirrors the PARAKEET_LIBRARY / OMNIVOICE_LIBRARY
|
||||||
|
// convention in the sibling backends); the default looks for the .so next to
|
||||||
|
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// addr is the -addr command-line flag; main passes its value to
// grpc.StartServer. It defaults to localhost:50051.
var addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
|
||||||
|
// LibFuncs pairs a Go function variable with the name of the C symbol it is
// bound to. main builds a slice of these (as positional literals, so the field
// order matters) and hands each pair to purego.RegisterLibFunc.
type LibFuncs struct {
	// FuncPtr is the pointer to the Go function variable to populate.
	FuncPtr any
	// Name is the exported symbol name looked up in the shared library.
	Name string
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
libName := os.Getenv("VOICEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libvoicedetect.so"
|
||||||
|
}
|
||||||
|
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("voice-detect: dlopen %q: %w", libName, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bound 1:1 to voicedetect_capi.h. char*/float* returns are registered as
|
||||||
|
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppAbiVersion, "voicedetect_capi_abi_version"},
|
||||||
|
{&CppLoad, "voicedetect_capi_load"},
|
||||||
|
{&CppFree, "voicedetect_capi_free"},
|
||||||
|
{&CppLastError, "voicedetect_capi_last_error"},
|
||||||
|
{&CppFreeString, "voicedetect_capi_free_string"},
|
||||||
|
{&CppFreeVec, "voicedetect_capi_free_vec"},
|
||||||
|
{&CppEmbedPath, "voicedetect_capi_embed_path"},
|
||||||
|
{&CppEmbedPCM, "voicedetect_capi_embed_pcm"},
|
||||||
|
{&CppVerifyPaths, "voicedetect_capi_verify_paths"},
|
||||||
|
{&CppAnalyzeJSON, "voicedetect_capi_analyze_path_json"},
|
||||||
|
}
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "[voice-detect] ABI=%d\n", CppAbiVersion())
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &VoiceDetect{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
46
backend/go/voice-detect/options.go
Normal file
46
backend/go/voice-detect/options.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultVerifyThreshold is the cosine-distance cutoff applied when neither
// the request nor the model options supply a positive one. The value matches
// the Python speaker-recognition backend's default so both implementations
// reach the same verdicts out of the box.
const defaultVerifyThreshold = float32(0.25)
|
||||||
|
|
||||||
|
// loadOptions is the parsed form of the model-level "key:value" options that
// voice-detect understands.
type loadOptions struct {
	// verifyThreshold is the default cosine-distance cutoff for verification.
	verifyThreshold float32
	// modelName is the value of the model_name option ("" when not given).
	modelName string
}
|
||||||
|
|
||||||
|
// splitOption breaks a "key:value" option at its first colon and trims
// surrounding whitespace from both halves. Any further colons stay in the
// value. ok is false, and key and value are empty, when o has no colon.
func splitOption(o string) (key, value string, ok bool) {
	rawKey, rawValue, found := strings.Cut(o, ":")
	if !found {
		return "", "", false
	}
	return strings.TrimSpace(rawKey), strings.TrimSpace(rawValue), true
}
|
||||||
|
|
||||||
|
// parseOptions reads the backend "key:value" option slice. Unknown keys are
|
||||||
|
// ignored. Defaults: verify_threshold 0.25, model_name derived from the file.
|
||||||
|
func parseOptions(opts []string) loadOptions {
|
||||||
|
o := loadOptions{verifyThreshold: defaultVerifyThreshold}
|
||||||
|
for _, oo := range opts {
|
||||||
|
key, value, ok := splitOption(oo)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch key {
|
||||||
|
case "verify_threshold", "threshold":
|
||||||
|
if f, err := strconv.ParseFloat(value, 32); err == nil && f > 0 {
|
||||||
|
o.verifyThreshold = float32(f)
|
||||||
|
}
|
||||||
|
case "model_name":
|
||||||
|
o.modelName = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
68
backend/go/voice-detect/package.sh
Executable file
68
backend/go/voice-detect/package.sh
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
#
# Build a self-contained package/ directory for the voice-detect backend:
#   package/voice-detect-grpc   the backend binary
#   package/run.sh              the launcher
#   package/lib/                libvoicedetect.so, the core runtime libs
#                               (libc/libstdc++/libgomp + ld.so) and the GPU
#                               runtime for the active BUILD_TYPE
#
# Mirrors backend/go/parakeet-cpp/package.sh; run.sh routes the (CGO_ENABLED=0)
# binary through lib/ld.so so the packaged libc is used instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/voice-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libvoicedetect.so + any soname symlinks. purego.Dlopen resolves it via
# LD_LIBRARY_PATH, which run.sh points at lib/.
if ! cp -avf "$CURDIR"/libvoicedetect.so* "$CURDIR/package/lib/" 2>/dev/null; then
    echo "ERROR: libvoicedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
fi

# Pick the dynamic loader and the system library directory for this
# architecture. Both stay empty on Darwin, where nothing extra is bundled.
vd_loader=""
vd_syslib=""
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    vd_loader="/lib64/ld-linux-x86-64.so.2"
    vd_syslib="/lib/x86_64-linux-gnu"
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    vd_loader="/lib/ld-linux-aarch64.so.1"
    vd_syslib="/lib/aarch64-linux-gnu"
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Copy the loader as lib/ld.so, then the core runtime libs libvoicedetect.so
# links against (symlinks dereferenced so real files land in the package).
if [ -n "$vd_loader" ]; then
    cp -arfLv "$vd_loader" "$CURDIR/package/lib/ld.so"
    for vd_lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 \
                  libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$vd_syslib/$vd_lib" "$CURDIR/package/lib/$vd_lib"
    done
fi
# Drop the temporaries so they cannot leak into the script sourced below.
unset vd_loader vd_syslib vd_lib

# Package GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) based on
# BUILD_TYPE so the backend can reach the GPU without the runtime base image
# shipping those drivers.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||||
16
backend/go/voice-detect/run.sh
Executable file
16
backend/go/voice-detect/run.sh
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Launcher for the voice-detect gRPC backend. Puts the packaged library
# directories on LD_LIBRARY_PATH and executes voice-detect-grpc, forwarding
# all arguments.
set -e

CURDIR=$(dirname "$(realpath "$0")")

# Search the packaged lib/ first, then the script directory. The caller's
# LD_LIBRARY_PATH is appended only when it is non-empty: an unconditional
# trailing ':' would leave an empty entry, which the dynamic loader interprets
# as the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If a self-contained ld.so was packaged, route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the whisper /
# parakeet backends' runtime layout).
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/voice-detect-grpc" "$@"
fi

exec "$CURDIR/voice-detect-grpc" "$@"
|
||||||
14
backend/go/voice-detect/test.sh
Executable file
14
backend/go/voice-detect/test.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
# Runs the voice-detect Go test suite from the backend directory.
set -e

CURDIR=$(dirname "$(realpath "$0")")
cd "$CURDIR"

echo "Running voice-detect backend tests..."

# The pure-Go parsing specs always run. The embed/verify end-to-end specs run
# only when a model + WAV are provided via VOICEDETECT_BACKEND_TEST_MODEL and
# VOICEDETECT_BACKEND_TEST_WAV; otherwise they auto-skip.
# The backend directory goes first on the library path so a locally built
# libvoicedetect.so is found by the test binary.
export LD_LIBRARY_PATH="$CURDIR:${LD_LIBRARY_PATH:-}"
go test -v -timeout 1200s .

echo "voice-detect tests completed."
|
||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
WHISPER_CPP_VERSION?=0ae02cdb2c7317b50991367c165736ce42ed96ac
|
||||||
SO_TARGET?=libgowhisper.so
|
SO_TARGET?=libgowhisper.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -13,8 +13,14 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$(uname)" = "Darwin" ]; then
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
# macOS: single dylib variant (Metal or Accelerate)
|
# macOS: single fallback variant (Metal/Accelerate). The cmake build emits a
|
||||||
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
|
# Mach-O named .so, but tolerate .dylib too — pick whichever exists so the Go
|
||||||
|
# loader doesn't panic on a hardcoded name that isn't on disk.
|
||||||
|
if [ -e "$CURDIR/libgowhisper-fallback.dylib" ]; then
|
||||||
|
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||||
|
fi
|
||||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
else
|
else
|
||||||
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||||
|
|||||||
@@ -209,6 +209,78 @@
|
|||||||
nvidia-cuda-12: "cuda12-ced"
|
nvidia-cuda-12: "cuda12-ced"
|
||||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-ced"
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-ced"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ced"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ced"
|
||||||
|
- &voicedetect
|
||||||
|
name: "voice-detect"
|
||||||
|
alias: "voice-detect"
|
||||||
|
license: mit
|
||||||
|
icon: https://avatars.githubusercontent.com/u/95302084
|
||||||
|
description: |
|
||||||
|
voice-detect speaker recognition and voice analysis.
|
||||||
|
voice-detect.cpp is a C++/ggml engine that produces L2-normalised
|
||||||
|
speaker embeddings (ECAPA-TDNN, WeSpeaker ResNet34, 3D-Speaker
|
||||||
|
ERes2Net, CAM++) for voice verification and 1:N identification, plus
|
||||||
|
a wav2vec2 age / gender / emotion analysis head. It replaces the
|
||||||
|
Python speaker-recognition backend and is exposed through the Voice*
|
||||||
|
gRPC rpcs and the /v1/voice/* REST endpoints. It runs on CPU, NVIDIA
|
||||||
|
CUDA, AMD ROCm/HIP, Intel SYCL, Vulkan and NVIDIA Jetson (L4T) targets.
|
||||||
|
urls:
|
||||||
|
- https://github.com/mudler/voice-detect.cpp
|
||||||
|
tags:
|
||||||
|
- voice-recognition
|
||||||
|
- speaker-verification
|
||||||
|
- speaker-embedding
|
||||||
|
- CPU
|
||||||
|
- GPU
|
||||||
|
- CUDA
|
||||||
|
- HIP
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-voice-detect"
|
||||||
|
nvidia: "cuda12-voice-detect"
|
||||||
|
intel: "intel-sycl-f16-voice-detect"
|
||||||
|
metal: "metal-voice-detect"
|
||||||
|
amd: "rocm-voice-detect"
|
||||||
|
vulkan: "vulkan-voice-detect"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
nvidia-cuda-13: "cuda13-voice-detect"
|
||||||
|
nvidia-cuda-12: "cuda12-voice-detect"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-voice-detect"
|
||||||
|
- &facedetect
|
||||||
|
name: "face-detect"
|
||||||
|
alias: "face-detect"
|
||||||
|
license: mit
|
||||||
|
icon: https://avatars.githubusercontent.com/u/95302084
|
||||||
|
description: |
|
||||||
|
face-detect face detection, embedding, verification and analysis.
|
||||||
|
face-detect.cpp is a C++/ggml engine that runs SCRFD / YuNet face
|
||||||
|
detection and ArcFace / SFace 512-d (or 128-d) L2-normalised face
|
||||||
|
embeddings for verification and 1:N identification, plus a landmark /
|
||||||
|
age / gender analysis head. It replaces the Python insightface backend
|
||||||
|
and is exposed through the Embedding, Detect and Face* gRPC rpcs and
|
||||||
|
the /v1/face/* REST endpoints. It runs on CPU, NVIDIA CUDA, AMD
|
||||||
|
ROCm/HIP, Intel SYCL, Vulkan and NVIDIA Jetson (L4T) targets.
|
||||||
|
urls:
|
||||||
|
- https://github.com/mudler/face-detect.cpp
|
||||||
|
tags:
|
||||||
|
- face-recognition
|
||||||
|
- face-verification
|
||||||
|
- face-embedding
|
||||||
|
- CPU
|
||||||
|
- GPU
|
||||||
|
- CUDA
|
||||||
|
- HIP
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-face-detect"
|
||||||
|
nvidia: "cuda12-face-detect"
|
||||||
|
intel: "intel-sycl-f16-face-detect"
|
||||||
|
metal: "metal-face-detect"
|
||||||
|
amd: "rocm-face-detect"
|
||||||
|
vulkan: "vulkan-face-detect"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-face-detect"
|
||||||
|
nvidia-cuda-13: "cuda13-face-detect"
|
||||||
|
nvidia-cuda-12: "cuda12-face-detect"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-face-detect"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-face-detect"
|
||||||
- &voxtral
|
- &voxtral
|
||||||
name: "voxtral"
|
name: "voxtral"
|
||||||
alias: "voxtral"
|
alias: "voxtral"
|
||||||
@@ -1356,7 +1428,6 @@
|
|||||||
intel: "intel-fish-speech"
|
intel: "intel-fish-speech"
|
||||||
amd: "rocm-fish-speech"
|
amd: "rocm-fish-speech"
|
||||||
nvidia-l4t: "nvidia-l4t-fish-speech"
|
nvidia-l4t: "nvidia-l4t-fish-speech"
|
||||||
metal: "metal-fish-speech"
|
|
||||||
default: "cpu-fish-speech"
|
default: "cpu-fish-speech"
|
||||||
nvidia-cuda-13: "cuda13-fish-speech"
|
nvidia-cuda-13: "cuda13-fish-speech"
|
||||||
nvidia-cuda-12: "cuda12-fish-speech"
|
nvidia-cuda-12: "cuda12-fish-speech"
|
||||||
@@ -2828,6 +2899,236 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ced"
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ced"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-ced
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-ced
|
||||||
|
## voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "voice-detect-development"
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-voice-detect-development"
|
||||||
|
nvidia: "cuda12-voice-detect-development"
|
||||||
|
intel: "intel-sycl-f16-voice-detect-development"
|
||||||
|
metal: "metal-voice-detect-development"
|
||||||
|
amd: "rocm-voice-detect-development"
|
||||||
|
vulkan: "vulkan-voice-detect-development"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
nvidia-cuda-13: "cuda13-voice-detect-development"
|
||||||
|
nvidia-cuda-12: "cuda12-voice-detect-development"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cpu-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-cpu-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cpu-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-cpu-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "metal-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "metal-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda12-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-12-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda12-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-12-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "rocm-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-rocm-hipblas-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "rocm-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-rocm-hipblas-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f32-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f32-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f32-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f32-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f16-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f16-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f16-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f16-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "vulkan-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-vulkan-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "vulkan-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-vulkan-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-13-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-voice-detect
|
||||||
|
## face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "face-detect-development"
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-face-detect-development"
|
||||||
|
nvidia: "cuda12-face-detect-development"
|
||||||
|
intel: "intel-sycl-f16-face-detect-development"
|
||||||
|
metal: "metal-face-detect-development"
|
||||||
|
amd: "rocm-face-detect-development"
|
||||||
|
vulkan: "vulkan-face-detect-development"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
nvidia-cuda-13: "cuda13-face-detect-development"
|
||||||
|
nvidia-cuda-12: "cuda12-face-detect-development"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-face-detect-development"
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "nvidia-l4t-arm64-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cpu-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-cpu-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cpu-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-cpu-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "metal-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "metal-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda12-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-12-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda12-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-12-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "rocm-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-rocm-hipblas-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "rocm-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-rocm-hipblas-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f32-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f32-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f32-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f32-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f16-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f16-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f16-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f16-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "vulkan-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-vulkan-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "vulkan-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-vulkan-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-13-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-face-detect
|
||||||
## stablediffusion-ggml
|
## stablediffusion-ggml
|
||||||
- !!merge <<: *stablediffusionggml
|
- !!merge <<: *stablediffusionggml
|
||||||
name: "cpu-stablediffusion-ggml"
|
name: "cpu-stablediffusion-ggml"
|
||||||
@@ -4870,7 +5171,6 @@
|
|||||||
intel: "intel-fish-speech-development"
|
intel: "intel-fish-speech-development"
|
||||||
amd: "rocm-fish-speech-development"
|
amd: "rocm-fish-speech-development"
|
||||||
nvidia-l4t: "nvidia-l4t-fish-speech-development"
|
nvidia-l4t: "nvidia-l4t-fish-speech-development"
|
||||||
metal: "metal-fish-speech-development"
|
|
||||||
default: "cpu-fish-speech-development"
|
default: "cpu-fish-speech-development"
|
||||||
nvidia-cuda-13: "cuda13-fish-speech-development"
|
nvidia-cuda-13: "cuda13-fish-speech-development"
|
||||||
nvidia-cuda-12: "cuda12-fish-speech-development"
|
nvidia-cuda-12: "cuda12-fish-speech-development"
|
||||||
@@ -4946,16 +5246,6 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech"
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech
|
||||||
- !!merge <<: *fish-speech
|
|
||||||
name: "metal-fish-speech"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-fish-speech"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:latest-metal-darwin-arm64-fish-speech
|
|
||||||
- !!merge <<: *fish-speech
|
|
||||||
name: "metal-fish-speech-development"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-fish-speech"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:master-metal-darwin-arm64-fish-speech
|
|
||||||
## faster-qwen3-tts
|
## faster-qwen3-tts
|
||||||
- !!merge <<: *faster-qwen3-tts
|
- !!merge <<: *faster-qwen3-tts
|
||||||
name: "faster-qwen3-tts-development"
|
name: "faster-qwen3-tts-development"
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
torch
|
|
||||||
torchaudio
|
|
||||||
@@ -7,3 +7,7 @@ setuptools
|
|||||||
six
|
six
|
||||||
scipy
|
scipy
|
||||||
numpy
|
numpy
|
||||||
|
# fish-speech is installed editable with --no-build-isolation, so the build
|
||||||
|
# backends of its transitive deps must already be in the venv. One of them
|
||||||
|
# builds a Rust extension and needs setuptools-rust present at metadata time.
|
||||||
|
setuptools-rust
|
||||||
|
|||||||
@@ -3,4 +3,5 @@ protobuf
|
|||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
pip
|
pip
|
||||||
chardet
|
chardet
|
||||||
|
click
|
||||||
|
|||||||
@@ -11,14 +11,31 @@ fi
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
# Fetch convert_hf_to_gguf.py from llama.cpp
|
# Fetch convert_hf_to_gguf.py from llama.cpp.
|
||||||
|
# Upstream split the model-specific logic out of the single file into a
|
||||||
|
# sibling `conversion/` package (convert_hf_to_gguf.py now does
|
||||||
|
# `from conversion import ...`), so a single-file download no longer runs —
|
||||||
|
# it fails with `ModuleNotFoundError: No module named 'conversion'`. We clone
|
||||||
|
# the repo and copy both the script and the package; Python puts the script's
|
||||||
|
# own directory on sys.path[0], so the package resolves when placed beside it.
|
||||||
LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
|
LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
|
||||||
|
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
||||||
CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
|
CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
|
||||||
if [ ! -f "${CONVERT_SCRIPT}" ]; then
|
|
||||||
echo "Downloading convert_hf_to_gguf.py from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
cloneLlamaCpp() {
|
||||||
curl -L --fail --retry 3 \
|
if [ ! -d "${LLAMA_CPP_SRC}/.git" ]; then
|
||||||
"https://raw.githubusercontent.com/ggml-org/llama.cpp/${LLAMA_CPP_CONVERT_VERSION}/convert_hf_to_gguf.py" \
|
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
||||||
-o "${CONVERT_SCRIPT}" || echo "Warning: Failed to download convert_hf_to_gguf.py."
|
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
||||||
|
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ ! -f "${CONVERT_SCRIPT}" ] || [ ! -d "${EDIR}/conversion" ]; then
|
||||||
|
echo "Fetching convert_hf_to_gguf.py + conversion/ from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||||
|
cloneLlamaCpp
|
||||||
|
cp "${LLAMA_CPP_SRC}/convert_hf_to_gguf.py" "${CONVERT_SCRIPT}"
|
||||||
|
rm -rf "${EDIR}/conversion"
|
||||||
|
cp -r "${LLAMA_CPP_SRC}/conversion" "${EDIR}/conversion"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install gguf package from the same llama.cpp commit to keep them in sync
|
# Install gguf package from the same llama.cpp commit to keep them in sync
|
||||||
@@ -41,12 +58,7 @@ QUANTIZE_BIN="${EDIR}/llama-quantize"
|
|||||||
if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
|
if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
|
||||||
if command -v cmake &>/dev/null; then
|
if command -v cmake &>/dev/null; then
|
||||||
echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||||
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
cloneLlamaCpp # reuses the clone fetched for convert_hf_to_gguf.py
|
||||||
if [ ! -d "${LLAMA_CPP_SRC}" ]; then
|
|
||||||
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
|
||||||
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
|
||||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
|
||||||
fi
|
|
||||||
cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
|
cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
|
||||||
cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
|
cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
|
||||||
cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
|
cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
|
||||||
|
|||||||
@@ -85,9 +85,15 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
|||||||
# The resulting binary still requires an AVX-512 capable CPU at runtime,
|
# The resulting binary still requires an AVX-512 capable CPU at runtime,
|
||||||
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
||||||
|
|
||||||
|
# Pin the source build to the same release the GPU path floors on
|
||||||
|
# (0.5.11, see requirements-cublas12-after.txt). An unpinned master clone
|
||||||
|
# pulls in newer CPU kernels (e.g. mamba/fla.cpp) that fail to compile
|
||||||
|
# (constexpr non-constant + kineto_LIBRARY-NOTFOUND). Bump deliberately.
|
||||||
|
SGLANG_VERSION="${SGLANG_VERSION:-v0.5.11}"
|
||||||
_sgl_src=$(mktemp -d)
|
_sgl_src=$(mktemp -d)
|
||||||
trap 'rm -rf "${_sgl_src}"' EXIT
|
trap 'rm -rf "${_sgl_src}"' EXIT
|
||||||
git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
git clone --depth 1 --branch "${SGLANG_VERSION}" \
|
||||||
|
https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||||
|
|
||||||
# Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
|
# Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
|
||||||
sed -i 's/-march=native/-march=sapphirerapids/g' \
|
sed -i 's/-march=native/-march=sapphirerapids/g' \
|
||||||
|
|||||||
@@ -570,6 +570,43 @@ impl Backend for KokorosService {
|
|||||||
) -> Result<Response<backend::Result>, Status> {
|
) -> Result<Response<backend::Result>, Status> {
|
||||||
Err(Status::unimplemented("Not supported"))
|
Err(Status::unimplemented("Not supported"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn sound_detection(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::SoundDetectionRequest>,
|
||||||
|
) -> Result<Response<backend::SoundDetectionResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn depth(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::DepthRequest>,
|
||||||
|
) -> Result<Response<backend::DepthResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn token_classify(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::TokenClassifyRequest>,
|
||||||
|
) -> Result<Response<backend::TokenClassifyResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn score(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::ScoreRequest>,
|
||||||
|
) -> Result<Response<backend::ScoreResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
type ForwardStream = ReceiverStream<Result<backend::ForwardReply, Status>>;
|
||||||
|
|
||||||
|
async fn forward(
|
||||||
|
&self,
|
||||||
|
_: Request<tonic::Streaming<backend::ForwardRequest>>,
|
||||||
|
) -> Result<Response<Self::ForwardStream>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
8
cmd/launcher/FyneApp.toml
Normal file
8
cmd/launcher/FyneApp.toml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
Website = "https://localai.io"
|
||||||
|
|
||||||
|
[Details]
|
||||||
|
Icon = "../../core/http/static/logo.png"
|
||||||
|
Name = "LocalAI"
|
||||||
|
ID = "com.localai.launcher"
|
||||||
|
Version = "0.0.0"
|
||||||
|
Build = 1
|
||||||
@@ -429,7 +429,7 @@ func (l *Launcher) CheckForUpdates() (bool, string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DownloadUpdate downloads the latest version
|
// DownloadUpdate downloads the latest version
|
||||||
func (l *Launcher) DownloadUpdate(version string, progressCallback func(float64)) error {
|
func (l *Launcher) DownloadUpdate(version string, progressCallback func(downloaded, total int64)) error {
|
||||||
return l.releaseManager.DownloadRelease(version, progressCallback)
|
return l.releaseManager.DownloadRelease(version, progressCallback)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -486,7 +486,6 @@ func (l *Launcher) showDownloadLocalAIDialog() {
|
|||||||
fyne.DoAndWait(func() {
|
fyne.DoAndWait(func() {
|
||||||
// Create a standalone window for the download dialog
|
// Create a standalone window for the download dialog
|
||||||
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
|
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
|
||||||
dialogWindow.Resize(fyne.NewSize(500, 350))
|
|
||||||
dialogWindow.CenterOnScreen()
|
dialogWindow.CenterOnScreen()
|
||||||
dialogWindow.SetCloseIntercept(func() {
|
dialogWindow.SetCloseIntercept(func() {
|
||||||
dialogWindow.Close()
|
dialogWindow.Close()
|
||||||
@@ -548,6 +547,7 @@ func (l *Launcher) showDownloadLocalAIDialog() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
dialogWindow.SetContent(content)
|
dialogWindow.SetContent(content)
|
||||||
|
resizeToContent(dialogWindow, content)
|
||||||
dialogWindow.Show()
|
dialogWindow.Show()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -621,88 +621,134 @@ func (l *Launcher) showDownloadError(title, message string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a standalone progress window for downloading LocalAI
|
// showDownloadProgress shows a standalone progress window for downloading LocalAI
|
||||||
|
// after a fresh install (no LocalAI binary present yet).
|
||||||
func (l *Launcher) showDownloadProgress(version, title string) {
|
func (l *Launcher) showDownloadProgress(version, title string) {
|
||||||
|
l.showDownloadProgressWindow(version, title, func(win fyne.Window) {
|
||||||
|
dialog.ShowConfirm("Installation Complete",
|
||||||
|
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
||||||
|
func(bool) {
|
||||||
|
win.Close()
|
||||||
|
l.updateStatus("LocalAI installed successfully")
|
||||||
|
if l.systray != nil {
|
||||||
|
l.systray.recreateMenu()
|
||||||
|
}
|
||||||
|
}, win)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// showDownloadProgressWindow renders the download progress popup shared by every
|
||||||
|
// "download/upgrade LocalAI" entry point. It owns the progress bar, the
|
||||||
|
// human-readable byte readout, resume-aware retry, and content-fit window
|
||||||
|
// sizing so the behaviour stays identical everywhere. onSuccess runs (on the UI
|
||||||
|
// goroutine) once the download verifies, and is responsible for the success
|
||||||
|
// dialog and any follow-up; the window is passed in so it can be parented/closed.
|
||||||
|
func (l *Launcher) showDownloadProgressWindow(version, title string, onSuccess func(win fyne.Window)) {
|
||||||
fyne.DoAndWait(func() {
|
fyne.DoAndWait(func() {
|
||||||
// Create progress window
|
|
||||||
progressWindow := l.app.NewWindow("Downloading LocalAI")
|
progressWindow := l.app.NewWindow("Downloading LocalAI")
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
progressWindow.CenterOnScreen()
|
||||||
progressWindow.SetCloseIntercept(func() {
|
progressWindow.SetCloseIntercept(func() {
|
||||||
progressWindow.Close()
|
progressWindow.Close()
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
progressBar := widget.NewProgressBar()
|
||||||
progressBar.SetValue(0)
|
progressBar.SetValue(0)
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
// Status label. Truncate with an ellipsis so a long "Download failed:
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
// <url>" message can't stretch the window (and progress bar) to fit the
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
// whole error on one line.
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
statusLabel := widget.NewLabel("Preparing download...")
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
||||||
releaseNotesURL, err := l.githubReleaseNotesURL(version)
|
releaseNotesURL, err := l.githubReleaseNotesURL(version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
log.Printf("Failed to parse URL: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
l.app.OpenURL(releaseNotesURL)
|
l.app.OpenURL(releaseNotesURL)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress container
|
// Retry button: hidden until a download fails. GitHub downloads are
|
||||||
progressContainer := container.NewVBox(
|
// flaky, and the underlying download resumes from the partial file, so
|
||||||
|
// a retry continues where it left off rather than starting over.
|
||||||
|
retryButton := widget.NewButton("Retry", nil)
|
||||||
|
retryButton.Importance = widget.HighImportance
|
||||||
|
retryButton.Hide()
|
||||||
|
|
||||||
|
buttonRow := container.NewHBox(releaseNotesButton, retryButton)
|
||||||
|
content := container.NewVBox(
|
||||||
widget.NewLabel(title),
|
widget.NewLabel(title),
|
||||||
progressBar,
|
progressBar,
|
||||||
statusLabel,
|
statusLabel,
|
||||||
widget.NewSeparator(),
|
widget.NewSeparator(),
|
||||||
releaseNotesButton,
|
buttonRow,
|
||||||
)
|
)
|
||||||
|
progressWindow.SetContent(content)
|
||||||
|
resizeToContent(progressWindow, content)
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
var startDownload func()
|
||||||
progressWindow.Show()
|
startDownload = func() {
|
||||||
|
retryButton.Hide()
|
||||||
|
progressBar.SetValue(0)
|
||||||
|
statusLabel.SetText("Preparing download...")
|
||||||
|
resizeToContent(progressWindow, content)
|
||||||
|
|
||||||
// Start download in background
|
go func() {
|
||||||
go func() {
|
err := l.DownloadUpdate(version, func(downloaded, total int64) {
|
||||||
err := l.DownloadUpdate(version, func(progress float64) {
|
fyne.Do(func() {
|
||||||
// Update progress bar
|
if total > 0 {
|
||||||
fyne.Do(func() {
|
progressBar.SetValue(float64(downloaded) / float64(total))
|
||||||
progressBar.SetValue(progress)
|
statusLabel.SetText(fmt.Sprintf("Downloading… %s / %s", formatBytes(downloaded), formatBytes(total)))
|
||||||
percentage := int(progress * 100)
|
} else {
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
statusLabel.SetText(fmt.Sprintf("Downloading… %s", formatBytes(downloaded)))
|
||||||
|
}
|
||||||
|
})
|
||||||
})
|
})
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
fyne.Do(func() {
|
||||||
fyne.Do(func() {
|
if err != nil {
|
||||||
if err != nil {
|
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
retryButton.Show()
|
||||||
// Show error dialog
|
resizeToContent(progressWindow, content)
|
||||||
dialog.ShowError(err, progressWindow)
|
return
|
||||||
} else {
|
}
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
progressBar.SetValue(1.0)
|
||||||
|
statusLabel.SetText("Download complete")
|
||||||
|
onSuccess(progressWindow)
|
||||||
|
})
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
retryButton.OnTapped = startDownload
|
||||||
|
|
||||||
// Show success dialog
|
progressWindow.Show()
|
||||||
dialog.ShowConfirm("Installation Complete",
|
startDownload()
|
||||||
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
|
||||||
func(close bool) {
|
|
||||||
progressWindow.Close()
|
|
||||||
// Update status and refresh systray menu
|
|
||||||
l.updateStatus("LocalAI installed successfully")
|
|
||||||
|
|
||||||
if l.systray != nil {
|
|
||||||
l.systray.recreateMenu()
|
|
||||||
}
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resizeToContent sizes a window to fit its content (with a sane minimum width)
|
||||||
|
// so the dialog doesn't show a large blank gap below the last widget.
|
||||||
|
func resizeToContent(w fyne.Window, content fyne.CanvasObject) {
|
||||||
|
size := content.MinSize()
|
||||||
|
if size.Width < 400 {
|
||||||
|
size.Width = 400
|
||||||
|
}
|
||||||
|
w.Resize(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatBytes renders a byte count as a human-readable size (e.g. "12.3 MB").
//
// Values below 1024 are printed as whole bytes ("512 B"). Larger values are
// scaled by powers of 1024 and printed with one decimal place followed by the
// unit letter (K, M, G, T, P, E) and "B".
func formatBytes(b int64) string {
	const unit = 1024
	const suffixes = "KMGTPE"

	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	// Find the largest power of 1024 that does not exceed b.
	div, exp := int64(unit), 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}

	value := float64(b) / float64(div)
	// "%.1f" rounds to one decimal, so a value of 1023.95 or more would be
	// printed as "1024.0" of the current unit. Promote it to the next unit so
	// that e.g. 1048575 bytes reads "1.0 MB" rather than "1024.0 KB".
	if value >= 1023.95 && exp < len(suffixes)-1 {
		value /= unit
		exp++
	}

	return fmt.Sprintf("%.1f %cB", value, suffixes[exp])
}
|
||||||
|
|
||||||
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
|
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
|
||||||
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
|
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
|
||||||
scanner := bufio.NewScanner(reader)
|
scanner := bufio.NewScanner(reader)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -50,6 +51,12 @@ type ReleaseManager struct {
|
|||||||
ChecksumsPath string
|
ChecksumsPath string
|
||||||
// MetadataPath is where version metadata is stored
|
// MetadataPath is where version metadata is stored
|
||||||
MetadataPath string
|
MetadataPath string
|
||||||
|
// BaseDownloadURL is the base URL release assets are downloaded from
|
||||||
|
// (defaults to https://github.com; overridable for testing)
|
||||||
|
BaseDownloadURL string
|
||||||
|
// RetryBackoff is the base wait between download attempts; the Nth retry
|
||||||
|
// waits N*RetryBackoff (defaults to 1s; lowered in tests)
|
||||||
|
RetryBackoff time.Duration
|
||||||
// HTTPClient is the HTTP client used for downloads
|
// HTTPClient is the HTTP client used for downloads
|
||||||
HTTPClient *http.Client
|
HTTPClient *http.Client
|
||||||
}
|
}
|
||||||
@@ -62,28 +69,94 @@ func NewReleaseManager() *ReleaseManager {
|
|||||||
metadataPath := filepath.Join(homeDir, ".localai", "metadata")
|
metadataPath := filepath.Join(homeDir, ".localai", "metadata")
|
||||||
|
|
||||||
return &ReleaseManager{
|
return &ReleaseManager{
|
||||||
GitHubOwner: "mudler",
|
GitHubOwner: "mudler",
|
||||||
GitHubRepo: "LocalAI",
|
GitHubRepo: "LocalAI",
|
||||||
BinaryPath: binaryPath,
|
BinaryPath: binaryPath,
|
||||||
CurrentVersion: internal.PrintableVersion(),
|
CurrentVersion: internal.PrintableVersion(),
|
||||||
ChecksumsPath: checksumsPath,
|
ChecksumsPath: checksumsPath,
|
||||||
MetadataPath: metadataPath,
|
MetadataPath: metadataPath,
|
||||||
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
|
BaseDownloadURL: "https://github.com",
|
||||||
|
RetryBackoff: 1 * time.Second,
|
||||||
|
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLatestRelease fetches the latest release information from GitHub
|
// GetLatestRelease resolves the latest LocalAI release.
|
||||||
|
//
|
||||||
|
// It first follows the github.com "releases/latest" redirect, which reveals the
|
||||||
|
// latest tag in the final URL and—crucially—is NOT subject to the
|
||||||
|
// 60-requests/hour unauthenticated rate limit of api.github.com. That limit is
|
||||||
|
// per-IP, so on shared/NAT/CGNAT/cloud addresses the API returns 403 almost
|
||||||
|
// immediately (e.g. on a fresh install with no LocalAI present yet). The
|
||||||
|
// redirect avoids that entirely. The richer JSON API is kept only as a fallback.
|
||||||
|
//
|
||||||
|
// Only the version is consumed by callers, so the redirect's tag is sufficient.
|
||||||
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
||||||
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
|
version, redirectErr := rm.latestVersionFromRedirect()
|
||||||
|
if redirectErr == nil {
|
||||||
|
return &Release{Version: version}, nil
|
||||||
|
}
|
||||||
|
log.Printf("Could not resolve latest version via release redirect (%v); falling back to GitHub API", redirectErr)
|
||||||
|
|
||||||
|
release, apiErr := rm.latestReleaseFromAPI()
|
||||||
|
if apiErr != nil {
|
||||||
|
// Surface both failures so a rate-limited API doesn't mask the (usually
|
||||||
|
// more relevant) redirect error.
|
||||||
|
return nil, fmt.Errorf("failed to fetch latest release: %v (redirect: %v)", apiErr, redirectErr)
|
||||||
|
}
|
||||||
|
return release, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestVersionFromRedirect returns the latest tag by following the github.com
|
||||||
|
// "releases/latest" redirect to ".../releases/tag/<tag>".
|
||||||
|
func (rm *ReleaseManager) latestVersionFromRedirect() (string, error) {
|
||||||
|
url := fmt.Sprintf("%s/%s/%s/releases/latest", rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo)
|
||||||
|
|
||||||
resp, err := rm.HTTPClient.Get(url)
|
resp, err := rm.HTTPClient.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("unexpected status %s", resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// After the redirect is followed, the final request URL is the tag page.
|
||||||
|
version := path.Base(resp.Request.URL.Path)
|
||||||
|
if version == "" || version == "." || version == "latest" {
|
||||||
|
return "", fmt.Errorf("could not determine version from %s", resp.Request.URL.String())
|
||||||
|
}
|
||||||
|
return version, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestReleaseFromAPI fetches the latest release JSON from api.github.com. This
|
||||||
|
// is the fallback path; it is rate-limited unless GITHUB_TOKEN is set.
|
||||||
|
func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
|
||||||
|
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Accept", "application/vnd.github+json")
|
||||||
|
// An optional token lifts the unauthenticated 60/hour limit to 5000/hour.
|
||||||
|
if token := os.Getenv("GITHUB_TOKEN"); token != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := rm.HTTPClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
|
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
return nil, fmt.Errorf("failed to fetch latest release: status %d", resp.StatusCode)
|
if (resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusTooManyRequests) &&
|
||||||
|
resp.Header.Get("X-RateLimit-Remaining") == "0" {
|
||||||
|
return nil, fmt.Errorf("GitHub API rate limit exceeded (status %d); retry later or set GITHUB_TOKEN to raise the limit", resp.StatusCode)
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the JSON response properly
|
// Parse the JSON response properly
|
||||||
@@ -106,7 +179,7 @@ func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DownloadRelease downloads a specific version of LocalAI
|
// DownloadRelease downloads a specific version of LocalAI
|
||||||
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(float64)) error {
|
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(downloaded, total int64)) error {
|
||||||
// Ensure the binary directory exists
|
// Ensure the binary directory exists
|
||||||
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
|
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
|
||||||
return fmt.Errorf("failed to create binary directory: %w", err)
|
return fmt.Errorf("failed to create binary directory: %w", err)
|
||||||
@@ -117,16 +190,16 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
|
|||||||
localPath := filepath.Join(rm.BinaryPath, "local-ai")
|
localPath := filepath.Join(rm.BinaryPath, "local-ai")
|
||||||
|
|
||||||
// Download the binary
|
// Download the binary
|
||||||
downloadURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/%s",
|
downloadURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/%s",
|
||||||
rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
|
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
|
||||||
|
|
||||||
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
|
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
|
||||||
return fmt.Errorf("failed to download binary: %w", err)
|
return fmt.Errorf("failed to download binary: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download and verify checksums
|
// Download and verify checksums
|
||||||
checksumURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
|
checksumURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
|
||||||
rm.GitHubOwner, rm.GitHubRepo, version, version)
|
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, version)
|
||||||
|
|
||||||
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
|
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
|
||||||
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
|
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
|
||||||
@@ -154,6 +227,10 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
|
|||||||
// Verify the checksum if we have a checksum file
|
// Verify the checksum if we have a checksum file
|
||||||
if _, err := os.Stat(checksumPath); err == nil {
|
if _, err := os.Stat(checksumPath); err == nil {
|
||||||
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
|
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
|
||||||
|
// Discard the corrupt binary (and any leftover partial) so the next
|
||||||
|
// retry starts from a clean slate rather than resuming corruption.
|
||||||
|
os.Remove(localPath)
|
||||||
|
os.Remove(localPath + ".part")
|
||||||
return fmt.Errorf("checksum verification failed: %w", err)
|
return fmt.Errorf("checksum verification failed: %w", err)
|
||||||
}
|
}
|
||||||
log.Printf("Checksum verification successful")
|
log.Printf("Checksum verification successful")
|
||||||
@@ -196,44 +273,88 @@ func (rm *ReleaseManager) GetBinaryName(version string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// downloadFile downloads a file from a URL to a local path with optional progress callback
|
// downloadFile downloads a file from a URL to a local path with optional progress callback
|
||||||
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(float64)) error {
|
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(downloaded, total int64)) error {
|
||||||
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
|
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
// downloadFileWithRetry downloads a file from a URL with retry logic
|
// downloadFileWithRetry downloads a file with retry and HTTP Range resume.
|
||||||
func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCallback func(float64), maxRetries int) error {
|
//
|
||||||
|
// The body is streamed to "<dest>.part" and only renamed to dest on success, so
|
||||||
|
// a dropped connection leaves a partial file that the next attempt continues via
|
||||||
|
// a "Range: bytes=N-" request instead of restarting from zero. This matters for
|
||||||
|
// GitHub release downloads, which are large and flaky.
|
||||||
|
func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallback func(downloaded, total int64), maxRetries int) error {
|
||||||
|
partPath := dest + ".part"
|
||||||
var lastErr error
|
var lastErr error
|
||||||
|
|
||||||
for attempt := 1; attempt <= maxRetries; attempt++ {
|
for attempt := 1; attempt <= maxRetries; attempt++ {
|
||||||
if attempt > 1 {
|
if attempt > 1 {
|
||||||
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
|
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
|
||||||
time.Sleep(time.Duration(attempt) * time.Second)
|
time.Sleep(time.Duration(attempt) * rm.RetryBackoff)
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := rm.HTTPClient.Get(url)
|
// Resume from however much we already have on disk.
|
||||||
|
var offset int64
|
||||||
|
if fi, err := os.Stat(partPath); err == nil {
|
||||||
|
offset = fi.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if offset > 0 {
|
||||||
|
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := rm.HTTPClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastErr = err
|
lastErr = err
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
switch resp.StatusCode {
|
||||||
|
case http.StatusOK:
|
||||||
|
// Server ignored the Range (or we had nothing): start fresh.
|
||||||
|
offset = 0
|
||||||
|
case http.StatusPartialContent:
|
||||||
|
// Resume: append to the existing partial file.
|
||||||
|
case http.StatusRequestedRangeNotSatisfiable:
|
||||||
|
// Stale or already-complete partial: discard and restart fresh.
|
||||||
|
resp.Body.Close()
|
||||||
|
os.Remove(partPath)
|
||||||
|
lastErr = fmt.Errorf("partial download no longer valid (status %s), restarting", resp.Status)
|
||||||
|
continue
|
||||||
|
default:
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
lastErr = fmt.Errorf("bad status: %s", resp.Status)
|
lastErr = fmt.Errorf("bad status: %s", resp.Status)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
out, err := os.Create(filepath)
|
var out *os.File
|
||||||
|
if offset > 0 {
|
||||||
|
out, err = os.OpenFile(partPath, os.O_WRONLY|os.O_APPEND, 0644)
|
||||||
|
} else {
|
||||||
|
out, err = os.Create(partPath)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a progress reader if callback is provided
|
// On a 206 the Content-Length is the remaining bytes, so the full size
|
||||||
|
// is what we already have plus what's still to come.
|
||||||
|
total := resp.ContentLength
|
||||||
|
if offset > 0 && total > 0 {
|
||||||
|
total += offset
|
||||||
|
}
|
||||||
|
|
||||||
var reader io.Reader = resp.Body
|
var reader io.Reader = resp.Body
|
||||||
if progressCallback != nil && resp.ContentLength > 0 {
|
if progressCallback != nil && total > 0 {
|
||||||
reader = &progressReader{
|
reader = &progressReader{
|
||||||
Reader: resp.Body,
|
Reader: resp.Body,
|
||||||
Total: resp.ContentLength,
|
Total: total,
|
||||||
|
Current: offset,
|
||||||
Callback: progressCallback,
|
Callback: progressCallback,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -243,11 +364,14 @@ func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCa
|
|||||||
out.Close()
|
out.Close()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Keep the partial file so the next attempt can resume from it.
|
||||||
lastErr = err
|
lastErr = err
|
||||||
os.Remove(filepath)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := os.Rename(partPath, dest); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -322,20 +446,21 @@ func (rm *ReleaseManager) saveVersionMetadata(version string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// progressReader wraps an io.Reader to provide download progress
|
// progressReader wraps an io.Reader to provide download progress as a
|
||||||
|
// (downloaded, total) byte count so callers can render both a progress bar and
|
||||||
|
// a human-readable size.
|
||||||
type progressReader struct {
|
type progressReader struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
Total int64
|
Total int64
|
||||||
Current int64
|
Current int64
|
||||||
Callback func(float64)
|
Callback func(downloaded, total int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pr *progressReader) Read(p []byte) (int, error) {
|
func (pr *progressReader) Read(p []byte) (int, error) {
|
||||||
n, err := pr.Reader.Read(p)
|
n, err := pr.Reader.Read(p)
|
||||||
pr.Current += int64(n)
|
pr.Current += int64(n)
|
||||||
if pr.Callback != nil {
|
if pr.Callback != nil {
|
||||||
progress := float64(pr.Current) / float64(pr.Total)
|
pr.Callback(pr.Current, pr.Total)
|
||||||
pr.Callback(progress)
|
|
||||||
}
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,17 @@
|
|||||||
package launcher_test
|
package launcher_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
@@ -178,4 +186,221 @@ var _ = Describe("ReleaseManager", func() {
|
|||||||
Expect(err.Error()).To(ContainSubstring("checksum not found"))
|
Expect(err.Error()).To(ContainSubstring("checksum not found"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("DownloadRelease resume and retry", func() {
	var (
		version    string
		binaryName string
		content    []byte
		checksums  string
		finalPath  string
		partPath   string
	)

	// startServer spins up a test server that answers the checksums request
	// itself and hands every other request (the binary download) to
	// serveBinary. It also points the release manager at the new server.
	startServer := func(serveBinary http.HandlerFunc) *httptest.Server {
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if strings.HasSuffix(r.URL.Path, "checksums.txt") {
				_, _ = w.Write([]byte(checksums))
				return
			}
			serveBinary(w, r)
		}))
		rm.BaseDownloadURL = server.URL
		return server
	}

	// rangeStart extracts N from a "bytes=N-" Range header (0 if unparsable).
	rangeStart := func(header string) int {
		var start int
		_, _ = fmt.Sscanf(header, "bytes=%d-", &start)
		return start
	}

	// servePartial answers with a 206 carrying content[start:] and returns the
	// number of body bytes written.
	servePartial := func(w http.ResponseWriter, start int) int {
		w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
		w.WriteHeader(http.StatusPartialContent)
		written, _ := w.Write(content[start:])
		return written
	}

	// serveFull answers with a 200 carrying the whole content and returns the
	// number of body bytes written.
	serveFull := func(w http.ResponseWriter) int {
		w.WriteHeader(http.StatusOK)
		written, _ := w.Write(content)
		return written
	}

	BeforeEach(func() {
		version = "v9.9.9"
		binaryName = rm.GetBinaryName(version)

		// Deterministic, non-trivial content so resume/append bugs surface.
		content = make([]byte, 4096)
		for i := range content {
			content[i] = byte(i % 251)
		}
		digest := sha256.Sum256(content)
		checksums = fmt.Sprintf("%s %s\n", hex.EncodeToString(digest[:]), binaryName)

		finalPath = filepath.Join(tempDir, "local-ai")
		partPath = finalPath + ".part"

		// Keep the persistent checksum/metadata dirs inside the temp dir so
		// the specs never touch the real ~/.localai and pre-existing checksum
		// files cannot short-circuit the download.
		rm.ChecksumsPath = filepath.Join(tempDir, "checksums")
		rm.MetadataPath = filepath.Join(tempDir, "metadata")
		rm.GitHubOwner = "owner"
		rm.GitHubRepo = "repo"
		rm.RetryBackoff = time.Millisecond

		Expect(os.MkdirAll(tempDir, 0755)).To(Succeed())
	})

	It("resumes from a partial .part file using a Range request", func() {
		Expect(os.WriteFile(partPath, content[:1024], 0644)).To(Succeed())

		var (
			mu             sync.Mutex
			sawRange       bool
			binBytesServed int
		)

		server := startServer(func(w http.ResponseWriter, r *http.Request) {
			var served int
			if header := r.Header.Get("Range"); header != "" {
				start := rangeStart(header)
				mu.Lock()
				sawRange = true
				mu.Unlock()
				served = servePartial(w, start)
			} else {
				served = serveFull(w)
			}
			mu.Lock()
			binBytesServed += served
			mu.Unlock()
		})
		defer server.Close()

		Expect(rm.DownloadRelease(version, nil)).ToNot(HaveOccurred())

		got, err := os.ReadFile(finalPath)
		Expect(err).ToNot(HaveOccurred())
		Expect(got).To(Equal(content))
		Expect(sawRange).To(BeTrue(), "expected the download to resume with a Range request")
		Expect(binBytesServed).To(Equal(len(content)-1024), "expected only the remaining bytes to be served")
		Expect(partPath).ToNot(BeAnExistingFile())
	})

	It("starts fresh when the server ignores the Range header (200)", func() {
		// A stale/garbage partial that must NOT be appended to.
		Expect(os.WriteFile(partPath, []byte("garbage-garbage-garbage"), 0644)).To(Succeed())

		// Ignore any Range and always serve the full body.
		server := startServer(func(w http.ResponseWriter, _ *http.Request) {
			serveFull(w)
		})
		defer server.Close()

		Expect(rm.DownloadRelease(version, nil)).ToNot(HaveOccurred())

		got, err := os.ReadFile(finalPath)
		Expect(err).ToNot(HaveOccurred())
		Expect(got).To(Equal(content))
	})

	It("restarts the download when the partial is stale (416)", func() {
		// Oversized partial -> requested Range start is beyond the content.
		Expect(os.WriteFile(partPath, make([]byte, len(content)+10), 0644)).To(Succeed())

		server := startServer(func(w http.ResponseWriter, r *http.Request) {
			header := r.Header.Get("Range")
			if header == "" {
				serveFull(w)
				return
			}
			start := rangeStart(header)
			if start >= len(content) {
				w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
				return
			}
			servePartial(w, start)
		})
		defer server.Close()

		Expect(rm.DownloadRelease(version, nil)).ToNot(HaveOccurred())

		got, err := os.ReadFile(finalPath)
		Expect(err).ToNot(HaveOccurred())
		Expect(got).To(Equal(content))
	})

	It("removes the downloaded file when checksum verification fails", func() {
		bad := []byte("this is definitely not the expected binary content")

		// Checksums are for `content`, but the binary served is `bad`.
		server := startServer(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusOK)
			_, _ = w.Write(bad)
		})
		defer server.Close()

		err := rm.DownloadRelease(version, nil)
		Expect(err).To(HaveOccurred())
		Expect(err.Error()).To(ContainSubstring("checksum"))
		Expect(finalPath).ToNot(BeAnExistingFile())
		Expect(partPath).ToNot(BeAnExistingFile())
	})

	It("reports progress as downloaded and total byte counts", func() {
		server := startServer(func(w http.ResponseWriter, _ *http.Request) {
			w.Header().Set("Content-Length", strconv.Itoa(len(content)))
			serveFull(w)
		})
		defer server.Close()

		var (
			mu             sync.Mutex
			lastDownloaded int64
			lastTotal      int64
		)
		onProgress := func(downloaded, total int64) {
			mu.Lock()
			lastDownloaded = downloaded
			lastTotal = total
			mu.Unlock()
		}

		Expect(rm.DownloadRelease(version, onProgress)).ToNot(HaveOccurred())
		Expect(lastTotal).To(Equal(int64(len(content))))
		Expect(lastDownloaded).To(Equal(int64(len(content))))
	})
})
|
||||||
|
|
||||||
|
Describe("GetLatestRelease", func() {
	It("resolves the latest version from the releases/latest redirect", func() {
		// The github.com redirect must win over the rate-limited
		// api.github.com: when the redirect works, the tag is obtained
		// without the API ever being contacted.
		handler := func(w http.ResponseWriter, r *http.Request) {
			requested := r.URL.Path
			if strings.HasSuffix(requested, "/releases/latest") {
				http.Redirect(w, r, "/owner/repo/releases/tag/v9.9.9", http.StatusFound)
				return
			}
			if strings.HasSuffix(requested, "/releases/tag/v9.9.9") {
				w.WriteHeader(http.StatusOK)
				return
			}
			w.WriteHeader(http.StatusNotFound)
		}

		server := httptest.NewServer(http.HandlerFunc(handler))
		defer server.Close()

		rm.BaseDownloadURL = server.URL
		rm.GitHubOwner = "owner"
		rm.GitHubRepo = "repo"

		latest, err := rm.GetLatestRelease()
		Expect(err).ToNot(HaveOccurred())
		Expect(latest.Version).To(Equal("v9.9.9"))
	})
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -443,84 +443,23 @@ func (sm *SystrayManager) showStartupErrorDialog(err error) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a progress window for downloading updates
|
// showDownloadProgress shows a progress window for downloading updates. The
|
||||||
|
// progress UI (byte readout, resume-aware retry, sizing) is shared with the
|
||||||
|
// other download entry points via the launcher; only the post-success behaviour
|
||||||
|
// (restart prompt + systray refresh) is specific to the update flow.
|
||||||
func (sm *SystrayManager) showDownloadProgress(version string) {
|
func (sm *SystrayManager) showDownloadProgress(version string) {
|
||||||
// Create a new window for download progress
|
sm.launcher.showDownloadProgressWindow(version, fmt.Sprintf("Downloading LocalAI version %s", version), func(win fyne.Window) {
|
||||||
progressWindow := sm.app.NewWindow("Downloading LocalAI Update")
|
dialog.ShowConfirm("Update Downloaded",
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
|
||||||
progressWindow.CenterOnScreen()
|
func(restart bool) {
|
||||||
|
if restart {
|
||||||
|
sm.app.Quit()
|
||||||
|
}
|
||||||
|
win.Close()
|
||||||
|
}, win)
|
||||||
|
|
||||||
// Progress bar
|
sm.hasUpdateAvailable = false
|
||||||
progressBar := widget.NewProgressBar()
|
sm.latestVersion = ""
|
||||||
progressBar.SetValue(0)
|
sm.recreateMenu()
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
|
||||||
releaseNotesURL, err := sm.launcher.githubReleaseNotesURL(version)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
sm.app.OpenURL(releaseNotesURL)
|
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress container
|
|
||||||
progressContainer := container.NewVBox(
|
|
||||||
widget.NewLabel(fmt.Sprintf("Downloading LocalAI version %s", version)),
|
|
||||||
progressBar,
|
|
||||||
statusLabel,
|
|
||||||
widget.NewSeparator(),
|
|
||||||
releaseNotesButton,
|
|
||||||
)
|
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
|
||||||
progressWindow.Show()
|
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
|
||||||
err := sm.launcher.DownloadUpdate(version, func(progress float64) {
|
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
|
||||||
progressBar.SetValue(progress)
|
|
||||||
percentage := int(progress * 100)
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
|
||||||
if err != nil {
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
|
||||||
// Show error dialog
|
|
||||||
dialog.ShowError(err, progressWindow)
|
|
||||||
} else {
|
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
|
||||||
|
|
||||||
// Show restart dialog
|
|
||||||
dialog.ShowConfirm("Update Downloaded",
|
|
||||||
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
|
|
||||||
func(restart bool) {
|
|
||||||
if restart {
|
|
||||||
sm.app.Quit()
|
|
||||||
}
|
|
||||||
progressWindow.Close()
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Update systray menu
|
|
||||||
if err == nil {
|
|
||||||
sm.hasUpdateAvailable = false
|
|
||||||
sm.latestVersion = ""
|
|
||||||
sm.recreateMenu()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -490,14 +490,19 @@ func (ui *LauncherUI) downloadUpdate() {
|
|||||||
ui.UpdateStatus("Downloading update " + version + "...")
|
ui.UpdateStatus("Downloading update " + version + "...")
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
|
err := ui.launcher.DownloadUpdate(version, func(downloaded, total int64) {
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
ui.progressBar.SetValue(progress)
|
if total > 0 {
|
||||||
|
ui.progressBar.SetValue(float64(downloaded) / float64(total))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
// Update status with percentage
|
// The progress bar already shows the percentage, so report the
|
||||||
percentage := int(progress * 100)
|
// human-readable size here instead of repeating the percent.
|
||||||
ui.UpdateStatus(fmt.Sprintf("Downloading update %s... %d%%", version, percentage))
|
if total > 0 {
|
||||||
|
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s / %s", version, formatBytes(downloaded), formatBytes(total)))
|
||||||
|
} else {
|
||||||
|
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s", version, formatBytes(downloaded)))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
@@ -598,82 +603,6 @@ func (ui *LauncherUI) LoadConfiguration() {
|
|||||||
log.Printf("UI LoadConfiguration: configuration loaded successfully")
|
log.Printf("UI LoadConfiguration: configuration loaded successfully")
|
||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a progress window for downloading LocalAI
|
|
||||||
func (ui *LauncherUI) showDownloadProgress(version, title string) {
|
|
||||||
fyne.DoAndWait(func() {
|
|
||||||
// Create progress window using the launcher's app
|
|
||||||
progressWindow := ui.launcher.app.NewWindow("Downloading LocalAI")
|
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
|
||||||
progressBar.SetValue(0)
|
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
|
||||||
releaseNotesURL, err := ui.launcher.githubReleaseNotesURL(version)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
ui.launcher.app.OpenURL(releaseNotesURL)
|
|
||||||
})
|
|
||||||
|
|
||||||
// Progress container
|
|
||||||
progressContainer := container.NewVBox(
|
|
||||||
widget.NewLabel(title),
|
|
||||||
progressBar,
|
|
||||||
statusLabel,
|
|
||||||
widget.NewSeparator(),
|
|
||||||
releaseNotesButton,
|
|
||||||
)
|
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
|
||||||
progressWindow.Show()
|
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
|
||||||
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
|
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
|
||||||
progressBar.SetValue(progress)
|
|
||||||
percentage := int(progress * 100)
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
|
||||||
if err != nil {
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
|
||||||
// Show error dialog
|
|
||||||
dialog.ShowError(err, progressWindow)
|
|
||||||
} else {
|
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
|
||||||
|
|
||||||
// Show success dialog
|
|
||||||
dialog.ShowConfirm("Installation Complete",
|
|
||||||
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
|
||||||
func(close bool) {
|
|
||||||
progressWindow.Close()
|
|
||||||
// Update status
|
|
||||||
ui.UpdateStatus("LocalAI installed successfully")
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateRunningState updates UI based on LocalAI running state
|
// UpdateRunningState updates UI based on LocalAI running state
|
||||||
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
|
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
|
|||||||
14
contrib/macos/Launcher.entitlements
Normal file
14
contrib/macos/Launcher.entitlements
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||||
|
<plist version="1.0">
|
||||||
|
<dict>
|
||||||
|
<key>com.apple.security.network.client</key>
|
||||||
|
<true/>
|
||||||
|
<key>com.apple.security.network.server</key>
|
||||||
|
<true/>
|
||||||
|
<key>com.apple.security.cs.allow-jit</key>
|
||||||
|
<true/>
|
||||||
|
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
|
||||||
|
<true/>
|
||||||
|
</dict>
|
||||||
|
</plist>
|
||||||
84
contrib/macos/sign-and-notarize.sh
Executable file
84
contrib/macos/sign-and-notarize.sh
Executable file
@@ -0,0 +1,84 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Code-sign and notarize macOS artifacts for LocalAI.
|
||||||
|
# Every sub-command is a no-op (exit 0) when its required secret is unset,
|
||||||
|
# so unsigned builds (forks, local dev, PRs) keep working.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ENTITLEMENTS="contrib/macos/Launcher.entitlements"
|
||||||
|
KEYCHAIN="localai-ci.keychain-db"
|
||||||
|
|
||||||
|
# cmd_import_cert: import the base64-encoded signing certificate
# ($MACOS_CERTIFICATE, a .p12 protected by $MACOS_CERTIFICATE_PWD) into the
# dedicated CI keychain ($KEYCHAIN) and add that keychain to the user search
# list ahead of the current default keychain.
#
# Returns 0 without doing anything when MACOS_CERTIFICATE is unset, so
# unsigned builds keep working. When it is set, MACOS_CI_KEYCHAIN_PWD and
# MACOS_CERTIFICATE_PWD are required.
cmd_import_cert() {
  if [ -z "${MACOS_CERTIFICATE:-}" ]; then
    echo "[sign] MACOS_CERTIFICATE unset: skipping cert import (unsigned build)"
    return 0
  fi
  local certdir certfile keychain_pwd default_keychain
  # Validate the required password before anything is written to disk.
  keychain_pwd="${MACOS_CI_KEYCHAIN_PWD:?MACOS_CI_KEYCHAIN_PWD required when signing}"

  # Decode into a private temp directory rather than "$(mktemp).p12": the
  # latter leaves the file mktemp created behind and writes the certificate
  # to a second path that mktemp never reserved.
  certdir="$(mktemp -d)"
  certfile="$certdir/certificate.p12"
  # The script runs under `set -e`, so a failing `security` call aborts before
  # the explicit cleanup below; the EXIT trap makes sure the decoded private
  # key never outlives this function. $certdir is expanded now on purpose:
  # the local variable is out of scope by the time the trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf '$certdir'" EXIT

  echo "$MACOS_CERTIFICATE" | base64 --decode > "$certfile"
  security create-keychain -p "$keychain_pwd" "$KEYCHAIN"
  security set-keychain-settings -lut 21600 "$KEYCHAIN"
  security unlock-keychain -p "$keychain_pwd" "$KEYCHAIN"
  security import "$certfile" -k "$KEYCHAIN" -P "${MACOS_CERTIFICATE_PWD:?}" \
    -T /usr/bin/codesign -T /usr/bin/security
  security set-key-partition-list -S apple-tool:,apple:,codesign: \
    -s -k "$keychain_pwd" "$KEYCHAIN" >/dev/null
  default_keychain="$(security default-keychain | tr -d ' "')"
  security list-keychains -d user -s "$KEYCHAIN" "$default_keychain"

  rm -rf "$certdir"
  trap - EXIT
  echo "[sign] certificate imported into $KEYCHAIN"
}
|
||||||
|
|
||||||
|
# cmd_sign <path>: code-sign an app bundle or any other artifact (e.g. a disk
# image) with $MACOS_SIGN_IDENTITY, then verify the signature.
#
# Returns 0 without signing when MACOS_SIGN_IDENTITY is unset. App bundles are
# signed with the hardened runtime and $ENTITLEMENTS; everything else gets a
# plain timestamped signature.
cmd_sign() {
  # ${1?...} fails with a usable message instead of `set -u`'s bare
  # "unbound variable" when the path argument is missing.
  local target="${1?usage: sign <path>}"
  if [ -z "${MACOS_SIGN_IDENTITY:-}" ]; then
    echo "[sign] MACOS_SIGN_IDENTITY unset: skipping codesign of $target"
    return 0
  fi
  # Match on the path without a trailing slash: shell completion commonly
  # yields "Foo.app/", which would otherwise fall through to the container
  # branch and be signed without hardened runtime or entitlements.
  case "${target%/}" in
    *.app)
      # Hardened runtime + entitlements are required for notarizing the app bundle.
      codesign --deep --force --options runtime --timestamp \
        --entitlements "$ENTITLEMENTS" \
        --sign "$MACOS_SIGN_IDENTITY" "$target"
      ;;
    *)
      # A disk image carries no entitlements/runtime; just sign the container.
      codesign --force --timestamp --sign "$MACOS_SIGN_IDENTITY" "$target"
      ;;
  esac
  codesign --verify --strict --verbose=2 "$target"
  echo "[sign] signed $target"
}
|
||||||
|
|
||||||
|
# cmd_notarize <dmg>: submit a disk image to the Apple notary service with the
# base64-encoded API key in $MACOS_NOTARY_KEY, wait for the result, then
# staple and validate the ticket.
#
# Returns 0 without notarizing when MACOS_NOTARY_KEY is unset. When it is set,
# MACOS_NOTARY_KEY_ID and MACOS_NOTARY_ISSUER_ID are required.
cmd_notarize() {
  # ${1?...} fails with a usable message instead of `set -u`'s bare
  # "unbound variable" when the image argument is missing.
  local dmg="${1?usage: notarize <dmg>}"
  if [ -z "${MACOS_NOTARY_KEY:-}" ]; then
    echo "[notarize] MACOS_NOTARY_KEY unset: skipping notarization of $dmg"
    return 0
  fi
  local keydir keyfile
  # Decode into a private temp directory rather than "$(mktemp).p8": the
  # latter leaves the file mktemp created behind and writes the key to a
  # second path that mktemp never reserved.
  keydir="$(mktemp -d)"
  keyfile="$keydir/AuthKey.p8"
  # The script runs under `set -e`, so a rejected or failed submission aborts
  # before the explicit cleanup below; the EXIT trap makes sure the API key
  # never outlives this function. $keydir is expanded now on purpose: the
  # local variable is out of scope by the time the trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf '$keydir'" EXIT

  echo "$MACOS_NOTARY_KEY" | base64 --decode > "$keyfile"
  xcrun notarytool submit "$dmg" \
    --key "$keyfile" \
    --key-id "${MACOS_NOTARY_KEY_ID:?}" \
    --issuer "${MACOS_NOTARY_ISSUER_ID:?}" \
    --wait

  rm -rf "$keydir"
  trap - EXIT
  xcrun stapler staple "$dmg"
  xcrun stapler validate "$dmg"
  echo "[notarize] notarized and stapled $dmg"
}
|
||||||
|
|
||||||
|
# main: dispatch on the first argument to one of the sub-commands, passing the
# remaining arguments through. An unknown or missing sub-command prints the
# usage line to stderr and exits with status 2.
main() {
  local sub="${1:-}"
  # Drop the sub-command name only when one was actually supplied.
  if [ "$#" -gt 0 ]; then
    shift
  fi

  if [ "$sub" = "import-cert" ]; then
    cmd_import_cert
  elif [ "$sub" = "sign" ]; then
    cmd_sign "$@"
  elif [ "$sub" = "notarize" ]; then
    cmd_notarize "$@"
  else
    echo "usage: $0 {import-cert|sign <path>|notarize <dmg>}" >&2
    exit 2
  fi
}
|
||||||
|
|
||||||
|
main "$@"
|
||||||
@@ -37,6 +37,8 @@ func (a *Application) RestartAgentJobService() error {
|
|||||||
if d.JobStore != nil {
|
if d.JobStore != nil {
|
||||||
agentJobService.SetDistributedJobStore(d.JobStore)
|
agentJobService.SetDistributedJobStore(d.JobStore)
|
||||||
}
|
}
|
||||||
|
// Keep agent tasks consistent across replicas (same client the dispatcher uses).
|
||||||
|
agentJobService.SetTaskSyncNATS(d.Nats)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the service
|
// Start the service
|
||||||
|
|||||||
@@ -604,6 +604,10 @@ func (a *Application) StartAgentPool() {
|
|||||||
usm.SetJobDBStore(s)
|
usm.SetJobDBStore(s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Keep per-user agent tasks consistent across replicas (nil in standalone).
|
||||||
|
if d := a.Distributed(); d != nil {
|
||||||
|
usm.SetJobSyncNATS(d.Nats)
|
||||||
|
}
|
||||||
aps.SetUserServicesManager(usm)
|
aps.SetUserServicesManager(usm)
|
||||||
|
|
||||||
a.agentPoolService.Store(aps)
|
a.agentPoolService.Store(aps)
|
||||||
|
|||||||
@@ -355,6 +355,7 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
|||||||
PrefixProvider: prefixProvider,
|
PrefixProvider: prefixProvider,
|
||||||
PrefixConfig: prefixCfg,
|
PrefixConfig: prefixCfg,
|
||||||
Pressure: pressure,
|
Pressure: pressure,
|
||||||
|
SharedModels: cfg.Distributed.SharedModels,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wire staging-progress broadcasting so file-staging shows up on every
|
// Wire staging-progress broadcasting so file-staging shows up on every
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
"github.com/mudler/LocalAI/core/services/jobs"
|
"github.com/mudler/LocalAI/core/services/jobs"
|
||||||
"github.com/mudler/LocalAI/core/services/messaging"
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||||
"github.com/mudler/LocalAI/core/services/monitoring"
|
"github.com/mudler/LocalAI/core/services/monitoring"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes"
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
"github.com/mudler/LocalAI/core/services/routing/admission"
|
"github.com/mudler/LocalAI/core/services/routing/admission"
|
||||||
@@ -279,6 +280,9 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
if application.agentJobService != nil {
|
if application.agentJobService != nil {
|
||||||
application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
|
application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
|
||||||
application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
|
application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
|
||||||
|
// Keep agent tasks consistent across replicas (jobs already sync via the
|
||||||
|
// dispatcher + DB read-through). Same NATS client the dispatcher uses.
|
||||||
|
application.agentJobService.SetTaskSyncNATS(distSvc.Nats)
|
||||||
}
|
}
|
||||||
// Wire skill store into AgentPoolService (wired at pool start time via closure)
|
// Wire skill store into AgentPoolService (wired at pool start time via closure)
|
||||||
// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
|
// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
|
||||||
@@ -330,9 +334,14 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
gs := application.galleryService
|
gs := application.galleryService
|
||||||
sys := options.SystemState
|
sys := options.SystemState
|
||||||
cfgLoaderOpts := options.ToConfigLoaderOptions()
|
cfgLoaderOpts := options.ToConfigLoaderOptions()
|
||||||
gs.OnModelsChanged = func(_ messaging.CacheInvalidateEvent) {
|
gs.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
|
||||||
if err := application.ModelConfigLoader().LoadModelConfigsFromPath(sys.Model.ModelsPath, cfgLoaderOpts...); err != nil {
|
// ApplyRemoteChange honors the op: a "delete" prunes the element
|
||||||
xlog.Warn("Failed to reload model configs after peer invalidation", "error", err)
|
// (a reload-from-path is additive and cannot drop it), anything
|
||||||
|
// else reloads from disk; a named element's running instance is
|
||||||
|
// shut down so the new config takes effect. The originating
|
||||||
|
// replica reloads inline and never depends on this path.
|
||||||
|
if err := modeladmin.ApplyRemoteChange(application.ModelConfigLoader(), application.modelLoader, sys.Model.ModelsPath, evt, cfgLoaderOpts...); err != nil {
|
||||||
|
xlog.Warn("Failed to apply peer model config change", "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := application.galleryService.SubscribeBroadcasts(); err != nil {
|
if err := application.galleryService.SubscribeBroadcasts(); err != nil {
|
||||||
|
|||||||
@@ -160,6 +160,7 @@ type RunCMD struct {
|
|||||||
RegistrationRequireAuth bool `env:"LOCALAI_REGISTRATION_REQUIRE_AUTH" default:"false" help:"Fail startup when distributed mode is enabled but LOCALAI_REGISTRATION_TOKEN is empty (node endpoints and worker file-transfer server would otherwise be unauthenticated)" group:"distributed"`
|
RegistrationRequireAuth bool `env:"LOCALAI_REGISTRATION_REQUIRE_AUTH" default:"false" help:"Fail startup when distributed mode is enabled but LOCALAI_REGISTRATION_TOKEN is empty (node endpoints and worker file-transfer server would otherwise be unauthenticated)" group:"distributed"`
|
||||||
DistributedRequireAuth bool `env:"LOCALAI_DISTRIBUTED_REQUIRE_AUTH" default:"false" help:"Umbrella switch: require BOTH NATS JWT credentials and a registration token when distributed mode is enabled (implies --nats-require-auth and --registration-require-auth)" group:"distributed"`
|
DistributedRequireAuth bool `env:"LOCALAI_DISTRIBUTED_REQUIRE_AUTH" default:"false" help:"Umbrella switch: require BOTH NATS JWT credentials and a registration token when distributed mode is enabled (implies --nats-require-auth and --registration-require-auth)" group:"distributed"`
|
||||||
AutoApproveNodes bool `env:"LOCALAI_AUTO_APPROVE_NODES" default:"false" help:"Auto-approve new worker nodes (skip admin approval)" group:"distributed"`
|
AutoApproveNodes bool `env:"LOCALAI_AUTO_APPROVE_NODES" default:"false" help:"Auto-approve new worker nodes (skip admin approval)" group:"distributed"`
|
||||||
|
DistributedSharedModels bool `env:"LOCALAI_DISTRIBUTED_SHARED_MODELS" default:"false" help:"Assert that every node mounts the SAME models directory at the SAME path (shared volume). When true, the router skips staging model files to workers and loads them directly from the shared path, avoiding re-downloads." group:"distributed"`
|
||||||
DistributedPrefixCache bool `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE" default:"true" help:"Enable prefix-cache-aware routing in distributed mode (default true). When false, routing falls back to round-robin." group:"distributed"`
|
DistributedPrefixCache bool `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE" default:"true" help:"Enable prefix-cache-aware routing in distributed mode (default true). When false, routing falls back to round-robin." group:"distributed"`
|
||||||
DistributedPrefixCacheTTL string `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE_TTL" help:"Idle-timeout for prefix-cache index entries; also drives the background eviction cadence (every TTL/2). Default 5m." group:"distributed"`
|
DistributedPrefixCacheTTL string `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE_TTL" help:"Idle-timeout for prefix-cache index entries; also drives the background eviction cadence (every TTL/2). Default 5m." group:"distributed"`
|
||||||
BackendInstallTimeout string `env:"LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT" help:"NATS round-trip timeout for backend.install requests sent to worker nodes (default 15m). Increase for slow links pulling multi-GB images." group:"distributed"`
|
BackendInstallTimeout string `env:"LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT" help:"NATS round-trip timeout for backend.install requests sent to worker nodes (default 15m). Increase for slow links pulling multi-GB images." group:"distributed"`
|
||||||
@@ -310,6 +311,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
if r.DistributedRequireAuth {
|
if r.DistributedRequireAuth {
|
||||||
opts = append(opts, config.EnableDistributedRequireAuth)
|
opts = append(opts, config.EnableDistributedRequireAuth)
|
||||||
}
|
}
|
||||||
|
if r.DistributedSharedModels {
|
||||||
|
opts = append(opts, config.EnableDistributedSharedModels)
|
||||||
|
}
|
||||||
if r.NatsAccountSeed != "" {
|
if r.NatsAccountSeed != "" {
|
||||||
opts = append(opts, config.WithNatsAccountSeed(r.NatsAccountSeed))
|
opts = append(opts, config.WithNatsAccountSeed(r.NatsAccountSeed))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -542,6 +542,19 @@ var BackendCapabilities = map[string]BackendCapability{
|
|||||||
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
Description: "Speaker recognition — voice identity verification and analysis",
|
Description: "Speaker recognition — voice identity verification and analysis",
|
||||||
},
|
},
|
||||||
|
"voice-detect": {
|
||||||
|
GRPCMethods: []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
|
||||||
|
PossibleUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
|
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
|
Description: "voice-detect.cpp: C++/ggml speaker embedding, verification and voice analysis (age/gender/emotion)",
|
||||||
|
},
|
||||||
|
"face-detect": {
|
||||||
|
GRPCMethods: []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
|
||||||
|
PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
|
||||||
|
DefaultUsecases: []string{UsecaseFaceRecognition},
|
||||||
|
AcceptsImages: true,
|
||||||
|
Description: "face-detect.cpp: C++/ggml face detection, embedding, verification and attribute analysis",
|
||||||
|
},
|
||||||
"silero-vad": {
|
"silero-vad": {
|
||||||
GRPCMethods: []GRPCMethod{MethodVAD},
|
GRPCMethods: []GRPCMethod{MethodVAD},
|
||||||
PossibleUsecases: []string{UsecaseVAD},
|
PossibleUsecases: []string{UsecaseVAD},
|
||||||
|
|||||||
@@ -12,14 +12,12 @@ package config
|
|||||||
// these; config never imports backend.
|
// these; config never imports backend.
|
||||||
const (
|
const (
|
||||||
// DefaultContextSize is the fallback context window when none is configured
|
// DefaultContextSize is the fallback context window when none is configured
|
||||||
// or estimable from the model.
|
// or estimable from the model. It is also the fallback for a GGUF whose
|
||||||
|
// metadata yields no usable estimate or that the parser cannot read at all
|
||||||
|
// (e.g. a quant type it does not know, such as NVFP4): a model-agnostic
|
||||||
|
// safe default beats a tiny, surprising window that truncates real prompts.
|
||||||
DefaultContextSize = 4096
|
DefaultContextSize = 4096
|
||||||
|
|
||||||
// GGUFFallbackContextSize is the context window for a GGUF model whose
|
|
||||||
// metadata yields no usable estimate (see guessGGUFFromFile). Deliberately
|
|
||||||
// smaller than DefaultContextSize to stay conservative on memory there.
|
|
||||||
GGUFFallbackContextSize = 1024
|
|
||||||
|
|
||||||
// DefaultNGPULayers means "offload all layers"; the backend (fit_params)
|
// DefaultNGPULayers means "offload all layers"; the backend (fit_params)
|
||||||
// clamps to what actually fits in device memory.
|
// clamps to what actually fits in device memory.
|
||||||
DefaultNGPULayers = 99999999
|
DefaultNGPULayers = 99999999
|
||||||
|
|||||||
@@ -31,6 +31,14 @@ type DistributedConfig struct {
|
|||||||
// available to enforce just one layer.
|
// available to enforce just one layer.
|
||||||
RequireAuth bool // LOCALAI_DISTRIBUTED_REQUIRE_AUTH
|
RequireAuth bool // LOCALAI_DISTRIBUTED_REQUIRE_AUTH
|
||||||
AutoApproveNodes bool // --auto-approve-nodes / LOCALAI_AUTO_APPROVE_NODES (skip admin approval for new workers)
|
AutoApproveNodes bool // --auto-approve-nodes / LOCALAI_AUTO_APPROVE_NODES (skip admin approval for new workers)
|
||||||
|
// SharedModels asserts that every node (frontend and workers) mounts the
|
||||||
|
// SAME models directory at the SAME path (e.g. a shared volume, as in
|
||||||
|
// docker-compose.distributed.yaml). When true, the router skips staging
|
||||||
|
// model files to workers entirely: the frontend's absolute model paths are
|
||||||
|
// already valid on the worker, so re-uploading them into a per-model
|
||||||
|
// subdirectory only re-downloads what is already present (#10556). Default
|
||||||
|
// false preserves the historical per-node staging behavior.
|
||||||
|
SharedModels bool // --distributed-shared-models / LOCALAI_DISTRIBUTED_SHARED_MODELS
|
||||||
|
|
||||||
// NATS JWT auth (optional; see pkg/natsauth and docs/features/distributed-mode.md)
|
// NATS JWT auth (optional; see pkg/natsauth and docs/features/distributed-mode.md)
|
||||||
NatsAccountSeed string // LOCALAI_NATS_ACCOUNT_SEED — account signing seed to mint per-node worker JWTs
|
NatsAccountSeed string // LOCALAI_NATS_ACCOUNT_SEED — account signing seed to mint per-node worker JWTs
|
||||||
@@ -282,6 +290,13 @@ var EnableAutoApproveNodes = func(o *ApplicationConfig) {
|
|||||||
o.Distributed.AutoApproveNodes = true
|
o.Distributed.AutoApproveNodes = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EnableDistributedSharedModels marks the cluster as sharing one models
|
||||||
|
// directory across all nodes, so the router skips staging model files to
|
||||||
|
// workers (see DistributedConfig.SharedModels).
|
||||||
|
var EnableDistributedSharedModels = func(o *ApplicationConfig) {
|
||||||
|
o.Distributed.SharedModels = true
|
||||||
|
}
|
||||||
|
|
||||||
// DisablePrefixCache turns off prefix-cache-aware routing (falls back to
|
// DisablePrefixCache turns off prefix-cache-aware routing (falls back to
|
||||||
// round-robin). Prefix-cache routing is enabled by default in distributed mode.
|
// round-robin). Prefix-cache routing is enabled by default in distributed mode.
|
||||||
var DisablePrefixCache = func(o *ApplicationConfig) {
|
var DisablePrefixCache = func(o *ApplicationConfig) {
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
|
|||||||
cSize := int(ctxSize)
|
cSize := int(ctxSize)
|
||||||
cfg.ContextSize = &cSize
|
cfg.ContextSize = &cSize
|
||||||
} else {
|
} else {
|
||||||
defaultCtx = GGUFFallbackContextSize
|
defaultCtx = DefaultContextSize
|
||||||
cfg.ContextSize = &defaultCtx
|
cfg.ContextSize = &defaultCtx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func llamaCppDefaults(cfg *ModelConfig, modelPath string) {
|
|||||||
// Default context size if not set, regardless of whether GGUF parsing succeeds
|
// Default context size if not set, regardless of whether GGUF parsing succeeds
|
||||||
defer func() {
|
defer func() {
|
||||||
if cfg.ContextSize == nil {
|
if cfg.ContextSize == nil {
|
||||||
ctx := GGUFFallbackContextSize
|
ctx := DefaultContextSize
|
||||||
cfg.ContextSize = &ctx
|
cfg.ContextSize = &ctx
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|||||||
@@ -248,7 +248,11 @@ var _ = Describe("Backend hooks and parser defaults", func() {
|
|||||||
}
|
}
|
||||||
cfg.SetDefaults(ModelPath(dir))
|
cfg.SetDefaults(ModelPath(dir))
|
||||||
|
|
||||||
|
// An unreadable/unparseable GGUF (e.g. a quant type the parser does
|
||||||
|
// not know, such as NVFP4) yields no estimate, so the hook must fall
|
||||||
|
// back to DefaultContextSize rather than a tiny, surprising value.
|
||||||
Expect(cfg.ContextSize).NotTo(BeNil())
|
Expect(cfg.ContextSize).NotTo(BeNil())
|
||||||
|
Expect(*cfg.ContextSize).To(Equal(DefaultContextSize))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ var (
|
|||||||
|
|
||||||
type LlamaCPPImporter struct{}
|
type LlamaCPPImporter struct{}
|
||||||
|
|
||||||
func (i *LlamaCPPImporter) Name() string { return "llama-cpp" }
|
func (i *LlamaCPPImporter) Name() string { return "llama-cpp" }
|
||||||
func (i *LlamaCPPImporter) Modality() string { return "text" }
|
func (i *LlamaCPPImporter) Modality() string { return "text" }
|
||||||
func (i *LlamaCPPImporter) AutoDetects() bool { return true }
|
func (i *LlamaCPPImporter) AutoDetects() bool { return true }
|
||||||
|
|
||||||
// AdditionalBackends advertises drop-in replacements that share the
|
// AdditionalBackends advertises drop-in replacements that share the
|
||||||
@@ -293,7 +293,7 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
|
|||||||
for _, pref := range prefs {
|
for _, pref := range prefs {
|
||||||
lower := strings.ToLower(pref)
|
lower := strings.ToLower(pref)
|
||||||
for i := range groups {
|
for i := range groups {
|
||||||
if strings.Contains(strings.ToLower(groups[i].Base), lower) {
|
if quantTokenMatches(strings.ToLower(groups[i].Base), lower) {
|
||||||
return &groups[i]
|
return &groups[i]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -301,6 +301,39 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
|
|||||||
return &groups[len(groups)-1]
|
return &groups[len(groups)-1]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// quantTokenMatches reports whether pref appears in base as a whole token
|
||||||
|
// rather than as a substring of a larger alphanumeric run. Both arguments
|
||||||
|
// must already be lowercased.
|
||||||
|
//
|
||||||
|
// A plain strings.Contains is wrong here: `f16` is a substring of `bf16`, so
|
||||||
|
// asking for the `F16` quant used to wrongly select a `BF16` file (#10559).
|
||||||
|
// Only the OUTER edges of the matched preference must hit a boundary — a
|
||||||
|
// non-alphanumeric char (or the start/end of base). Separators inside the
|
||||||
|
// preference itself (e.g. `ud-q4_k_xl`) are intentionally left untouched.
|
||||||
|
func quantTokenMatches(base, pref string) bool {
|
||||||
|
if pref == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for start := strings.Index(base, pref); start != -1; {
|
||||||
|
end := start + len(pref)
|
||||||
|
leftOK := start == 0 || !isAlphaNum(base[start-1])
|
||||||
|
rightOK := end == len(base) || !isAlphaNum(base[end])
|
||||||
|
if leftOK && rightOK {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
next := strings.Index(base[start+1:], pref)
|
||||||
|
if next == -1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
start += next + 1
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// isAlphaNum reports whether b is an ASCII letter or digit.
//
// Uppercase letters are accepted as well as lowercase ones. With lowercased
// input the result is unchanged, but a caller that passes mixed-case text no
// longer has `B` in `BF16` treated as a token boundary, which would let an
// `F16` lookup match a `BF16` name again. Any other byte, including the bytes
// of multi-byte UTF-8 sequences, is not alphanumeric.
func isAlphaNum(b byte) bool {
	switch {
	case b >= 'a' && b <= 'z',
		b >= 'A' && b <= 'Z',
		b >= '0' && b <= '9':
		return true
	}
	return false
}
|
||||||
|
|
||||||
// maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over
|
// maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over
|
||||||
// HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction
|
// HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction
|
||||||
// head, appends the auto-MTP option keys to modelConfig.Options. Failures
|
// head, appends the auto-MTP option keys to modelConfig.Options. Failures
|
||||||
|
|||||||
@@ -374,6 +374,104 @@ var _ = Describe("LlamaCPPImporter", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("quant token boundary matching", func() {
|
||||||
|
// Regression for #10559: the quant preference must match as a whole
|
||||||
|
// token, not as a substring. Asking for `F16` used to select a
|
||||||
|
// `BF16` mmproj because strings.Contains("...bf16.gguf", "f16") is
|
||||||
|
// true — the leading `b` was ignored.
|
||||||
|
|
||||||
|
const repoBase = "https://huggingface.co/acme/example-GGUF/resolve/main/"
|
||||||
|
|
||||||
|
hfFile := func(path, sha string) hfapi.ModelFile {
|
||||||
|
return hfapi.ModelFile{
|
||||||
|
Path: path,
|
||||||
|
SHA256: sha,
|
||||||
|
URL: repoBase + path,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
withHF := func(preferences string, files ...hfapi.ModelFile) Details {
|
||||||
|
d := Details{
|
||||||
|
URI: "https://huggingface.co/acme/example-GGUF",
|
||||||
|
HuggingFace: &hfapi.ModelDetails{
|
||||||
|
ModelID: "acme/example-GGUF",
|
||||||
|
Files: files,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if preferences != "" {
|
||||||
|
d.Preferences = json.RawMessage(preferences)
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
It("selects the F16 mmproj over BF16 (BF16 listed first)", func() {
|
||||||
|
details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
|
||||||
|
hfFile("model-Q4_K_M.gguf", "model"),
|
||||||
|
hfFile("mmproj-x-BF16.gguf", "bf16"),
|
||||||
|
hfFile("mmproj-x-F16.gguf", "f16"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("selects the F16 mmproj over BF16 (F16 listed first)", func() {
|
||||||
|
details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
|
||||||
|
hfFile("model-Q4_K_M.gguf", "model"),
|
||||||
|
hfFile("mmproj-x-F16.gguf", "f16"),
|
||||||
|
hfFile("mmproj-x-BF16.gguf", "bf16"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("selects BF16 when BF16 is the requested mmproj quant", func() {
|
||||||
|
details := withHF(`{"name":"VL","mmproj_quantizations":"BF16"}`,
|
||||||
|
hfFile("model-Q4_K_M.gguf", "model"),
|
||||||
|
hfFile("mmproj-x-F16.gguf", "f16"),
|
||||||
|
hfFile("mmproj-x-BF16.gguf", "bf16"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-BF16.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("still matches a normal model quant with internal separators", func() {
|
||||||
|
// ud-q4_k_xl contains `-`/`_` internally; only the outer edges
|
||||||
|
// must hit a token boundary.
|
||||||
|
details := withHF(`{"name":"M","quantizations":"ud-q4_k_xl"}`,
|
||||||
|
hfFile("model-UD-Q4_K_XL.gguf", "xl"),
|
||||||
|
hfFile("model-Q3_K_M.gguf", "q3"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-UD-Q4_K_XL.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("falls back to the last group when no preference matches", func() {
|
||||||
|
details := withHF(`{"name":"M","quantizations":"Q2_K"}`,
|
||||||
|
hfFile("model-Q8_0.gguf", "q8"),
|
||||||
|
hfFile("model-Q3_K_M.gguf", "q3"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-Q3_K_M.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Context("AdditionalBackends", func() {
|
Context("AdditionalBackends", func() {
|
||||||
It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() {
|
It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() {
|
||||||
entries := importer.AdditionalBackends()
|
entries := importer.AdditionalBackends()
|
||||||
|
|||||||
@@ -23,8 +23,10 @@ import (
|
|||||||
|
|
||||||
"github.com/mudler/LocalAI/core/application"
|
"github.com/mudler/LocalAI/core/application"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
"github.com/mudler/LocalAI/core/services/finetune"
|
"github.com/mudler/LocalAI/core/services/finetune"
|
||||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes"
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
"github.com/mudler/LocalAI/core/services/quantization"
|
"github.com/mudler/LocalAI/core/services/quantization"
|
||||||
|
|
||||||
@@ -400,25 +402,45 @@ func API(application *application.Application) (*echo.Echo, error) {
|
|||||||
routes.RegisterAgentPoolRoutes(e, application, agentsMw, skillsMw, collectionsMw)
|
routes.RegisterAgentPoolRoutes(e, application, agentsMw, skillsMw, collectionsMw)
|
||||||
// Fine-tuning routes
|
// Fine-tuning routes
|
||||||
fineTuningMw := auth.RequireFeature(application.AuthDB(), auth.FeatureFineTuning)
|
fineTuningMw := auth.RequireFeature(application.AuthDB(), auth.FeatureFineTuning)
|
||||||
|
// In distributed mode pass the shared NATS client + PostgreSQL store so
|
||||||
|
// fine-tune jobs stay consistent across replicas (the SyncedMap broadcasts
|
||||||
|
// mutations and hydrates from the DB); standalone passes nil for both.
|
||||||
|
var ftNats messaging.MessagingClient
|
||||||
|
var ftStore *distributed.FineTuneStore
|
||||||
|
if d := application.Distributed(); d != nil {
|
||||||
|
ftNats = d.Nats
|
||||||
|
if d.DistStores != nil && d.DistStores.FineTune != nil {
|
||||||
|
ftStore = d.DistStores.FineTune
|
||||||
|
}
|
||||||
|
}
|
||||||
ftService := finetune.NewFineTuneService(
|
ftService := finetune.NewFineTuneService(
|
||||||
application.ApplicationConfig(),
|
application.ApplicationConfig(),
|
||||||
application.ModelLoader(),
|
application.ModelLoader(),
|
||||||
application.ModelConfigLoader(),
|
application.ModelConfigLoader(),
|
||||||
|
ftNats,
|
||||||
|
ftStore,
|
||||||
)
|
)
|
||||||
if d := application.Distributed(); d != nil {
|
|
||||||
ftService.SetNATSClient(d.Nats)
|
|
||||||
if d.DistStores != nil && d.DistStores.FineTune != nil {
|
|
||||||
ftService.SetFineTuneStore(d.DistStores.FineTune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
routes.RegisterFineTuningRoutes(e, ftService, application.ApplicationConfig(), fineTuningMw)
|
routes.RegisterFineTuningRoutes(e, ftService, application.ApplicationConfig(), fineTuningMw)
|
||||||
|
|
||||||
// Quantization routes
|
// Quantization routes
|
||||||
quantizationMw := auth.RequireFeature(application.AuthDB(), auth.FeatureQuantization)
|
quantizationMw := auth.RequireFeature(application.AuthDB(), auth.FeatureQuantization)
|
||||||
|
// In distributed mode pass the shared NATS client + PostgreSQL store so
|
||||||
|
// quantization jobs stay consistent across replicas (the SyncedMap broadcasts
|
||||||
|
// mutations and hydrates from the DB); standalone passes nil for both.
|
||||||
|
var quantNats messaging.MessagingClient
|
||||||
|
var quantStore *distributed.QuantStore
|
||||||
|
if d := application.Distributed(); d != nil {
|
||||||
|
quantNats = d.Nats
|
||||||
|
if d.DistStores != nil && d.DistStores.Quant != nil {
|
||||||
|
quantStore = d.DistStores.Quant
|
||||||
|
}
|
||||||
|
}
|
||||||
qService := quantization.NewQuantizationService(
|
qService := quantization.NewQuantizationService(
|
||||||
application.ApplicationConfig(),
|
application.ApplicationConfig(),
|
||||||
application.ModelLoader(),
|
application.ModelLoader(),
|
||||||
application.ModelConfigLoader(),
|
application.ModelConfigLoader(),
|
||||||
|
quantNats,
|
||||||
|
quantStore,
|
||||||
)
|
)
|
||||||
routes.RegisterQuantizationRoutes(e, qService, application.ApplicationConfig(), quantizationMw)
|
routes.RegisterQuantizationRoutes(e, qService, application.ApplicationConfig(), quantizationMw)
|
||||||
|
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
|
|||||||
// @Param name path string true "Model name"
|
// @Param name path string true "Model name"
|
||||||
// @Success 200 {object} map[string]any "success message"
|
// @Success 200 {object} map[string]any "success message"
|
||||||
// @Router /api/models/config-json/{name} [patch]
|
// @Router /api/models/config-json/{name} [patch]
|
||||||
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
modelName := c.Param("name")
|
modelName := c.Param("name")
|
||||||
@@ -173,6 +173,14 @@ func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, app
|
|||||||
if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
|
if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
|
||||||
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Patch rewrites the config on disk and reloads only the local loader;
|
||||||
|
// tell peers to refresh so the change is consistent across replicas.
|
||||||
|
// No-op in standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
gs.BroadcastModelsChanged(modelName, "install")
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]any{
|
return c.JSON(http.StatusOK, map[string]any{
|
||||||
"success": true,
|
"success": true,
|
||||||
"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
|
"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ var _ = Describe("Config Metadata Endpoints", func() {
|
|||||||
app = echo.New()
|
app = echo.New()
|
||||||
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
|
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
|
||||||
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
|
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
|
||||||
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
|
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, nil, appConfig))
|
||||||
})
|
})
|
||||||
|
|
||||||
AfterEach(func() {
|
AfterEach(func() {
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
httpUtils "github.com/mudler/LocalAI/core/http/middleware"
|
httpUtils "github.com/mudler/LocalAI/core/http/middleware"
|
||||||
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||||
"github.com/mudler/LocalAI/internal"
|
"github.com/mudler/LocalAI/internal"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
@@ -55,7 +56,7 @@ func GetEditModelPage(cl *config.ModelConfigLoader, appConfig *config.Applicatio
|
|||||||
}
|
}
|
||||||
|
|
||||||
// EditModelEndpoint handles updating existing model configurations
|
// EditModelEndpoint handles updating existing model configurations
|
||||||
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
modelName := c.Param("name")
|
modelName := c.Param("name")
|
||||||
@@ -70,6 +71,17 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tell peer replicas to refresh their in-memory config: this endpoint
|
||||||
|
// only reloaded the local loader. A rename is a delete of the old name
|
||||||
|
// plus an install of the new one. No-op in standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
if result.Renamed {
|
||||||
|
gs.BroadcastModelsChanged(result.OldName, "delete")
|
||||||
|
}
|
||||||
|
gs.BroadcastModelsChanged(result.NewName, "install")
|
||||||
|
}
|
||||||
|
|
||||||
msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
|
msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
|
||||||
if result.Renamed {
|
if result.Renamed {
|
||||||
msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
|
msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
app := echo.New()
|
app := echo.New()
|
||||||
// Set up a simple renderer for the test
|
// Set up a simple renderer for the test
|
||||||
app.Renderer = &testRenderer{}
|
app.Renderer = &testRenderer{}
|
||||||
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, applicationConfig))
|
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, nil, applicationConfig))
|
||||||
app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))
|
app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))
|
||||||
|
|
||||||
requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
|
requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
|
||||||
@@ -106,7 +106,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
Expect(exists).To(BeTrue())
|
Expect(exists).To(BeTrue())
|
||||||
|
|
||||||
app := echo.New()
|
app := echo.New()
|
||||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||||
|
|
||||||
newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
|
newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
|
||||||
req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
|
req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
|
||||||
@@ -163,7 +163,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||||
|
|
||||||
app := echo.New()
|
app := echo.New()
|
||||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||||
|
|
||||||
req := httptest.NewRequest(
|
req := httptest.NewRequest(
|
||||||
"POST",
|
"POST",
|
||||||
@@ -204,7 +204,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||||
|
|
||||||
app := echo.New()
|
app := echo.New()
|
||||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||||
|
|
||||||
req := httptest.NewRequest(
|
req := httptest.NewRequest(
|
||||||
"POST",
|
"POST",
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ImportModelEndpoint handles creating new model configurations
|
// ImportModelEndpoint handles creating new model configurations
|
||||||
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func ImportModelEndpoint(cl *config.ModelConfigLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
// Get the raw body
|
// Get the raw body
|
||||||
body, err := io.ReadAll(c.Request().Body)
|
body, err := io.ReadAll(c.Request().Body)
|
||||||
@@ -245,6 +245,13 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
|
|||||||
}
|
}
|
||||||
return c.JSON(http.StatusInternalServerError, response)
|
return c.JSON(http.StatusInternalServerError, response)
|
||||||
}
|
}
|
||||||
|
// Tell peer replicas to load the newly-created config from the shared
|
||||||
|
// models dir: this endpoint only reloaded the local loader. No-op in
|
||||||
|
// standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
gs.BroadcastModelsChanged(modelConfig.Name, "install")
|
||||||
|
}
|
||||||
|
|
||||||
// Return success response
|
// Return success response
|
||||||
response := ModelResponse{
|
response := ModelResponse{
|
||||||
Success: true,
|
Success: true,
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/http/auth"
|
"github.com/mudler/LocalAI/core/http/auth"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes"
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
|
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
|
||||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||||
@@ -60,7 +61,10 @@ func GetNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
ctx := c.Request().Context()
|
ctx := c.Request().Context()
|
||||||
id := c.Param("id")
|
id := c.Param("id")
|
||||||
node, err := registry.Get(ctx, id)
|
// GetWithExtras (not Get) so the response carries the node's labels,
|
||||||
|
// loaded-model count, and in-flight total — the bare BackendNode keeps
|
||||||
|
// labels in a separate table, leaving the detail view's label list empty.
|
||||||
|
node, err := registry.GetWithExtras(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(http.StatusNotFound, nodeError(http.StatusNotFound, "node not found"))
|
return c.JSON(http.StatusNotFound, nodeError(http.StatusNotFound, "node not found"))
|
||||||
}
|
}
|
||||||
@@ -547,12 +551,23 @@ func DeleteBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerF
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS.
|
// ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS.
|
||||||
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerFunc {
|
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender, registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
|
nodeID := c.Param("id")
|
||||||
|
// Agent-type workers don't run backends and never subscribe to the
|
||||||
|
// nodes.<id>.backend.list NATS subject, so the request would hang
|
||||||
|
// until timeout with "no responders". Their backend list is simply
|
||||||
|
// empty. Mirror the aggregate-list guard in managers_distributed.go
|
||||||
|
// (skip nodes whose NodeType is set and not "backend") so the
|
||||||
|
// single-node and cluster-wide views stay consistent.
|
||||||
|
if node, err := registry.Get(c.Request().Context(), nodeID); err == nil {
|
||||||
|
if node.NodeType != "" && node.NodeType != nodes.NodeTypeBackend {
|
||||||
|
return c.JSON(http.StatusOK, []messaging.NodeBackendInfo{})
|
||||||
|
}
|
||||||
|
}
|
||||||
if unloader == nil {
|
if unloader == nil {
|
||||||
return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured"))
|
return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured"))
|
||||||
}
|
}
|
||||||
nodeID := c.Param("id")
|
|
||||||
reply, err := unloader.ListBackends(nodeID)
|
reply, err := unloader.ListBackends(nodeID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
xlog.Error("Failed to list backends on node", "node", nodeID, "error", err)
|
xlog.Error("Failed to list backends on node", "node", nodeID, "error", err)
|
||||||
|
|||||||
103
core/http/endpoints/localai/nodes_backends_list_test.go
Normal file
103
core/http/endpoints/localai/nodes_backends_list_test.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package localai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
|
||||||
|
"github.com/labstack/echo/v4"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
// stubNodeCommandSender records whether ListBackends was invoked so the test can
|
||||||
|
// assert the endpoint short-circuits (no NATS request) for agent-type nodes.
|
||||||
|
type stubNodeCommandSender struct {
|
||||||
|
listBackendsCalled bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) InstallBackend(_, _, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendInstallReply, error) {
|
||||||
|
return &messaging.BackendInstallReply{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) UpgradeBackend(_, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendUpgradeReply, error) {
|
||||||
|
return &messaging.BackendUpgradeReply{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) DeleteBackend(_, _ string) (*messaging.BackendDeleteReply, error) {
|
||||||
|
return &messaging.BackendDeleteReply{Success: true}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) ListBackends(_ string) (*messaging.BackendListReply, error) {
|
||||||
|
s.listBackendsCalled = true
|
||||||
|
return &messaging.BackendListReply{Backends: []messaging.NodeBackendInfo{{Name: "llama-cpp"}}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) StopBackend(_, _ string) error { return nil }
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) UnloadModelOnNode(_, _ string) error { return nil }
|
||||||
|
|
||||||
|
var _ = Describe("ListBackendsOnNodeEndpoint", func() {
|
||||||
|
var registry *nodes.NodeRegistry
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
db := testutil.SetupTestDB()
|
||||||
|
var err error
|
||||||
|
registry, err = nodes.NewNodeRegistry(db)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
callEndpoint := func(unloader nodes.NodeCommandSender, nodeID string) *httptest.ResponseRecorder {
|
||||||
|
e := echo.New()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
c := e.NewContext(req, rec)
|
||||||
|
c.SetParamNames("id")
|
||||||
|
c.SetParamValues(nodeID)
|
||||||
|
handler := ListBackendsOnNodeEndpoint(unloader, registry)
|
||||||
|
Expect(handler(c)).To(Succeed())
|
||||||
|
return rec
|
||||||
|
}
|
||||||
|
|
||||||
|
It("returns an empty list for an agent node without issuing a NATS request", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
node := &nodes.BackendNode{Name: "agent-1", NodeType: nodes.NodeTypeAgent}
|
||||||
|
Expect(registry.Register(ctx, node, true)).To(Succeed())
|
||||||
|
|
||||||
|
stub := &stubNodeCommandSender{}
|
||||||
|
rec := callEndpoint(stub, node.ID)
|
||||||
|
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||||
|
Expect(stub.listBackendsCalled).To(BeFalse(),
|
||||||
|
"agent workers don't subscribe to backend.list; the endpoint must not issue the doomed NATS request")
|
||||||
|
|
||||||
|
var list []messaging.NodeBackendInfo
|
||||||
|
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||||
|
Expect(list).To(BeEmpty())
|
||||||
|
// Must be `[]`, not `null`, so the UI can render it.
|
||||||
|
Expect(rec.Body.String()).To(ContainSubstring("[]"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("consults the unloader (NATS) for a backend node", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
node := &nodes.BackendNode{Name: "backend-1", NodeType: nodes.NodeTypeBackend, Address: "10.0.0.1:50051"}
|
||||||
|
Expect(registry.Register(ctx, node, true)).To(Succeed())
|
||||||
|
|
||||||
|
stub := &stubNodeCommandSender{}
|
||||||
|
rec := callEndpoint(stub, node.ID)
|
||||||
|
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||||
|
Expect(stub.listBackendsCalled).To(BeTrue(),
|
||||||
|
"backend nodes must still be queried over NATS")
|
||||||
|
|
||||||
|
var list []messaging.NodeBackendInfo
|
||||||
|
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||||
|
Expect(list).To(HaveLen(1))
|
||||||
|
Expect(list[0].Name).To(Equal("llama-cpp"))
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
@@ -24,7 +25,7 @@ import (
|
|||||||
// @Failure 404 {object} ModelResponse
|
// @Failure 404 {object} ModelResponse
|
||||||
// @Failure 500 {object} ModelResponse
|
// @Failure 500 {object} ModelResponse
|
||||||
// @Router /api/models/{name}/{action} [put]
|
// @Router /api/models/{name}/{action} [put]
|
||||||
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
modelName := c.Param("name")
|
modelName := c.Param("name")
|
||||||
@@ -36,6 +37,14 @@ func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoade
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enabling/disabling rewrites the config on disk and reloads only the
|
||||||
|
// local loader; tell peers to refresh so the model's availability is
|
||||||
|
// consistent across replicas. No-op in standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
gs.BroadcastModelsChanged(modelName, "install")
|
||||||
|
}
|
||||||
|
|
||||||
msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
|
msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
|
||||||
if action == modeladmin.ActionDisable {
|
if action == modeladmin.ActionDisable {
|
||||||
msg += " The model will not be loaded on demand until re-enabled."
|
msg += " The model will not be loaded on demand until re-enabled."
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package openresponses
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -10,6 +11,7 @@ import (
|
|||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/http/auth"
|
||||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||||
openaiEndpoint "github.com/mudler/LocalAI/core/http/endpoints/openai"
|
openaiEndpoint "github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
"github.com/mudler/LocalAI/core/http/middleware"
|
||||||
@@ -246,8 +248,11 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
|
|||||||
// Create cancellable context for background execution
|
// Create cancellable context for background execution
|
||||||
bgCtx, bgCancel := context.WithCancel(context.Background())
|
bgCtx, bgCancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
// Store the background response
|
// Store the background response and stamp its owner before the ID
|
||||||
|
// is returned to the client, so later GET/cancel/resume can verify
|
||||||
|
// the caller owns it.
|
||||||
store.StoreBackground(responseID, input, queuedResponse, bgCancel, input.Stream)
|
store.StoreBackground(responseID, input, queuedResponse, bgCancel, input.Stream)
|
||||||
|
store.SetOwner(responseID, ownerFromContext(c))
|
||||||
|
|
||||||
// Start background processing goroutine
|
// Start background processing goroutine
|
||||||
go func() {
|
go func() {
|
||||||
@@ -1587,6 +1592,7 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i
|
|||||||
if shouldStore {
|
if shouldStore {
|
||||||
store := GetGlobalStore()
|
store := GetGlobalStore()
|
||||||
store.Store(responseID, input, response)
|
store.Store(responseID, input, response)
|
||||||
|
store.SetOwner(responseID, ownerFromContext(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
return c.JSON(200, response)
|
return c.JSON(200, response)
|
||||||
@@ -2322,6 +2328,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
|
|||||||
if shouldStore {
|
if shouldStore {
|
||||||
store := GetGlobalStore()
|
store := GetGlobalStore()
|
||||||
store.Store(responseID, input, responseCompleted)
|
store.Store(responseID, input, responseCompleted)
|
||||||
|
store.SetOwner(responseID, ownerFromContext(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send [DONE]
|
// Send [DONE]
|
||||||
@@ -2966,6 +2973,18 @@ func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.T
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ownerFromContext returns the identity (user ID) of the authenticated
|
||||||
|
// caller, or empty string when no authentication was performed (single-key /
|
||||||
|
// no-auth deployments). It is the value stamped on a response at creation and
|
||||||
|
// compared on read/cancel/resume to prevent one caller from accessing
|
||||||
|
// another's response by guessing its ID.
|
||||||
|
func ownerFromContext(c echo.Context) string {
|
||||||
|
if u := auth.GetUser(c); u != nil {
|
||||||
|
return u.ID
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// GetResponseEndpoint returns a handler for GET /responses/:id
|
// GetResponseEndpoint returns a handler for GET /responses/:id
|
||||||
// This endpoint is used for polling background responses or resuming streaming
|
// This endpoint is used for polling background responses or resuming streaming
|
||||||
// @Summary Get a response by ID
|
// @Summary Get a response by ID
|
||||||
@@ -2991,6 +3010,12 @@ func GetResponseEndpoint() func(c echo.Context) error {
|
|||||||
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enforce response ownership. Return 404 (not 403) on mismatch so the
|
||||||
|
// existence of another caller's response is not leaked.
|
||||||
|
if !accessAllowed(stored, ownerFromContext(c)) {
|
||||||
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
}
|
||||||
|
|
||||||
// Check if streaming resume is requested
|
// Check if streaming resume is requested
|
||||||
streamParam := c.QueryParam("stream")
|
streamParam := c.QueryParam("stream")
|
||||||
if streamParam == "true" {
|
if streamParam == "true" {
|
||||||
@@ -3022,16 +3047,21 @@ func GetResponseEndpoint() func(c echo.Context) error {
|
|||||||
|
|
||||||
// handleStreamResume handles resuming a streaming response from a specific sequence number
|
// handleStreamResume handles resuming a streaming response from a specific sequence number
|
||||||
func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, stored *StoredResponse, startingAfter int) error {
|
func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, stored *StoredResponse, startingAfter int) error {
|
||||||
|
// Fetch buffered events before committing to an SSE response so an
|
||||||
|
// offset-lost gap can be reported as a clean HTTP status rather than a
|
||||||
|
// silently truncated event stream.
|
||||||
|
events, err := store.GetEventsAfter(responseID, startingAfter)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, ErrOffsetLost) {
|
||||||
|
return sendOpenResponsesError(c, 409, "invalid_request_error", fmt.Sprintf("starting_after=%d is older than the oldest retained event; the resume buffer evicted those events and the stream cannot be resumed from that point", startingAfter), "starting_after")
|
||||||
|
}
|
||||||
|
return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get events: %v", err), "")
|
||||||
|
}
|
||||||
|
|
||||||
c.Response().Header().Set("Content-Type", "text/event-stream")
|
c.Response().Header().Set("Content-Type", "text/event-stream")
|
||||||
c.Response().Header().Set("Cache-Control", "no-cache")
|
c.Response().Header().Set("Cache-Control", "no-cache")
|
||||||
c.Response().Header().Set("Connection", "keep-alive")
|
c.Response().Header().Set("Connection", "keep-alive")
|
||||||
|
|
||||||
// Get buffered events after the starting point
|
|
||||||
events, err := store.GetEventsAfter(responseID, startingAfter)
|
|
||||||
if err != nil {
|
|
||||||
return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get events: %v", err), "")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send all buffered events
|
// Send all buffered events
|
||||||
for _, event := range events {
|
for _, event := range events {
|
||||||
fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.EventType, string(event.Data))
|
fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.EventType, string(event.Data))
|
||||||
@@ -3126,6 +3156,17 @@ func CancelResponseEndpoint() func(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
store := GetGlobalStore()
|
store := GetGlobalStore()
|
||||||
|
|
||||||
|
// Look up first so ownership can be checked before any mutation.
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
if err != nil {
|
||||||
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
}
|
||||||
|
// Return 404 (not 403) on owner mismatch so existence is not leaked.
|
||||||
|
if !accessAllowed(stored, ownerFromContext(c)) {
|
||||||
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
}
|
||||||
|
|
||||||
response, err := store.Cancel(responseID)
|
response, err := store.Cancel(responseID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package openresponses
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -11,6 +12,30 @@ import (
|
|||||||
"github.com/mudler/xlog"
|
"github.com/mudler/xlog"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// defaultMaxStreamEvents bounds how many resume-buffer events a single
|
||||||
|
// background response retains. Without a cap, a long-running or abandoned
|
||||||
|
// background generation grows StreamEvents without limit and can exhaust
|
||||||
|
// process memory. When the cap is exceeded the oldest events are evicted
|
||||||
|
// from the front (see AppendEvent). Mirrors llama.cpp's byte-capped slot
|
||||||
|
// ring used for resumable /slots state.
|
||||||
|
defaultMaxStreamEvents = 8192
|
||||||
|
|
||||||
|
// defaultMaxStreamBytes caps the total serialized size of retained
|
||||||
|
// resume-buffer events, evicting oldest-first when exceeded. This guards
|
||||||
|
// against a handful of very large events defeating the count cap. 0
|
||||||
|
// disables the byte cap (count cap still applies).
|
||||||
|
defaultMaxStreamBytes = 64 << 20 // 64 MiB
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrOffsetLost is returned by GetEventsAfter when the requested
|
||||||
|
// starting_after sequence number is older than the oldest event still
|
||||||
|
// retained in the resume buffer (i.e. the events between the requested
|
||||||
|
// offset and the current watermark were evicted by the cap). Callers should
|
||||||
|
// surface this to clients as a distinct error instead of silently returning
|
||||||
|
// a truncated stream that omits the dropped events.
|
||||||
|
var ErrOffsetLost = errors.New("resume offset lost: requested events were evicted from the buffer")
|
||||||
|
|
||||||
// ResponseStore provides thread-safe storage for Open Responses API responses
|
// ResponseStore provides thread-safe storage for Open Responses API responses
|
||||||
type ResponseStore struct {
|
type ResponseStore struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
@@ -18,6 +43,12 @@ type ResponseStore struct {
|
|||||||
ttl time.Duration // Time-to-live for stored responses (0 = no expiration)
|
ttl time.Duration // Time-to-live for stored responses (0 = no expiration)
|
||||||
cleanupCtx context.Context
|
cleanupCtx context.Context
|
||||||
cleanupCancel context.CancelFunc
|
cleanupCancel context.CancelFunc
|
||||||
|
|
||||||
|
// maxStreamEvents / maxStreamBytes bound the per-response resume buffer.
|
||||||
|
// Set once at construction from the default constants; tests may lower
|
||||||
|
// them. A value <= 0 disables that particular cap.
|
||||||
|
maxStreamEvents int
|
||||||
|
maxStreamBytes int
|
||||||
}
|
}
|
||||||
|
|
||||||
// StreamedEvent represents a buffered SSE event for streaming resume
|
// StreamedEvent represents a buffered SSE event for streaming resume
|
||||||
@@ -35,6 +66,12 @@ type StoredResponse struct {
|
|||||||
StoredAt time.Time
|
StoredAt time.Time
|
||||||
ExpiresAt *time.Time // nil if no expiration
|
ExpiresAt *time.Time // nil if no expiration
|
||||||
|
|
||||||
|
// Owner is the identity (user ID) that created this response. It is set
|
||||||
|
// once at creation and never mutated, so it can be read without holding
|
||||||
|
// mu. Empty means "no owner" (single-key / no-auth deployments), in which
|
||||||
|
// case ownership checks are skipped for backward compatibility.
|
||||||
|
Owner string
|
||||||
|
|
||||||
// Background execution support
|
// Background execution support
|
||||||
CancelFunc context.CancelFunc // For cancellation of background tasks
|
CancelFunc context.CancelFunc // For cancellation of background tasks
|
||||||
StreamEvents []StreamedEvent // Buffered events for streaming resume
|
StreamEvents []StreamedEvent // Buffered events for streaming resume
|
||||||
@@ -42,6 +79,14 @@ type StoredResponse struct {
|
|||||||
IsBackground bool // Was created with background=true
|
IsBackground bool // Was created with background=true
|
||||||
EventsChan chan struct{} // Signals new events for live subscribers
|
EventsChan chan struct{} // Signals new events for live subscribers
|
||||||
mu sync.RWMutex // Protect concurrent access to this response
|
mu sync.RWMutex // Protect concurrent access to this response
|
||||||
|
|
||||||
|
// streamBytes tracks the total serialized size of the events currently
|
||||||
|
// retained in StreamEvents, used to enforce the byte cap. droppedThrough
|
||||||
|
// is the highest sequence number evicted from the front of the buffer
|
||||||
|
// (-1 = nothing evicted); it is the watermark GetEventsAfter compares
|
||||||
|
// against to detect a lost resume offset. Both are guarded by mu.
|
||||||
|
streamBytes int
|
||||||
|
droppedThrough int
|
||||||
}
|
}
|
||||||
|
|
||||||
var getGlobalStore = sync.OnceValue(func() *ResponseStore {
|
var getGlobalStore = sync.OnceValue(func() *ResponseStore {
|
||||||
@@ -81,8 +126,10 @@ func (s *ResponseStore) SetTTL(ttl time.Duration) {
|
|||||||
// If ttl is 0, responses are stored indefinitely
|
// If ttl is 0, responses are stored indefinitely
|
||||||
func NewResponseStore(ttl time.Duration) *ResponseStore {
|
func NewResponseStore(ttl time.Duration) *ResponseStore {
|
||||||
store := &ResponseStore{
|
store := &ResponseStore{
|
||||||
responses: make(map[string]*StoredResponse),
|
responses: make(map[string]*StoredResponse),
|
||||||
ttl: ttl,
|
ttl: ttl,
|
||||||
|
maxStreamEvents: defaultMaxStreamEvents,
|
||||||
|
maxStreamBytes: defaultMaxStreamBytes,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start cleanup goroutine if TTL is set
|
// Start cleanup goroutine if TTL is set
|
||||||
@@ -109,11 +156,12 @@ func (s *ResponseStore) Store(responseID string, request *schema.OpenResponsesRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
stored := &StoredResponse{
|
stored := &StoredResponse{
|
||||||
Request: request,
|
Request: request,
|
||||||
Response: response,
|
Response: response,
|
||||||
Items: items,
|
Items: items,
|
||||||
StoredAt: time.Now(),
|
StoredAt: time.Now(),
|
||||||
ExpiresAt: nil,
|
ExpiresAt: nil,
|
||||||
|
droppedThrough: -1,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set expiration if TTL is configured
|
// Set expiration if TTL is configured
|
||||||
@@ -256,16 +304,17 @@ func (s *ResponseStore) StoreBackground(responseID string, request *schema.OpenR
|
|||||||
}
|
}
|
||||||
|
|
||||||
stored := &StoredResponse{
|
stored := &StoredResponse{
|
||||||
Request: request,
|
Request: request,
|
||||||
Response: response,
|
Response: response,
|
||||||
Items: items,
|
Items: items,
|
||||||
StoredAt: time.Now(),
|
StoredAt: time.Now(),
|
||||||
ExpiresAt: nil,
|
ExpiresAt: nil,
|
||||||
CancelFunc: cancelFunc,
|
CancelFunc: cancelFunc,
|
||||||
StreamEvents: []StreamedEvent{},
|
StreamEvents: []StreamedEvent{},
|
||||||
StreamEnabled: streamEnabled,
|
StreamEnabled: streamEnabled,
|
||||||
IsBackground: true,
|
IsBackground: true,
|
||||||
EventsChan: make(chan struct{}, 100), // Buffered channel for event notifications
|
EventsChan: make(chan struct{}, 100), // Buffered channel for event notifications
|
||||||
|
droppedThrough: -1,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set expiration if TTL is configured
|
// Set expiration if TTL is configured
|
||||||
@@ -349,6 +398,25 @@ func (s *ResponseStore) AppendEvent(responseID string, event *schema.ORStreamEve
|
|||||||
EventType: event.Type,
|
EventType: event.Type,
|
||||||
Data: data,
|
Data: data,
|
||||||
})
|
})
|
||||||
|
stored.streamBytes += len(data)
|
||||||
|
|
||||||
|
// Evict oldest events from the front once either cap is exceeded. The
|
||||||
|
// byte cap never evicts the only remaining event (a single oversized
|
||||||
|
// event is still served once). Each eviction advances droppedThrough so
|
||||||
|
// a later resume below the watermark is reported as ErrOffsetLost rather
|
||||||
|
// than silently skipping the dropped events.
|
||||||
|
for (s.maxStreamEvents > 0 && len(stored.StreamEvents) > s.maxStreamEvents) ||
|
||||||
|
(s.maxStreamBytes > 0 && stored.streamBytes > s.maxStreamBytes && len(stored.StreamEvents) > 1) {
|
||||||
|
evicted := stored.StreamEvents[0]
|
||||||
|
stored.streamBytes -= len(evicted.Data)
|
||||||
|
if evicted.SequenceNumber > stored.droppedThrough {
|
||||||
|
stored.droppedThrough = evicted.SequenceNumber
|
||||||
|
}
|
||||||
|
// Release the evicted payload so it can be GC'd even though the
|
||||||
|
// backing array element is still owned by the slice until reuse.
|
||||||
|
stored.StreamEvents[0].Data = nil
|
||||||
|
stored.StreamEvents = stored.StreamEvents[1:]
|
||||||
|
}
|
||||||
stored.mu.Unlock()
|
stored.mu.Unlock()
|
||||||
|
|
||||||
// Notify any subscribers of new event
|
// Notify any subscribers of new event
|
||||||
@@ -374,6 +442,14 @@ func (s *ResponseStore) GetEventsAfter(responseID string, startingAfter int) ([]
|
|||||||
stored.mu.RLock()
|
stored.mu.RLock()
|
||||||
defer stored.mu.RUnlock()
|
defer stored.mu.RUnlock()
|
||||||
|
|
||||||
|
// If the requested offset is older than the watermark, the events the
|
||||||
|
// client expects next (those in (startingAfter, droppedThrough]) were
|
||||||
|
// evicted by the cap. Signal the gap rather than returning a stream that
|
||||||
|
// silently skips them.
|
||||||
|
if startingAfter < stored.droppedThrough {
|
||||||
|
return nil, ErrOffsetLost
|
||||||
|
}
|
||||||
|
|
||||||
var result []StreamedEvent
|
var result []StreamedEvent
|
||||||
for _, event := range stored.StreamEvents {
|
for _, event := range stored.StreamEvents {
|
||||||
if event.SequenceNumber > startingAfter {
|
if event.SequenceNumber > startingAfter {
|
||||||
@@ -447,3 +523,30 @@ func (s *ResponseStore) IsStreamEnabled(responseID string) (bool, error) {
|
|||||||
|
|
||||||
return stored.StreamEnabled, nil
|
return stored.StreamEnabled, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetOwner records the identity that owns a stored response. It is called
|
||||||
|
// once, right after the response is stored and before its ID is handed back
|
||||||
|
// to any client, so no lock on the stored response is required. A no-op for
|
||||||
|
// an empty owner or unknown response ID.
|
||||||
|
func (s *ResponseStore) SetOwner(responseID, owner string) {
|
||||||
|
if owner == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
if !exists {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.Owner = owner
|
||||||
|
}
|
||||||
|
|
||||||
|
// accessAllowed reports whether a caller identified by callerID may read or
|
||||||
|
// mutate the given stored response. An empty owner (single-key / no-auth
|
||||||
|
// deployments) is accessible by anyone, preserving backward compatibility;
|
||||||
|
// otherwise the caller identity must match the recorded owner.
|
||||||
|
func accessAllowed(stored *StoredResponse, callerID string) bool {
|
||||||
|
return stored.Owner == "" || stored.Owner == callerID
|
||||||
|
}
|
||||||
|
|||||||
@@ -585,6 +585,86 @@ var _ = Describe("ResponseStore", func() {
|
|||||||
Expect(enabled2).To(BeFalse())
|
Expect(enabled2).To(BeFalse())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should bound the resume buffer and evict oldest events past the cap", func() {
|
||||||
|
// Lower the caps so the test stays fast; production defaults are
|
||||||
|
// large. Same-package access to the unexported fields is fine.
|
||||||
|
store.maxStreamEvents = 5
|
||||||
|
store.maxStreamBytes = 0 // count cap only for this test
|
||||||
|
|
||||||
|
responseID := "resp_buffer_cap"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusInProgress,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
store.StoreBackground(responseID, request, response, cancel, true)
|
||||||
|
|
||||||
|
// Append well past the cap.
|
||||||
|
const total = 20
|
||||||
|
for i := range total {
|
||||||
|
err := store.AppendEvent(responseID, &schema.ORStreamEvent{
|
||||||
|
Type: "response.output_text.delta",
|
||||||
|
SequenceNumber: i,
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
}
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// (a) Buffer length stays bounded by the cap.
|
||||||
|
Expect(len(stored.StreamEvents)).To(Equal(5))
|
||||||
|
|
||||||
|
// (b) Oldest events were evicted: only the last 5 sequence numbers
|
||||||
|
// remain (15..19).
|
||||||
|
Expect(stored.StreamEvents[0].SequenceNumber).To(Equal(15))
|
||||||
|
Expect(stored.StreamEvents[len(stored.StreamEvents)-1].SequenceNumber).To(Equal(19))
|
||||||
|
|
||||||
|
// Resuming from the last evicted seq (14, the watermark) still works: every later event (15..19) is retained.
|
||||||
|
retained, err := store.GetEventsAfter(responseID, 14)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(retained).To(HaveLen(5))
|
||||||
|
|
||||||
|
// (c) Asking below the dropped watermark returns ErrOffsetLost.
|
||||||
|
_, err = store.GetEventsAfter(responseID, 0)
|
||||||
|
Expect(err).To(MatchError(ErrOffsetLost))
|
||||||
|
|
||||||
|
_, err = store.GetEventsAfter(responseID, -1)
|
||||||
|
Expect(err).To(MatchError(ErrOffsetLost))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should record and enforce response ownership", func() {
|
||||||
|
responseID := "resp_owner_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response", Status: schema.ORStatusCompleted}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
store.SetOwner(responseID, "userA")
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored.Owner).To(Equal("userA"))
|
||||||
|
|
||||||
|
// Owner matches -> allowed; different identity -> denied.
|
||||||
|
Expect(accessAllowed(stored, "userA")).To(BeTrue())
|
||||||
|
Expect(accessAllowed(stored, "userB")).To(BeFalse())
|
||||||
|
|
||||||
|
// Backward compatibility: a response with no owner is accessible
|
||||||
|
// by any caller (single-key / no-auth deployments).
|
||||||
|
noOwnerID := "resp_no_owner"
|
||||||
|
store.Store(noOwnerID, request, &schema.ORResponseResource{ID: noOwnerID, Object: "response"})
|
||||||
|
noOwner, err := store.Get(noOwnerID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(noOwner.Owner).To(BeEmpty())
|
||||||
|
Expect(accessAllowed(noOwner, "anyone")).To(BeTrue())
|
||||||
|
Expect(accessAllowed(noOwner, "")).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
It("should notify subscribers of new events", func() {
|
It("should notify subscribers of new events", func() {
|
||||||
responseID := "resp_events_chan"
|
responseID := "resp_events_chan"
|
||||||
request := &schema.OpenResponsesRequest{Model: "test"}
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
|||||||
@@ -72,19 +72,19 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
|||||||
router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
|
router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
|
||||||
router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
|
router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
|
||||||
// Custom model import endpoint
|
// Custom model import endpoint
|
||||||
router.POST("/models/import", localai.ImportModelEndpoint(cl, appConfig), adminMiddleware)
|
router.POST("/models/import", localai.ImportModelEndpoint(cl, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// URI model import endpoint
|
// URI model import endpoint
|
||||||
router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)
|
router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)
|
||||||
|
|
||||||
// Custom model edit endpoint
|
// Custom model edit endpoint
|
||||||
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// List model aliases endpoint
|
// List model aliases endpoint
|
||||||
router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)
|
router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)
|
||||||
|
|
||||||
// Toggle model enable/disable endpoint
|
// Toggle model enable/disable endpoint
|
||||||
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// Toggle model pinned status endpoint
|
// Toggle model pinned status endpoint
|
||||||
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
|
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade
|
|||||||
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
|
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
|
||||||
|
|
||||||
// Backend management on workers
|
// Backend management on workers
|
||||||
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader))
|
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader, registry))
|
||||||
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
|
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
|
||||||
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))
|
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))
|
||||||
|
|
||||||
|
|||||||
@@ -922,7 +922,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
|||||||
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
|
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// PATCH config endpoint - partial update using nested JSON merge
|
// PATCH config endpoint - partial update using nested JSON merge
|
||||||
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)
|
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// VRAM estimation endpoint
|
// VRAM estimation endpoint
|
||||||
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
|
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
|
||||||
|
|||||||
@@ -68,6 +68,32 @@ var _ = Describe("LLM tests", func() {
|
|||||||
Expect(protoMessages[0].Content).To(Equal("Hello World"))
|
Expect(protoMessages[0].Content).To(Equal("Hello World"))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Regression for mudler/LocalAI#10524: a text part whose inner text is
|
||||||
|
// itself a JSON-array string (mealie sends an ingredient list) must
|
||||||
|
// flatten to that exact string verbatim. ToProto must NOT escape or
|
||||||
|
// restructure it - the C++ backend then treats it as opaque text. This
|
||||||
|
// pins the precise Go-side input that produced the "unsupported
|
||||||
|
// content[].type" gRPC error before the backend stopped re-parsing it.
|
||||||
|
It("flattens a JSON-array-looking text part to the verbatim string (#10524)", func() {
|
||||||
|
ingredients := `["1/4 cup brown sugar, packed","1 pound ground beef"]`
|
||||||
|
messages := Messages{
|
||||||
|
{
|
||||||
|
Role: "user",
|
||||||
|
Content: []any{
|
||||||
|
map[string]any{
|
||||||
|
"type": "text",
|
||||||
|
"text": ingredients,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
protoMessages := messages.ToProto()
|
||||||
|
|
||||||
|
Expect(protoMessages).To(HaveLen(1))
|
||||||
|
Expect(protoMessages[0].Content).To(Equal(ingredients))
|
||||||
|
})
|
||||||
|
|
||||||
It("should convert message with tool_calls", func() {
|
It("should convert message with tool_calls", func() {
|
||||||
messages := Messages{
|
messages := Messages{
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ import (
|
|||||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/core/services/jobs"
|
"github.com/mudler/LocalAI/core/services/jobs"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
"github.com/mudler/LocalAI/core/templates"
|
"github.com/mudler/LocalAI/core/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
@@ -43,8 +45,18 @@ type AgentJobService struct {
|
|||||||
configLoader *config.ModelConfigLoader
|
configLoader *config.ModelConfigLoader
|
||||||
evaluator *templates.Evaluator
|
evaluator *templates.Evaluator
|
||||||
|
|
||||||
|
// tasks is the cross-replica task store: an in-memory map kept consistent
|
||||||
|
// across replicas via NATS, with read-through to the configured persister
|
||||||
|
// (file in standalone, PostgreSQL in distributed). Unlike jobs - which already
|
||||||
|
// converge via the dispatcher + DB read-through - tasks previously read
|
||||||
|
// in-memory only, so ListTasks went stale on non-originating replicas.
|
||||||
|
tasks *syncstate.SyncedMap[string, schema.Task]
|
||||||
|
// taskNats is the distributed NATS client backing the tasks SyncedMap. It is
|
||||||
|
// not available at construction time, so it is injected via SetTaskSyncNATS
|
||||||
|
// during distributed wiring; nil keeps tasks in-memory-only (standalone).
|
||||||
|
taskNats messaging.MessagingClient
|
||||||
|
|
||||||
// Storage (in-memory primary, persister for secondary persistence)
|
// Storage (in-memory primary, persister for secondary persistence)
|
||||||
tasks *xsync.SyncedMap[string, schema.Task]
|
|
||||||
jobs *xsync.SyncedMap[string, schema.Job]
|
jobs *xsync.SyncedMap[string, schema.Job]
|
||||||
persister JobPersister
|
persister JobPersister
|
||||||
userID string // Scoping: empty for global (main service), set for per-user instances
|
userID string // Scoping: empty for global (main service), set for per-user instances
|
||||||
@@ -96,6 +108,31 @@ func (s *AgentJobService) SetDistributedJobStore(store *jobs.JobStore) {
|
|||||||
s.persister = &dbJobPersister{store: store}
|
s.persister = &dbJobPersister{store: store}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetTaskSyncNATS wires the distributed NATS client used to keep agent *tasks*
|
||||||
|
// consistent across replicas (jobs already converge via the dispatcher + DB
|
||||||
|
// read-through, so they are left untouched). The client is not available when the
|
||||||
|
// service is constructed, so it is injected here during distributed wiring and the
|
||||||
|
// tasks SyncedMap is rebuilt to pick it up. It is always called before Start /
|
||||||
|
// hydrate, while the map is still empty, so rebuilding loses no state. Passing nil
|
||||||
|
// (standalone) keeps the map in-memory-only with no broadcast.
|
||||||
|
func (s *AgentJobService) SetTaskSyncNATS(nats messaging.MessagingClient) {
|
||||||
|
s.taskNats = nats
|
||||||
|
s.buildTasksMap()
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildTasksMap (re)constructs the cross-replica tasks SyncedMap from the current
|
||||||
|
// taskNats. The Store adapter reads s.persister/s.userID live, so a persister swap
|
||||||
|
// (SetDistributedJobStore) needs no rebuild; only the NATS client, fixed at
|
||||||
|
// New-time, forces one - hence SetTaskSyncNATS calls this.
|
||||||
|
func (s *AgentJobService) buildTasksMap() {
|
||||||
|
s.tasks = syncstate.New(syncstate.Config[string, schema.Task]{
|
||||||
|
Name: "agent.tasks",
|
||||||
|
Key: func(t schema.Task) string { return t.ID },
|
||||||
|
Nats: s.taskNats,
|
||||||
|
Store: &taskStoreAdapter{svc: s},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// Dispatcher returns the distributed dispatcher (nil if not in distributed mode).
|
// Dispatcher returns the distributed dispatcher (nil if not in distributed mode).
|
||||||
func (s *AgentJobService) Dispatcher() DistributedDispatcher {
|
func (s *AgentJobService) Dispatcher() DistributedDispatcher {
|
||||||
return s.dispatcher
|
return s.dispatcher
|
||||||
@@ -106,13 +143,6 @@ func (s *AgentJobService) DBStore() *jobs.JobStore {
|
|||||||
return s.rawDBStore
|
return s.rawDBStore
|
||||||
}
|
}
|
||||||
|
|
||||||
// saveTasks persists tasks via the configured persister (file or DB).
|
|
||||||
func (s *AgentJobService) saveTasks(task schema.Task) {
|
|
||||||
if err := s.persister.SaveTask(s.userID, task); err != nil {
|
|
||||||
xlog.Warn("Failed to persist task", "error", err, "task_id", task.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// saveJobs persists jobs via the configured persister (file or DB).
|
// saveJobs persists jobs via the configured persister (file or DB).
|
||||||
func (s *AgentJobService) saveJobs(job schema.Job) {
|
func (s *AgentJobService) saveJobs(job schema.Job) {
|
||||||
if err := s.persister.SaveJob(s.userID, job); err != nil {
|
if err := s.persister.SaveJob(s.userID, job); err != nil {
|
||||||
@@ -129,18 +159,8 @@ func (s *AgentJobService) LoadFromDB() {
|
|||||||
|
|
||||||
// loadFromPersister loads tasks and jobs from the configured persister into memory.
|
// loadFromPersister loads tasks and jobs from the configured persister into memory.
|
||||||
func (s *AgentJobService) loadFromPersister() {
|
func (s *AgentJobService) loadFromPersister() {
|
||||||
if tasks, err := s.persister.LoadTasks(s.userID); err != nil {
|
if err := s.hydrateTasks(s.appConfig.Context); err != nil {
|
||||||
xlog.Warn("Failed to load tasks from persister", "error", err)
|
xlog.Warn("Failed to load tasks from persister", "error", err)
|
||||||
} else {
|
|
||||||
for _, task := range tasks {
|
|
||||||
s.tasks.Set(task.ID, task)
|
|
||||||
if task.Enabled && task.Cron != "" {
|
|
||||||
if err := s.ScheduleCronTask(task); err != nil {
|
|
||||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
xlog.Info("Loaded tasks from persister", "count", len(tasks))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if loadedJobs, err := s.persister.LoadJobs(s.userID); err != nil {
|
if loadedJobs, err := s.persister.LoadJobs(s.userID); err != nil {
|
||||||
@@ -153,6 +173,27 @@ func (s *AgentJobService) loadFromPersister() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hydrateTasks loads tasks into the cross-replica SyncedMap and (re)schedules
|
||||||
|
// cron entries for enabled tasks. Hydration goes through the SyncedMap's Store
|
||||||
|
// read-through (Start), not Set, so it neither re-persists nor re-broadcasts the
|
||||||
|
// loaded tasks. Each service instance hydrates exactly once: the main service via
|
||||||
|
// Start -> loadFromPersister, per-user services via LoadFromDB or LoadTasksFromFile.
|
||||||
|
func (s *AgentJobService) hydrateTasks(ctx context.Context) error {
|
||||||
|
if err := s.tasks.Start(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tasks := s.tasks.List()
|
||||||
|
for _, task := range tasks {
|
||||||
|
if task.Enabled && task.Cron != "" {
|
||||||
|
if err := s.ScheduleCronTask(task); err != nil {
|
||||||
|
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xlog.Info("Loaded tasks from persister", "count", len(tasks))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// JobExecution represents a job to be executed
|
// JobExecution represents a job to be executed
|
||||||
type JobExecution struct {
|
type JobExecution struct {
|
||||||
Job schema.Job
|
Job schema.Job
|
||||||
@@ -200,21 +241,19 @@ func NewAgentJobServiceWithPaths(
|
|||||||
) *AgentJobService {
|
) *AgentJobService {
|
||||||
retentionDays := cmp.Or(appConfig.AgentJobRetentionDays, 30)
|
retentionDays := cmp.Or(appConfig.AgentJobRetentionDays, 30)
|
||||||
|
|
||||||
tasks := xsync.NewSyncedMap[string, schema.Task]()
|
|
||||||
jobsMap := xsync.NewSyncedMap[string, schema.Job]()
|
jobsMap := xsync.NewSyncedMap[string, schema.Job]()
|
||||||
|
|
||||||
return &AgentJobService{
|
s := &AgentJobService{
|
||||||
appConfig: appConfig,
|
appConfig: appConfig,
|
||||||
modelLoader: modelLoader,
|
modelLoader: modelLoader,
|
||||||
configLoader: configLoader,
|
configLoader: configLoader,
|
||||||
evaluator: evaluator,
|
evaluator: evaluator,
|
||||||
tasks: tasks,
|
|
||||||
jobs: jobsMap,
|
jobs: jobsMap,
|
||||||
persister: &fileJobPersister{
|
persister: &fileJobPersister{
|
||||||
tasks: tasks,
|
|
||||||
jobs: jobsMap,
|
jobs: jobsMap,
|
||||||
tasksFile: tasksFile,
|
tasksFile: tasksFile,
|
||||||
jobsFile: jobsFile,
|
jobsFile: jobsFile,
|
||||||
|
taskSet: make(map[string]schema.Task),
|
||||||
},
|
},
|
||||||
jobQueue: make(chan JobExecution, 100), // Buffer for 100 jobs
|
jobQueue: make(chan JobExecution, 100), // Buffer for 100 jobs
|
||||||
cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
|
cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
|
||||||
@@ -222,25 +261,17 @@ func NewAgentJobServiceWithPaths(
|
|||||||
cronEntries: xsync.NewSyncedMap[string, cron.EntryID](),
|
cronEntries: xsync.NewSyncedMap[string, cron.EntryID](),
|
||||||
retentionDays: retentionDays,
|
retentionDays: retentionDays,
|
||||||
}
|
}
|
||||||
|
// Build the cross-replica tasks map standalone (nil NATS); SetTaskSyncNATS
|
||||||
|
// rebuilds it with the distributed client once that is available, before Start.
|
||||||
|
s.buildTasksMap()
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadTasksFromFile loads tasks from the persister into the in-memory map
|
// LoadTasksFromFile loads tasks from the persister into the in-memory map
|
||||||
// and schedules cron entries. Named "FromFile" for backward compat; in DB
|
// and schedules cron entries. Named "FromFile" for backward compat; in DB
|
||||||
// mode it loads from the database.
|
// mode it loads from the database.
|
||||||
func (s *AgentJobService) LoadTasksFromFile() error {
|
func (s *AgentJobService) LoadTasksFromFile() error {
|
||||||
tasks, err := s.persister.LoadTasks(s.userID)
|
return s.hydrateTasks(s.appConfig.Context)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, task := range tasks {
|
|
||||||
s.tasks.Set(task.ID, task)
|
|
||||||
if task.Enabled && task.Cron != "" {
|
|
||||||
if err := s.ScheduleCronTask(task); err != nil {
|
|
||||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SaveTasksToFile flushes the current tasks map via the persister. File
|
// SaveTasksToFile flushes the current tasks map via the persister. File
|
||||||
@@ -293,8 +324,12 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
|
|||||||
task.Enabled = true // Default to enabled
|
task.Enabled = true // Default to enabled
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store task
|
// Store task: Set updates the in-memory map, write-throughs to the persister
|
||||||
s.tasks.Set(id, task)
|
// (file or DB), and broadcasts the create to peer replicas. Background ctx
|
||||||
|
// because CreateTask carries no request ctx (mirrors the finetune service).
|
||||||
|
if err := s.tasks.Set(context.Background(), task); err != nil {
|
||||||
|
return "", fmt.Errorf("failed to persist task: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Schedule cron if enabled and has cron expression
|
// Schedule cron if enabled and has cron expression
|
||||||
if task.Enabled && task.Cron != "" {
|
if task.Enabled && task.Cron != "" {
|
||||||
@@ -303,16 +338,15 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.saveTasks(task)
|
|
||||||
return id, nil
|
return id, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateTask updates an existing task
|
// UpdateTask updates an existing task
|
||||||
func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
||||||
if !s.tasks.Exists(id) {
|
existing, ok := s.tasks.Get(id)
|
||||||
|
if !ok {
|
||||||
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||||
}
|
}
|
||||||
existing := s.tasks.Get(id)
|
|
||||||
|
|
||||||
// Preserve ID and CreatedAt
|
// Preserve ID and CreatedAt
|
||||||
task.ID = id
|
task.ID = id
|
||||||
@@ -324,8 +358,10 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
|||||||
s.UnscheduleCronTask(id)
|
s.UnscheduleCronTask(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store updated task
|
// Store updated task: write-through + broadcast (see CreateTask).
|
||||||
s.tasks.Set(id, task)
|
if err := s.tasks.Set(context.Background(), task); err != nil {
|
||||||
|
return fmt.Errorf("failed to persist task: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Schedule new cron if enabled and has cron expression
|
// Schedule new cron if enabled and has cron expression
|
||||||
if task.Enabled && task.Cron != "" {
|
if task.Enabled && task.Cron != "" {
|
||||||
@@ -334,24 +370,22 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.saveTasks(task)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteTask deletes a task
|
// DeleteTask deletes a task
|
||||||
func (s *AgentJobService) DeleteTask(id string) error {
|
func (s *AgentJobService) DeleteTask(id string) error {
|
||||||
if !s.tasks.Exists(id) {
|
if _, ok := s.tasks.Get(id); !ok {
|
||||||
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unschedule cron
|
// Unschedule cron
|
||||||
s.UnscheduleCronTask(id)
|
s.UnscheduleCronTask(id)
|
||||||
|
|
||||||
// Remove from memory
|
// Delete removes from the in-memory map, deletes from the persister, and
|
||||||
s.tasks.Delete(id)
|
// broadcasts the removal to peer replicas.
|
||||||
|
if err := s.tasks.Delete(context.Background(), id); err != nil {
|
||||||
if err := s.persister.DeleteTask(id); err != nil {
|
xlog.Warn("Failed to delete task from store", "error", err, "task_id", id)
|
||||||
xlog.Warn("Failed to delete task from persister", "error", err, "task_id", id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -359,8 +393,8 @@ func (s *AgentJobService) DeleteTask(id string) error {
|
|||||||
|
|
||||||
// GetTask retrieves a task by ID
|
// GetTask retrieves a task by ID
|
||||||
func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
||||||
task := s.tasks.Get(id)
|
task, ok := s.tasks.Get(id)
|
||||||
if task.ID == "" {
|
if !ok {
|
||||||
return nil, fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
return nil, fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||||
}
|
}
|
||||||
return &task, nil
|
return &task, nil
|
||||||
@@ -368,7 +402,7 @@ func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
|||||||
|
|
||||||
// ListTasks returns all tasks, sorted by creation date (newest first)
|
// ListTasks returns all tasks, sorted by creation date (newest first)
|
||||||
func (s *AgentJobService) ListTasks() []schema.Task {
|
func (s *AgentJobService) ListTasks() []schema.Task {
|
||||||
tasks := s.tasks.Values()
|
tasks := s.tasks.List()
|
||||||
// Sort by CreatedAt descending (newest first), then by Name for stability
|
// Sort by CreatedAt descending (newest first), then by Name for stability
|
||||||
slices.SortFunc(tasks, func(a, b schema.Task) int {
|
slices.SortFunc(tasks, func(a, b schema.Task) int {
|
||||||
if a.CreatedAt.Equal(b.CreatedAt) {
|
if a.CreatedAt.Equal(b.CreatedAt) {
|
||||||
@@ -397,8 +431,8 @@ func (s *AgentJobService) buildPrompt(templateStr string, params map[string]stri
|
|||||||
// ExecuteJob creates and queues a job for execution
|
// ExecuteJob creates and queues a job for execution
|
||||||
// multimedia can be nil for backward compatibility
|
// multimedia can be nil for backward compatibility
|
||||||
func (s *AgentJobService) ExecuteJob(taskID string, params map[string]string, triggeredBy string, multimedia *schema.MultimediaAttachment) (string, error) {
|
func (s *AgentJobService) ExecuteJob(taskID string, params map[string]string, triggeredBy string, multimedia *schema.MultimediaAttachment) (string, error) {
|
||||||
task := s.tasks.Get(taskID)
|
task, ok := s.tasks.Get(taskID)
|
||||||
if task.ID == "" {
|
if !ok {
|
||||||
return "", fmt.Errorf("%w: %s", ErrTaskNotFound, taskID)
|
return "", fmt.Errorf("%w: %s", ErrTaskNotFound, taskID)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1451,6 +1485,12 @@ func (s *AgentJobService) Stop() error {
|
|||||||
if s.cronScheduler != nil {
|
if s.cronScheduler != nil {
|
||||||
s.cronScheduler.Stop()
|
s.cronScheduler.Stop()
|
||||||
}
|
}
|
||||||
|
// Release the tasks SyncedMap subscription / background workers.
|
||||||
|
if s.tasks != nil {
|
||||||
|
if err := s.tasks.Close(); err != nil {
|
||||||
|
xlog.Warn("Error closing tasks sync map", "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
xlog.Info("AgentJobService stopped")
|
xlog.Info("AgentJobService stopped")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,24 +14,38 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// fileJobPersister persists tasks and jobs to JSON files.
|
// fileJobPersister persists tasks and jobs to JSON files.
|
||||||
// It holds references to the service's syncmaps and serializes the entire
|
//
|
||||||
// map contents on each save (bulk write). Reads at runtime return nil
|
// Jobs serialize the service's in-memory jobs syncmap on each save (bulk write).
|
||||||
// (the in-memory map is the authoritative source); LoadTasks/LoadJobs
|
// Tasks are kept in this persister's own taskSet map instead: the tasks SyncedMap
|
||||||
// are used only at startup to bootstrap the syncmaps.
|
// calls SaveTask/DeleteTask while holding its internal lock (write-through), so
|
||||||
|
// reading back the SyncedMap here would re-enter that lock and deadlock. The
|
||||||
|
// self-contained taskSet, seeded by LoadTasks, lets a per-task write rewrite the
|
||||||
|
// whole bulk file without touching the SyncedMap.
|
||||||
|
//
|
||||||
|
// Runtime reads (GetJob/ListJobs) return nil (the in-memory state is the
|
||||||
|
// authoritative source); LoadTasks/LoadJobs bootstrap state at startup.
|
||||||
type fileJobPersister struct {
|
type fileJobPersister struct {
|
||||||
tasks *xsync.SyncedMap[string, schema.Task]
|
|
||||||
jobs *xsync.SyncedMap[string, schema.Job]
|
jobs *xsync.SyncedMap[string, schema.Job]
|
||||||
tasksFile string
|
tasksFile string
|
||||||
jobsFile string
|
jobsFile string
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
// taskSet is the persister's own view of all tasks, seeded by LoadTasks and
|
||||||
|
// updated by SaveTask/DeleteTask. The bulk JSON file is rewritten from it.
|
||||||
|
taskSet map[string]schema.Task
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) SaveTask(_ string, _ schema.Task) error {
|
func (p *fileJobPersister) SaveTask(_ string, task schema.Task) error {
|
||||||
return p.saveTasksToFile()
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
p.taskSet[task.ID] = task
|
||||||
|
return p.writeTasksLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) DeleteTask(_ string) error {
|
func (p *fileJobPersister) DeleteTask(taskID string) error {
|
||||||
return p.saveTasksToFile()
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
delete(p.taskSet, taskID)
|
||||||
|
return p.writeTasksLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) SaveJob(_ string, _ schema.Job) error {
|
func (p *fileJobPersister) SaveJob(_ string, _ schema.Job) error {
|
||||||
@@ -43,7 +57,9 @@ func (p *fileJobPersister) DeleteJob(_ string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) FlushTasks() error {
|
func (p *fileJobPersister) FlushTasks() error {
|
||||||
return p.saveTasksToFile()
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
return p.writeTasksLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) FlushJobs() error {
|
func (p *fileJobPersister) FlushJobs() error {
|
||||||
@@ -83,6 +99,12 @@ func (p *fileJobPersister) LoadTasks(_ string) ([]schema.Task, error) {
|
|||||||
return nil, fmt.Errorf("failed to parse tasks file: %w", err)
|
return nil, fmt.Errorf("failed to parse tasks file: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Seed the in-memory set so subsequent per-task SaveTask/DeleteTask merge into
|
||||||
|
// (rather than overwrite) the persisted tasks when the bulk file is rewritten.
|
||||||
|
for _, t := range tf.Tasks {
|
||||||
|
p.taskSet[t.ID] = t
|
||||||
|
}
|
||||||
|
|
||||||
xlog.Info("Loaded tasks from file", "count", len(tf.Tasks))
|
xlog.Info("Loaded tasks from file", "count", len(tf.Tasks))
|
||||||
return tf.Tasks, nil
|
return tf.Tasks, nil
|
||||||
}
|
}
|
||||||
@@ -118,19 +140,20 @@ func (p *fileJobPersister) CleanupOldJobs(_ time.Duration) (int64, error) {
|
|||||||
return 0, nil // cleanup handled via in-memory filtering
|
return 0, nil // cleanup handled via in-memory filtering
|
||||||
}
|
}
|
||||||
|
|
||||||
// saveTasksToFile serializes the entire tasks map to the JSON file.
|
// writeTasksLocked serializes the persister's task set to the JSON file. Callers
|
||||||
func (p *fileJobPersister) saveTasksToFile() error {
|
// must hold p.mu.
|
||||||
|
func (p *fileJobPersister) writeTasksLocked() error {
|
||||||
if p.tasksFile == "" {
|
if p.tasksFile == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
p.mu.Lock()
|
tasks := make([]schema.Task, 0, len(p.taskSet))
|
||||||
defer p.mu.Unlock()
|
for _, t := range p.taskSet {
|
||||||
|
tasks = append(tasks, t)
|
||||||
tf := schema.TasksFile{
|
|
||||||
Tasks: p.tasks.Values(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tf := schema.TasksFile{Tasks: tasks}
|
||||||
|
|
||||||
data, err := json.MarshalIndent(tf, "", " ")
|
data, err := json.MarshalIndent(tf, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to marshal tasks: %w", err)
|
return fmt.Errorf("failed to marshal tasks: %w", err)
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user