mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-28 10:27:30 -04:00
Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be1ae9338b | ||
|
|
923c47020d | ||
|
|
b7a1dec773 | ||
|
|
de2ec2f136 | ||
|
|
d3a26f961d | ||
|
|
13b1ae53bc | ||
|
|
e68ca109c5 | ||
|
|
6740e988d2 | ||
|
|
ade9cc9e37 | ||
|
|
471e38e4e7 | ||
|
|
f3d829e2ef | ||
|
|
91885c2c7e | ||
|
|
f1fcafb888 | ||
|
|
fdff114701 | ||
|
|
1154be5eea | ||
|
|
8aba4fdba3 | ||
|
|
d7d7721eae | ||
|
|
c548150f99 | ||
|
|
ec26b86dd4 | ||
|
|
d11b202dd2 | ||
|
|
e95018ef70 | ||
|
|
0258f8af55 | ||
|
|
14b29ebf4e |
307
.github/backend-matrix.yml
vendored
307
.github/backend-matrix.yml
vendored
@@ -3745,6 +3745,302 @@ include:
|
|||||||
dockerfile: "./backend/Dockerfile.golang"
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
context: "./"
|
context: "./"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
|
# voice-detect
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "8"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-12-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-13-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-cuda-13-arm64-voice-detect'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-voice-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f32-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f16-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-voice-detect'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2204'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-rocm-hipblas-voice-detect'
|
||||||
|
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
# face-detect
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "8"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-12-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-13-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-cuda-13-arm64-face-detect'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-face-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f32-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f16-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-face-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-face-detect'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2204'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-rocm-hipblas-face-detect'
|
||||||
|
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
# acestep-cpp
|
# acestep-cpp
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
cuda-major-version: ""
|
cuda-major-version: ""
|
||||||
@@ -4928,6 +5224,14 @@ includeDarwin:
|
|||||||
tag-suffix: "-metal-darwin-arm64-ced"
|
tag-suffix: "-metal-darwin-arm64-ced"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
lang: "go"
|
lang: "go"
|
||||||
|
- backend: "voice-detect"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-voice-detect"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
|
- backend: "face-detect"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-face-detect"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
- backend: "acestep-cpp"
|
- backend: "acestep-cpp"
|
||||||
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
@@ -4991,9 +5295,6 @@ includeDarwin:
|
|||||||
- backend: "qwen-tts"
|
- backend: "qwen-tts"
|
||||||
tag-suffix: "-metal-darwin-arm64-qwen-tts"
|
tag-suffix: "-metal-darwin-arm64-qwen-tts"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
- backend: "fish-speech"
|
|
||||||
tag-suffix: "-metal-darwin-arm64-fish-speech"
|
|
||||||
build-type: "mps"
|
|
||||||
- backend: "voxcpm"
|
- backend: "voxcpm"
|
||||||
tag-suffix: "-metal-darwin-arm64-voxcpm"
|
tag-suffix: "-metal-darwin-arm64-voxcpm"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
|
|||||||
8
.github/workflows/bump_deps.yaml
vendored
8
.github/workflows/bump_deps.yaml
vendored
@@ -46,6 +46,14 @@ jobs:
|
|||||||
variable: "CED_VERSION"
|
variable: "CED_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/ced/Makefile"
|
file: "backend/go/ced/Makefile"
|
||||||
|
- repository: "mudler/voice-detect.cpp"
|
||||||
|
variable: "VOICEDETECT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
file: "backend/go/voice-detect/Makefile"
|
||||||
|
- repository: "mudler/face-detect.cpp"
|
||||||
|
variable: "FACEDETECT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
file: "backend/go/face-detect/Makefile"
|
||||||
- repository: "mudler/depth-anything.cpp"
|
- repository: "mudler/depth-anything.cpp"
|
||||||
variable: "DEPTHANYTHING_VERSION"
|
variable: "DEPTHANYTHING_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
|||||||
6
.github/workflows/test-extra.yml
vendored
6
.github/workflows/test-extra.yml
vendored
@@ -1008,7 +1008,11 @@ jobs:
|
|||||||
# image + working dir.
|
# image + working dir.
|
||||||
tests-vibevoice-cpp-grpc-transcription:
|
tests-vibevoice-cpp-grpc-transcription:
|
||||||
needs: detect-changes
|
needs: detect-changes
|
||||||
if: needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
# Skip on release tag pushes: the ASR Q4_K model is ~10 GB and cannot be
|
||||||
|
# pulled from HF within the inner `go test -timeout 30m` budget on a CI
|
||||||
|
# runner, so every tag build hung and timed out. Still runs on PRs/branch
|
||||||
|
# pushes that touch vibevoice-cpp so regressions are caught off the release path.
|
||||||
|
if: (needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true') && !startsWith(github.ref, 'refs/tags/')
|
||||||
runs-on: bigger-runner
|
runs-on: bigger-runner
|
||||||
timeout-minutes: 150
|
timeout-minutes: 150
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
@@ -177,6 +177,7 @@ For more details, see the [Getting Started guide](https://localai.io/basics/gett
|
|||||||
|
|
||||||
## Latest News
|
## Latest News
|
||||||
|
|
||||||
|
- **June 2026**: New native biometric backends from the LocalAI team: [voice-detect.cpp](https://github.com/mudler/voice-detect.cpp) for speaker recognition and voice analysis (ECAPA-TDNN, WeSpeaker, ERes2Net, CAM++, wav2vec2 age/gender/emotion) and [face-detect.cpp](https://github.com/mudler/face-detect.cpp) for face detection, recognition, demographics and anti-spoofing (SCRFD/ArcFace, YuNet/SFace). Both are from-scratch C++/ggml engines with no Python or onnxruntime at inference, self-contained GGUF weights, bit-exact parity with the reference, and GPU cuDNN parity, replacing the heavier Python `insightface` and `speaker-recognition` backends ([PR #10441](https://github.com/mudler/LocalAI/pull/10441)).
|
||||||
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
||||||
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
||||||
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ RUN <<EOT bash
|
|||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||||
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} libcudnn9-dev-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||||
fi
|
fi
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
@@ -1,15 +1,6 @@
|
|||||||
## Clip/LLaVA library for multimodal support — built locally from copied sources
|
## Multimodal support is provided by the in-tree `mtmd` library target
|
||||||
set(TARGET myclip)
|
## (examples/mtmd/), which the grpc-server links and includes below. clip/llava
|
||||||
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
|
## were pruned upstream; the high-level mtmd_* / mtmd_helper_* API is used instead.
|
||||||
install(TARGETS ${TARGET} LIBRARY)
|
|
||||||
target_include_directories(myclip PUBLIC .)
|
|
||||||
target_include_directories(myclip PUBLIC ../..)
|
|
||||||
target_include_directories(myclip PUBLIC ../../common)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
|
||||||
if (NOT MSVC)
|
|
||||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(TARGET grpc-server)
|
set(TARGET grpc-server)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
@@ -67,12 +58,16 @@ add_library(hw_grpc_proto
|
|||||||
${hw_proto_hdrs} )
|
${hw_proto_hdrs} )
|
||||||
|
|
||||||
add_executable(${TARGET} grpc-server.cpp json.hpp)
|
add_executable(${TARGET} grpc-server.cpp json.hpp)
|
||||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
# mtmd public headers (mtmd.h / mtmd-helper.h) live in examples/mtmd/.
|
||||||
|
# Linking the mtmd target also propagates this include dir, but we add it
|
||||||
|
# explicitly for clarity.
|
||||||
|
target_include_directories(${TARGET} PRIVATE ../mtmd)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||||
absl::flags_parse
|
absl::flags_parse
|
||||||
gRPC::${_REFLECTION}
|
gRPC::${_REFLECTION}
|
||||||
gRPC::${_GRPC_GRPCPP}
|
gRPC::${_GRPC_GRPCPP}
|
||||||
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||||
if(TARGET BUILD_INFO)
|
if(TARGET BUILD_INFO)
|
||||||
add_dependencies(${TARGET} BUILD_INFO)
|
add_dependencies(${TARGET} BUILD_INFO)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
IK_LLAMA_VERSION?=b84902d2ad27c34f989f23947200c4b91b1568fd
|
IK_LLAMA_VERSION?=f96eaddba8bed6a9a5e628bbf6a566775c70b49c
|
||||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -11,8 +11,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "clip.h"
|
#include "mtmd.h"
|
||||||
#include "llava.h"
|
#include "mtmd-helper.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
@@ -45,7 +45,9 @@ using backend::HealthMessage;
|
|||||||
|
|
||||||
///// LLAMA.CPP server code below
|
///// LLAMA.CPP server code below
|
||||||
|
|
||||||
using json = nlohmann::json;
|
// Match mtmd.h and ik_llama's server/common headers, which all use
|
||||||
|
// nlohmann::ordered_json; a plain nlohmann::json alias collides at global scope.
|
||||||
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
struct server_params
|
struct server_params
|
||||||
{
|
{
|
||||||
@@ -219,6 +221,11 @@ struct llama_client_slot
|
|||||||
|
|
||||||
// multimodal
|
// multimodal
|
||||||
std::vector<slot_image> images;
|
std::vector<slot_image> images;
|
||||||
|
// Full prompt with mtmd media markers (mtmd_default_marker()) substituted in
|
||||||
|
// place of the legacy [img-N] tags, covering the text up to and including the
|
||||||
|
// last image. The text after the last image is kept in params.input_suffix and
|
||||||
|
// decoded through the normal token path so the sampling loop is unchanged.
|
||||||
|
std::string mtmd_prompt;
|
||||||
|
|
||||||
// stats
|
// stats
|
||||||
size_t sent_count = 0;
|
size_t sent_count = 0;
|
||||||
@@ -252,14 +259,14 @@ struct llama_client_slot
|
|||||||
|
|
||||||
for (slot_image & img : images)
|
for (slot_image & img : images)
|
||||||
{
|
{
|
||||||
free(img.image_embedding);
|
if (img.bitmap) {
|
||||||
if (img.img_data) {
|
mtmd_bitmap_free(img.bitmap);
|
||||||
clip_image_u8_free(img.img_data);
|
img.bitmap = nullptr;
|
||||||
}
|
}
|
||||||
img.prefix_prompt = "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
images.clear();
|
images.clear();
|
||||||
|
mtmd_prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_budget(gpt_params &global_params) {
|
bool has_budget(gpt_params &global_params) {
|
||||||
@@ -396,46 +403,13 @@ struct llama_metrics {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct llava_embd_batch {
|
|
||||||
std::vector<llama_pos> pos;
|
|
||||||
std::vector<int32_t> n_seq_id;
|
|
||||||
std::vector<llama_seq_id> seq_id_0;
|
|
||||||
std::vector<llama_seq_id *> seq_ids;
|
|
||||||
std::vector<int8_t> logits;
|
|
||||||
llama_batch batch;
|
|
||||||
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
|
|
||||||
pos .resize(n_tokens);
|
|
||||||
n_seq_id.resize(n_tokens);
|
|
||||||
seq_ids .resize(n_tokens + 1);
|
|
||||||
logits .resize(n_tokens);
|
|
||||||
seq_id_0.resize(1);
|
|
||||||
seq_id_0[0] = seq_id;
|
|
||||||
seq_ids [n_tokens] = nullptr;
|
|
||||||
batch = {
|
|
||||||
/*n_tokens =*/ n_tokens,
|
|
||||||
/*tokens =*/ nullptr,
|
|
||||||
/*embd =*/ embd,
|
|
||||||
/*pos =*/ pos.data(),
|
|
||||||
/*n_seq_id =*/ n_seq_id.data(),
|
|
||||||
/*seq_id =*/ seq_ids.data(),
|
|
||||||
/*logits =*/ logits.data(),
|
|
||||||
};
|
|
||||||
for (int i = 0; i < n_tokens; i++) {
|
|
||||||
batch.pos [i] = pos_0 + i;
|
|
||||||
batch.n_seq_id[i] = 1;
|
|
||||||
batch.seq_id [i] = seq_id_0.data();
|
|
||||||
batch.logits [i] = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_server_context
|
struct llama_server_context
|
||||||
{
|
{
|
||||||
llama_model *model = nullptr;
|
llama_model *model = nullptr;
|
||||||
llama_context *ctx = nullptr;
|
llama_context *ctx = nullptr;
|
||||||
const llama_vocab * vocab = nullptr;
|
const llama_vocab * vocab = nullptr;
|
||||||
|
|
||||||
clip_ctx *clp_ctx = nullptr;
|
mtmd_context *mctx = nullptr;
|
||||||
|
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
||||||
@@ -491,11 +465,6 @@ struct llama_server_context
|
|||||||
if (!params.mmproj.path.empty()) {
|
if (!params.mmproj.path.empty()) {
|
||||||
multimodal = true;
|
multimodal = true;
|
||||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||||
clp_ctx = clip_model_load(params.mmproj.path.c_str(), /*verbosity=*/ 1);
|
|
||||||
if(clp_ctx == nullptr) {
|
|
||||||
LOG_ERR("unable to load clip model: %s", params.mmproj.path.c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (params.n_ctx < 2048) { // request larger context for the image embedding
|
if (params.n_ctx < 2048) { // request larger context for the image embedding
|
||||||
params.n_ctx = 2048;
|
params.n_ctx = 2048;
|
||||||
@@ -512,10 +481,24 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (multimodal) {
|
if (multimodal) {
|
||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
// mtmd_init_from_file requires the already-loaded text model, so it must
|
||||||
const int n_embd_llm = llama_model_n_embd(model);
|
// run AFTER llama_init_from_gpt_params. It validates the projector
|
||||||
if (n_embd_clip != n_embd_llm) {
|
// against the model internally and returns nullptr on dim mismatch, so
|
||||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
// the explicit clip_n_mmproj_embd check is no longer needed.
|
||||||
|
mtmd_context_params mparams = mtmd_context_params_default();
|
||||||
|
mparams.use_gpu = params.mmproj_use_gpu;
|
||||||
|
mparams.print_timings = false;
|
||||||
|
mparams.n_threads = params.n_threads_mtmd != -1 ? params.n_threads_mtmd
|
||||||
|
: params.n_threads_batch != -1 ? params.n_threads_batch
|
||||||
|
: params.n_threads;
|
||||||
|
mparams.verbosity = GGML_LOG_LEVEL_INFO;
|
||||||
|
mparams.flash_attn_type = params.flash_attn ? LLAMA_FLASH_ATTN_TYPE_ENABLED
|
||||||
|
: LLAMA_FLASH_ATTN_TYPE_DISABLED;
|
||||||
|
mparams.image_min_tokens = params.image_min_tokens;
|
||||||
|
mparams.image_max_tokens = params.image_max_tokens;
|
||||||
|
mctx = mtmd_init_from_file(params.mmproj.path.c_str(), model, mparams);
|
||||||
|
if (mctx == nullptr) {
|
||||||
|
LOG_ERR("unable to load multimodal projector: %s", params.mmproj.path.c_str());
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
return false;
|
return false;
|
||||||
@@ -865,8 +848,8 @@ struct llama_server_context
|
|||||||
|
|
||||||
slot_image img_sl;
|
slot_image img_sl;
|
||||||
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
|
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
|
||||||
img_sl.img_data = clip_image_u8_init();
|
img_sl.bitmap = mtmd_helper_bitmap_init_from_buf(mctx, image_buffer.data(), image_buffer.size());
|
||||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
if (img_sl.bitmap == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
||||||
__func__,
|
__func__,
|
||||||
@@ -879,22 +862,40 @@ struct llama_server_context
|
|||||||
{"slot_id", slot->id},
|
{"slot_id", slot->id},
|
||||||
{"img_sl_id", img_sl.id}
|
{"img_sl_id", img_sl.id}
|
||||||
});
|
});
|
||||||
img_sl.request_encode_image = true;
|
|
||||||
slot->images.push_back(img_sl);
|
slot->images.push_back(img_sl);
|
||||||
}
|
}
|
||||||
// process prompt
|
// Translate the legacy [img-N] tags into mtmd media markers, in
|
||||||
// example: system prompt [img-102] user [img-103] describe [img-134] -> [{id: 102, prefix: 'system prompt '}, {id: 103, prefix: ' user '}, {id: 134, prefix: ' describe '}]}
|
// order, and collect the matching bitmaps in marker order so they
|
||||||
|
// line up with the markers passed to mtmd_tokenize(). The text after
|
||||||
|
// the last image stays in input_suffix and is decoded through the
|
||||||
|
// normal token path, so the sampling loop is unchanged.
|
||||||
|
// example: system prompt [img-102] user [img-103] describe [img-134]
|
||||||
if (slot->images.size() > 0 && !slot->prompt.is_array())
|
if (slot->images.size() > 0 && !slot->prompt.is_array())
|
||||||
{
|
{
|
||||||
|
const std::string marker = mtmd_default_marker();
|
||||||
std::string prompt = slot->prompt.get<std::string>();
|
std::string prompt = slot->prompt.get<std::string>();
|
||||||
size_t pos = 0, begin_prefix = 0;
|
std::string built_prompt;
|
||||||
|
std::vector<slot_image> ordered;
|
||||||
|
size_t pos = 0, copy_from = 0;
|
||||||
std::string pattern = "[img-";
|
std::string pattern = "[img-";
|
||||||
|
|
||||||
|
auto free_images = [&]() {
|
||||||
|
for (slot_image &img : slot->images) {
|
||||||
|
if (img.bitmap) {
|
||||||
|
mtmd_bitmap_free(img.bitmap);
|
||||||
|
img.bitmap = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slot->images.clear();
|
||||||
|
};
|
||||||
|
|
||||||
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
||||||
size_t end_prefix = pos;
|
size_t tag_begin = pos;
|
||||||
pos += pattern.length();
|
pos += pattern.length();
|
||||||
size_t end_pos = prompt.find(']', pos);
|
size_t end_pos = prompt.find(']', pos);
|
||||||
if (end_pos != std::string::npos)
|
if (end_pos == std::string::npos) {
|
||||||
{
|
break;
|
||||||
|
}
|
||||||
std::string image_id = prompt.substr(pos, end_pos - pos);
|
std::string image_id = prompt.substr(pos, end_pos - pos);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@@ -904,25 +905,31 @@ struct llama_server_context
|
|||||||
{
|
{
|
||||||
if (img.id == img_id) {
|
if (img.id == img_id) {
|
||||||
found = true;
|
found = true;
|
||||||
img.prefix_prompt = prompt.substr(begin_prefix, end_prefix - begin_prefix);
|
// text before this tag, then the media marker
|
||||||
begin_prefix = end_pos + 1;
|
built_prompt += prompt.substr(copy_from, tag_begin - copy_from);
|
||||||
|
built_prompt += marker;
|
||||||
|
copy_from = end_pos + 1;
|
||||||
|
ordered.push_back(img);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||||
slot->images.clear();
|
free_images();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (const std::invalid_argument& e) {
|
} catch (const std::invalid_argument& e) {
|
||||||
LOG("Invalid image number id in prompt\n");
|
LOG("Invalid image number id in prompt\n");
|
||||||
slot->images.clear();
|
free_images();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
pos = end_pos + 1;
|
||||||
}
|
}
|
||||||
}
|
// bitmaps are consumed in marker order by mtmd_tokenize()
|
||||||
|
slot->images = ordered;
|
||||||
|
slot->mtmd_prompt = built_prompt;
|
||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
slot->params.input_suffix = prompt.substr(begin_prefix);
|
slot->params.input_suffix = prompt.substr(copy_from);
|
||||||
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
|
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1176,21 +1183,10 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool process_images(llama_client_slot &slot) const
|
bool process_images(llama_client_slot &slot) const
|
||||||
{
|
{
|
||||||
for (slot_image &img : slot.images)
|
// With the mtmd pipeline, image encoding is no longer eager: the bitmaps
|
||||||
{
|
// are tokenized and encoded together with the surrounding text inside
|
||||||
if (!img.request_encode_image)
|
// ingest_images() via mtmd_tokenize() + mtmd_helper_eval_chunks(). This
|
||||||
{
|
// just reports whether the slot carries any images to process.
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
|
||||||
LOG("Error processing the given image");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
img.request_encode_image = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return slot.images.size() > 0;
|
return slot.images.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1435,70 +1431,71 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// for multiple images processing
|
// Tokenize the multimodal prompt (text interleaved with media markers) together
|
||||||
|
// with the slot's bitmaps, then decode the resulting chunks into the llama
|
||||||
|
// context via the high-level mtmd helper. The helper runs llama_decode() on the
|
||||||
|
// text chunks and mtmd_encode() + llama_decode() on the image chunks, handling
|
||||||
|
// batching and any pre/post decode setup (e.g. non-causal attention for gemma3).
|
||||||
|
// Advances slot.n_past by the number of positions consumed, then leaves the
|
||||||
|
// post-image suffix tokens in `batch` so the normal decode + sampling loop
|
||||||
|
// produces the first generated token.
|
||||||
bool ingest_images(llama_client_slot &slot, int n_batch)
|
bool ingest_images(llama_client_slot &slot, int n_batch)
|
||||||
{
|
{
|
||||||
int image_idx = 0;
|
if (mctx == nullptr)
|
||||||
|
|
||||||
while (image_idx < (int) slot.images.size())
|
|
||||||
{
|
{
|
||||||
slot_image &img = slot.images[image_idx];
|
LOG("%s : multimodal context is not initialized\n", __func__);
|
||||||
|
|
||||||
// process prefix prompt
|
|
||||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
|
|
||||||
{
|
|
||||||
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
|
|
||||||
llama_batch batch_view = {
|
|
||||||
n_tokens,
|
|
||||||
batch.token + i,
|
|
||||||
nullptr,
|
|
||||||
batch.pos + i,
|
|
||||||
batch.n_seq_id + i,
|
|
||||||
batch.seq_id + i,
|
|
||||||
batch.logits + i,
|
|
||||||
};
|
|
||||||
if (llama_decode(ctx, batch_view))
|
|
||||||
{
|
|
||||||
LOG("%s : failed to eval\n", __func__);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bitmaps stay owned by slot.images (freed on reset()); pass non-owning ptrs
|
||||||
|
std::vector<const mtmd_bitmap *> bitmaps;
|
||||||
|
bitmaps.reserve(slot.images.size());
|
||||||
|
for (const slot_image &img : slot.images)
|
||||||
|
{
|
||||||
|
bitmaps.push_back(img.bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
// process image with llm
|
mtmd_input_text inp_txt;
|
||||||
for (int i = 0; i < img.image_tokens; i += n_batch)
|
inp_txt.text = slot.mtmd_prompt.c_str();
|
||||||
{
|
inp_txt.add_special = add_bos_token;
|
||||||
int n_eval = img.image_tokens - i;
|
inp_txt.parse_special = true;
|
||||||
if (n_eval > n_batch)
|
|
||||||
{
|
|
||||||
n_eval = n_batch;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int n_embd = llama_model_n_embd(model);
|
mtmd::input_chunks chunks(mtmd_input_chunks_init());
|
||||||
float * embd = img.image_embedding + i * n_embd;
|
int32_t res = mtmd_tokenize(mctx,
|
||||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
chunks.ptr.get(),
|
||||||
if (llama_decode(ctx, llava_batch.batch))
|
&inp_txt,
|
||||||
|
bitmaps.data(),
|
||||||
|
bitmaps.size());
|
||||||
|
if (res != 0)
|
||||||
{
|
{
|
||||||
LOG("%s : failed to eval image\n", __func__);
|
LOG("%s : failed to tokenize multimodal prompt, res = %d\n", __func__, res);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
slot.n_past += n_eval;
|
|
||||||
}
|
|
||||||
image_idx++;
|
|
||||||
|
|
||||||
|
const llama_pos start_pos = (llama_pos) system_tokens.size() + slot.n_past;
|
||||||
|
llama_pos new_n_past = start_pos;
|
||||||
|
if (mtmd_helper_eval_chunks(mctx,
|
||||||
|
ctx,
|
||||||
|
chunks.ptr.get(),
|
||||||
|
start_pos,
|
||||||
|
slot.id,
|
||||||
|
n_batch,
|
||||||
|
/*logits_last=*/ false,
|
||||||
|
&new_n_past) != 0)
|
||||||
|
{
|
||||||
|
LOG("%s : failed to eval multimodal chunks\n", __func__);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
slot.n_past += (int32_t) (new_n_past - start_pos);
|
||||||
|
|
||||||
|
// queue the post-image suffix text for the normal decode + sampling path
|
||||||
common_batch_clear(batch);
|
common_batch_clear(batch);
|
||||||
|
std::vector<llama_token> suffix_tokens = tokenize(slot.params.input_suffix, false);
|
||||||
// append prefix of next image
|
for (llama_token tok : suffix_tokens)
|
||||||
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
|
|
||||||
slot.params.input_suffix : // no more images, then process suffix prompt
|
|
||||||
(json)(slot.images[image_idx].prefix_prompt);
|
|
||||||
|
|
||||||
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
|
||||||
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
|
||||||
{
|
{
|
||||||
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
common_batch_add(batch, tok, system_tokens.size() + slot.n_past, { slot.id }, false);
|
||||||
slot.n_past += 1;
|
slot.n_past += 1;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1884,8 +1881,11 @@ struct llama_server_context
|
|||||||
|
|
||||||
const bool has_images = process_images(slot);
|
const bool has_images = process_images(slot);
|
||||||
|
|
||||||
// process the prefix of first image
|
// For the multimodal path the whole pre-image / inter-image text is
|
||||||
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
|
// tokenized and decoded inside ingest_images() via mtmd, so no prefix
|
||||||
|
// tokens are queued here; the post-image suffix is appended by
|
||||||
|
// ingest_images() for the normal decode + sampling loop.
|
||||||
|
std::vector<llama_token> prefix_tokens = has_images ? std::vector<llama_token>() : prompt_tokens;
|
||||||
|
|
||||||
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
|
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +0,0 @@
|
|||||||
--- a/examples/llava/clip.cpp
|
|
||||||
+++ b/examples/llava/clip.cpp
|
|
||||||
@@ -2494,7 +2494,7 @@
|
|
||||||
}
|
|
||||||
new_data = work.data();
|
|
||||||
|
|
||||||
- new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
|
|
||||||
+ new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr, nullptr);
|
|
||||||
} else {
|
|
||||||
new_type = cur->type;
|
|
||||||
new_data = cur->data;
|
|
||||||
@@ -17,28 +17,9 @@ cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
|||||||
cp -r utils.hpp llama.cpp/examples/grpc-server/
|
cp -r utils.hpp llama.cpp/examples/grpc-server/
|
||||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/examples/grpc-server/
|
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
|
||||||
## Copy clip/llava files for multimodal support (built as myclip library)
|
## Multimodal support is provided by the `mtmd` library target (examples/mtmd/),
|
||||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
## which the grpc-server links and includes directly. No source copy is needed:
|
||||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
## clip/llava were pruned upstream and the high-level mtmd_* API is used instead.
|
||||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
|
||||||
# Prepend llama.h include to llava.h
|
|
||||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
|
||||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
|
||||||
# Copy clip-impl.h if it exists
|
|
||||||
if [ -f llama.cpp/examples/llava/clip-impl.h ]; then
|
|
||||||
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
|
|
||||||
fi
|
|
||||||
# Copy stb_image.h
|
|
||||||
if [ -f llama.cpp/vendor/stb/stb_image.h ]; then
|
|
||||||
cp -rfv llama.cpp/vendor/stb/stb_image.h llama.cpp/examples/grpc-server/stb_image.h
|
|
||||||
elif [ -f llama.cpp/common/stb_image.h ]; then
|
|
||||||
cp -rfv llama.cpp/common/stb_image.h llama.cpp/examples/grpc-server/stb_image.h
|
|
||||||
fi
|
|
||||||
|
|
||||||
## Fix API compatibility in llava.cpp (llama_n_embd -> llama_model_n_embd)
|
|
||||||
if [ -f llama.cpp/examples/grpc-server/llava.cpp ]; then
|
|
||||||
sed -i 's/llama_n_embd(/llama_model_n_embd(/g' llama.cpp/examples/grpc-server/llava.cpp
|
|
||||||
fi
|
|
||||||
|
|
||||||
set +e
|
set +e
|
||||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
||||||
|
|||||||
@@ -11,9 +11,12 @@
|
|||||||
|
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
|
|
||||||
#include "clip.h"
|
#include "mtmd.h"
|
||||||
|
|
||||||
using json = nlohmann::json;
|
// mtmd.h and ik_llama's entire server/common stack (chat.h, server-common.h,
|
||||||
|
// server-task.h, ...) declare `using json = nlohmann::ordered_json`, so match it
|
||||||
|
// here: a plain `nlohmann::json` alias collides with mtmd.h's at global scope.
|
||||||
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
extern bool server_verbose;
|
extern bool server_verbose;
|
||||||
|
|
||||||
@@ -111,13 +114,12 @@ struct slot_image
|
|||||||
{
|
{
|
||||||
int32_t id;
|
int32_t id;
|
||||||
|
|
||||||
bool request_encode_image = false;
|
// mtmd bitmap (image/audio) decoded from the request buffer. Owned by the
|
||||||
float * image_embedding = nullptr;
|
// slot; freed via mtmd_bitmap_free() on reset. The high-level mtmd pipeline
|
||||||
int32_t image_tokens = 0;
|
// (mtmd_tokenize + mtmd_helper_eval_chunks) consumes these directly, so the
|
||||||
|
// legacy eager-encode fields (embedding/tokens) and per-image prefix prompt
|
||||||
clip_image_u8 * img_data;
|
// are no longer needed.
|
||||||
|
mtmd_bitmap * bitmap = nullptr;
|
||||||
std::string prefix_prompt; // before of this image
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// completion token output with probabilities
|
// completion token output with probabilities
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=9d5d882d8cd0f0a9283d87ed5e6fe3ee0d925fb1
|
LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
@@ -156,11 +156,11 @@ llama-cpp-grpc: llama.cpp
|
|||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target ggml-rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
||||||
|
|
||||||
llama-cpp-rpc-server: llama-cpp-grpc
|
llama-cpp-rpc-server: llama-cpp-grpc
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/ggml-rpc-server llama-cpp-rpc-server
|
||||||
|
|
||||||
llama.cpp:
|
llama.cpp:
|
||||||
mkdir -p llama.cpp
|
mkdir -p llama.cpp
|
||||||
|
|||||||
@@ -30,6 +30,19 @@
|
|||||||
#define LOCALAI_HAS_SERVER_SCHEMA 1
|
#define LOCALAI_HAS_SERVER_SCHEMA 1
|
||||||
#include "server-schema.cpp"
|
#include "server-schema.cpp"
|
||||||
#endif
|
#endif
|
||||||
|
// server-stream.cpp exists only in llama.cpp after the upstream refactor that
|
||||||
|
// added the SSE stream-resumption layer (stream_session/stream_pipe_producer).
|
||||||
|
// server-context.cpp calls into it (spipe->cleanup(), stream_aware_should_stop,
|
||||||
|
// stream_session_attach_pipe), so its definitions must be part of this
|
||||||
|
// translation unit or the link fails with "undefined reference to
|
||||||
|
// stream_pipe_producer::cleanup()". The file is self-contained (its only
|
||||||
|
// external symbols come from server-common, already pulled in above) and the
|
||||||
|
// http route-handler factories it also defines are unused here but harmless.
|
||||||
|
// __has_include keeps the source compatible with older pins/forks that predate
|
||||||
|
// the split.
|
||||||
|
#if __has_include("server-stream.cpp")
|
||||||
|
#include "server-stream.cpp"
|
||||||
|
#endif
|
||||||
#include "server-context.cpp"
|
#include "server-context.cpp"
|
||||||
|
|
||||||
// LocalAI
|
// LocalAI
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# CrispASR version (release tag)
|
# CrispASR version (release tag)
|
||||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||||
CRISPASR_VERSION?=8f1218141b792b8868861c1af17ba1e361b05dc0
|
CRISPASR_VERSION?=6514c9da00b03a2f0f1b49a43fae4f3a01a41844
|
||||||
SO_TARGET?=libgocrispasr.so
|
SO_TARGET?=libgocrispasr.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
18
backend/go/face-detect/.gitignore
vendored
Normal file
18
backend/go/face-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Fetched upstream sources
|
||||||
|
sources/
|
||||||
|
|
||||||
|
# CMake build directories
|
||||||
|
build*/
|
||||||
|
|
||||||
|
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||||
|
# symlinked for local dev; the real sources live in face-detect.cpp upstream.
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
facedetect_capi.h
|
||||||
|
compile_commands.json
|
||||||
|
|
||||||
|
# Compiled backend binary
|
||||||
|
face-detect-grpc
|
||||||
|
|
||||||
|
# Packaging output
|
||||||
|
package/
|
||||||
110
backend/go/face-detect/Makefile
Normal file
110
backend/go/face-detect/Makefile
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
# face-detect backend Makefile.
|
||||||
|
#
|
||||||
|
# Upstream pin lives below as FACEDETECT_VERSION?=06914b0... (.github/bump_deps.sh
|
||||||
|
# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
|
||||||
|
# convention).
|
||||||
|
#
|
||||||
|
# Local dev shortcut: if you already have an out-of-tree face-detect.cpp build,
|
||||||
|
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||||
|
#
|
||||||
|
# ln -sf /path/to/face-detect.cpp/build-shared/libfacedetect.so .
|
||||||
|
# ln -sf /path/to/face-detect.cpp/include/facedetect_capi.h .
|
||||||
|
# go build -o face-detect-grpc .
|
||||||
|
#
|
||||||
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
|
# not need a side-checkout.
|
||||||
|
|
||||||
|
FACEDETECT_VERSION?=06914b077d52f90d5421299138e7be6bdd06b5e8
|
||||||
|
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
GO_TAGS?=
|
||||||
|
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
|
||||||
|
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||||
|
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||||
|
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||||
|
|
||||||
|
# Build ggml + the vendored libjpeg-turbo statically into libfacedetect.so (PIC)
|
||||||
|
# so the shared lib is self-contained: dlopen needs no libggml*.so alongside it,
|
||||||
|
# only system libs (libstdc++/libgomp/libc) the runtime image already provides.
|
||||||
|
# The vendored jpeg symbols are hidden via -Wl,--exclude-libs,ALL on the C++
|
||||||
|
# side, so only the facedetect_capi_* surface is exported.
|
||||||
|
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DFACEDETECT_SHARED=ON -DFACEDETECT_BUILD_CLI=OFF -DFACEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||||
|
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# face-detect.cpp gates its GGML backends behind FACEDETECT_GGML_* options and
|
||||||
|
# does set(GGML_CUDA ${FACEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||||
|
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the FACEDETECT_GGML_*
|
||||||
|
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDA=ON
|
||||||
|
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, SCRFD 2.3x
|
||||||
|
# vs torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||||
|
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||||
|
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||||
|
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||||
|
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDNN=ON
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_HIP=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_VULKAN=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_METAL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: face-detect-grpc package build clean purge test all
|
||||||
|
|
||||||
|
all: face-detect-grpc
|
||||||
|
|
||||||
|
# Clone the upstream face-detect.cpp source at the pinned commit. Directory acts
|
||||||
|
# as the target so make only re-clones when missing. After a FACEDETECT_VERSION
|
||||||
|
# bump, run 'make purge && make' to refetch.
|
||||||
|
sources/face-detect.cpp:
|
||||||
|
mkdir -p sources/face-detect.cpp
|
||||||
|
cd sources/face-detect.cpp && \
|
||||||
|
git init -q && \
|
||||||
|
git remote add origin $(FACEDETECT_REPO) && \
|
||||||
|
git fetch --depth 1 origin $(FACEDETECT_VERSION) && \
|
||||||
|
git checkout FETCH_HEAD && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||||
|
# sources so purego.Dlopen("libfacedetect.so") and the cgo-less build both pick
|
||||||
|
# them up.
|
||||||
|
libfacedetect.so: sources/face-detect.cpp
|
||||||
|
cmake -B sources/face-detect.cpp/build-shared -S sources/face-detect.cpp $(CMAKE_ARGS)
|
||||||
|
cmake --build sources/face-detect.cpp/build-shared --config Release -j$(JOBS) --target facedetect
|
||||||
|
cp -fv sources/face-detect.cpp/build-shared/libfacedetect.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/face-detect.cpp/include/facedetect_capi.h ./
|
||||||
|
|
||||||
|
face-detect-grpc: libfacedetect.so main.go gofacedetect.go options.go
|
||||||
|
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o face-detect-grpc .
|
||||||
|
|
||||||
|
package: face-detect-grpc
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
build: package
|
||||||
|
|
||||||
|
# Test target. The embed/detect/verify/analyze smoke specs are gated on
|
||||||
|
# FACEDETECT_BACKEND_TEST_MODEL + FACEDETECT_BACKEND_TEST_IMAGE; without them the
|
||||||
|
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||||
|
test:
|
||||||
|
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf libfacedetect.so* facedetect_capi.h package face-detect-grpc
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf sources/face-detect.cpp
|
||||||
431
backend/go/face-detect/gofacedetect.go
Normal file
431
backend/go/face-detect/gofacedetect.go
Normal file
@@ -0,0 +1,431 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// purego-bound entry points from libfacedetect.so. Names match
|
||||||
|
// facedetect_capi.h exactly so a `nm libfacedetect.so | grep facedetect_capi`
|
||||||
|
// is enough to spot drift.
|
||||||
|
//
|
||||||
|
// The opaque ctx and the malloc'd char*/float* return values are declared as
|
||||||
|
// uintptr so we get the raw pointer back and can release it via the matching
|
||||||
|
// capi free function. purego's native string/[]float32 returns would copy and
|
||||||
|
// forget the original pointer, leaking the C-owned buffer on every call.
|
||||||
|
var (
|
||||||
|
CppAbiVersion func() int32
|
||||||
|
CppLoad func(ggufPath string) uintptr
|
||||||
|
CppFree func(ctx uintptr)
|
||||||
|
CppLastError func(ctx uintptr) string
|
||||||
|
CppFreeString func(s uintptr)
|
||||||
|
CppFreeVec func(v uintptr)
|
||||||
|
CppEmbedPath func(ctx uintptr, imagePath string, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppEmbedRGB func(ctx uintptr, rgb []byte, width, height int32, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppDetectJSON func(ctx uintptr, imagePath string) uintptr
|
||||||
|
CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, antiSpoof int32, outDistance, outVerified unsafe.Pointer) int32
|
||||||
|
CppAnalyzeJSON func(ctx uintptr, imagePath string) uintptr
|
||||||
|
)
|
||||||
|
|
||||||
|
// FaceDetect implements the face-recognition (biometric) subset of the Backend
|
||||||
|
// gRPC service over libfacedetect.so. The C side keeps a single loaded model
|
||||||
|
// pack plus a per-ctx last-error buffer and is not reentrant, so
|
||||||
|
// base.SingleThread serializes every call.
|
||||||
|
type FaceDetect struct {
|
||||||
|
base.SingleThread
|
||||||
|
opts loadOptions
|
||||||
|
ctxPtr uintptr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) Load(opts *pb.ModelOptions) error {
|
||||||
|
model := opts.ModelFile
|
||||||
|
if model == "" {
|
||||||
|
model = opts.ModelPath
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||||
|
model = filepath.Join(opts.ModelPath, model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
return errors.New("face-detect: ModelFile is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
f.opts = parseOptions(opts.Options)
|
||||||
|
if f.opts.modelName == "" {
|
||||||
|
f.opts.modelName = filepath.Base(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||||
|
// one backend process per model and serves requests concurrently, so the
|
||||||
|
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||||
|
// FACEDETECT_THREADS is read by the engine at backend construction, so it
|
||||||
|
// must be set before the capi load. A non-positive Threads means "unset":
|
||||||
|
// leave the env alone so the engine keeps its sane default.
|
||||||
|
threads := opts.Threads
|
||||||
|
if threads > 0 {
|
||||||
|
if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||||
|
return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err)
|
||||||
|
}
|
||||||
|
xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Info("face-detect: loading model", "model", model,
|
||||||
|
"verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||||
|
|
||||||
|
ctx := CppLoad(model)
|
||||||
|
if ctx == 0 {
|
||||||
|
// The last-error buffer lives on the ctx that was never returned, so
|
||||||
|
// surface the path the operator tried to load instead.
|
||||||
|
return fmt.Errorf("face-detect: facedetect_capi_load failed for %q", model)
|
||||||
|
}
|
||||||
|
f.ctxPtr = ctx
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Embeddings returns the L2-normalized ArcFace embedding of the primary face in
|
||||||
|
// the supplied image. Mirroring the Python face backend, the image is read from
|
||||||
|
// Images[0] as a base64 payload; materializeImage decodes it to a temp file so
|
||||||
|
// the path-based C-API can run its own decode (cv2.imread parity). The gRPC
|
||||||
|
// server wraps the returned slice in an EmbeddingResult.
|
||||||
|
func (f *FaceDetect) Embeddings(req *pb.PredictOptions) ([]float32, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return nil, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if len(req.Images) == 0 || req.Images[0] == "" {
|
||||||
|
return nil, errors.New("face-detect: Embedding requires Images[0] to be a base64 image")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Images[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
return f.embedPath(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) embedPath(path string) ([]float32, error) {
|
||||||
|
var vec uintptr
|
||||||
|
var dim int32
|
||||||
|
rc := CppEmbedPath(f.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||||
|
if rc != 0 || vec == 0 || dim <= 0 {
|
||||||
|
return nil, f.lastErr("embed", path)
|
||||||
|
}
|
||||||
|
defer CppFreeVec(vec)
|
||||||
|
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||||
|
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||||
|
// nor moves this buffer and we copy immediately.
|
||||||
|
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||||
|
out := make([]float32, int(dim))
|
||||||
|
copy(out, src)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect runs SCRFD over the image and returns one Detection per face. The
|
||||||
|
// C-API emits a box as [x1,y1,x2,y2] in pixels; the proto carries x/y plus
|
||||||
|
// width/height, so the corners are converted. The 5 facial landmarks the engine
|
||||||
|
// also returns are dropped: the Detection message has no field for them.
|
||||||
|
func (f *FaceDetect) Detect(req *pb.DetectOptions) (pb.DetectResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.DetectResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Src == "" {
|
||||||
|
return pb.DetectResponse{}, errors.New("face-detect: src image is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Src)
|
||||||
|
if err != nil {
|
||||||
|
return pb.DetectResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
faces, err := f.detectFaces(path)
|
||||||
|
if err != nil {
|
||||||
|
return pb.DetectResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
dets := make([]*pb.Detection, 0, len(faces))
|
||||||
|
for _, fc := range faces {
|
||||||
|
if req.Threshold > 0 && fc.Score < req.Threshold {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x, y, w, h := fc.xywh()
|
||||||
|
dets = append(dets, &pb.Detection{
|
||||||
|
X: x,
|
||||||
|
Y: y,
|
||||||
|
Width: w,
|
||||||
|
Height: h,
|
||||||
|
Confidence: fc.Score,
|
||||||
|
ClassName: "face",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return pb.DetectResponse{Detections: dets}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaceVerify embeds the primary face in each image and reports whether they are
|
||||||
|
// the same identity by cosine distance against a threshold. A request threshold
|
||||||
|
// <= 0 falls back to the model-configured default (verify_threshold option,
|
||||||
|
// 0.35 if unset). When anti_spoofing is set, the C-API applies a MiniFASNet
|
||||||
|
// veto internally (verified forced false on a spoof); the per-image liveness
|
||||||
|
// scores are not exposed by the verify entry point, so img*_is_real /
|
||||||
|
// img*_antispoof_score stay at their zero values.
|
||||||
|
func (f *FaceDetect) FaceVerify(req *pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.FaceVerifyResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Img1 == "" || req.Img2 == "" {
|
||||||
|
return pb.FaceVerifyResponse{}, errors.New("face-detect: img1 and img2 are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path1, cleanup1, err := materializeImage(req.Img1)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceVerifyResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup1()
|
||||||
|
path2, cleanup2, err := materializeImage(req.Img2)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceVerifyResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup2()
|
||||||
|
|
||||||
|
threshold := req.Threshold
|
||||||
|
if threshold <= 0 {
|
||||||
|
threshold = f.opts.verifyThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
antiSpoof := int32(0)
|
||||||
|
if req.AntiSpoofing {
|
||||||
|
antiSpoof = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now()
|
||||||
|
var distance float32
|
||||||
|
var verified int32
|
||||||
|
rc := CppVerifyPaths(f.ctxPtr, path1, path2, threshold, antiSpoof,
|
||||||
|
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||||
|
if rc != 0 {
|
||||||
|
return pb.FaceVerifyResponse{}, f.lastErr("verify", req.Img1[:min(8, len(req.Img1))]+"...")
|
||||||
|
}
|
||||||
|
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||||
|
|
||||||
|
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||||
|
// matching the Python face backend's reporting.
|
||||||
|
confidence := float32(0)
|
||||||
|
if threshold > 0 {
|
||||||
|
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.FaceVerifyResponse{
|
||||||
|
Verified: verified != 0,
|
||||||
|
Distance: distance,
|
||||||
|
Threshold: threshold,
|
||||||
|
Confidence: confidence,
|
||||||
|
Model: f.opts.modelName,
|
||||||
|
Img1Area: f.bestArea(path1),
|
||||||
|
Img2Area: f.bestArea(path2),
|
||||||
|
ProcessingTimeMs: elapsedMs,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaceAnalyze runs the genderage head on every detected face. The C-API returns
|
||||||
|
// "M"/"F" gender labels and a rounded age; the labels are normalized to the
|
||||||
|
// "Man"/"Woman" values the proto documents.
|
||||||
|
func (f *FaceDetect) FaceAnalyze(req *pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Img == "" {
|
||||||
|
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: img is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Img)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceAnalyzeResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
ptr := CppAnalyzeJSON(f.ctxPtr, path)
|
||||||
|
if ptr == 0 {
|
||||||
|
return pb.FaceAnalyzeResponse{}, f.lastErr("analyze", path)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
faces, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceAnalyzeResponse{}, fmt.Errorf("face-detect: analyze JSON: %w", err)
|
||||||
|
}
|
||||||
|
return pb.FaceAnalyzeResponse{Faces: faces}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// faceBox mirrors a single element of the "faces" array in the detect and
// analyze JSON documents produced by the engine. Age and Gender are only
// populated by the analyze document.
type faceBox struct {
	Score  float32   `json:"score"`
	Box    []float32 `json:"box"`
	Age    float32   `json:"age"`
	Gender string    `json:"gender"`
}

// xywh turns the engine's corner box [x1,y1,x2,y2] into the origin plus
// width/height form used by the proto. If fewer than four coordinates are
// present, all four results are zero.
func (b faceBox) xywh() (x, y, w, h float32) {
	if len(b.Box) >= 4 {
		x1, y1, x2, y2 := b.Box[0], b.Box[1], b.Box[2], b.Box[3]
		x, y, w, h = x1, y1, x2-x1, y2-y1
	}
	return
}

// facesJSON is the top-level envelope of the detect/analyze JSON documents.
type facesJSON struct {
	Faces []faceBox `json:"faces"`
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) detectFaces(path string) ([]faceBox, error) {
|
||||||
|
ptr := CppDetectJSON(f.ctxPtr, path)
|
||||||
|
if ptr == 0 {
|
||||||
|
return nil, f.lastErr("detect", path)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
var doc facesJSON
|
||||||
|
if err := json.Unmarshal([]byte(goStringFromCPtr(ptr)), &doc); err != nil {
|
||||||
|
return nil, fmt.Errorf("face-detect: detect JSON: %w", err)
|
||||||
|
}
|
||||||
|
return doc.Faces, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bestArea returns the FacialArea of the highest-scoring face in an image, or an
|
||||||
|
// empty area when detection fails or finds nothing. Best-effort: verify already
|
||||||
|
// succeeded, so a missing region must not turn a valid match into an error.
|
||||||
|
func (f *FaceDetect) bestArea(path string) *pb.FacialArea {
|
||||||
|
faces, err := f.detectFaces(path)
|
||||||
|
if err != nil || len(faces) == 0 {
|
||||||
|
return &pb.FacialArea{}
|
||||||
|
}
|
||||||
|
best := faces[0]
|
||||||
|
for _, fc := range faces[1:] {
|
||||||
|
if fc.Score > best.Score {
|
||||||
|
best = fc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x, y, w, h := best.xywh()
|
||||||
|
return &pb.FacialArea{X: x, Y: y, W: w, H: h}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseAnalyzeJSON maps the engine's analyze document onto FaceAnalysis entries.
|
||||||
|
// The engine reports gender as "M"/"F"; both the dominant label and the score
|
||||||
|
// map are filled with the "Man"/"Woman" form the proto documents.
|
||||||
|
func parseAnalyzeJSON(doc string) ([]*pb.FaceAnalysis, error) {
|
||||||
|
var parsed facesJSON
|
||||||
|
if err := json.Unmarshal([]byte(doc), &parsed); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]*pb.FaceAnalysis, 0, len(parsed.Faces))
|
||||||
|
for _, fc := range parsed.Faces {
|
||||||
|
x, y, w, h := fc.xywh()
|
||||||
|
fa := &pb.FaceAnalysis{
|
||||||
|
Region: &pb.FacialArea{X: x, Y: y, W: w, H: h},
|
||||||
|
FaceConfidence: fc.Score,
|
||||||
|
Age: fc.Age,
|
||||||
|
}
|
||||||
|
if label := normalizeGender(fc.Gender); label != "" {
|
||||||
|
fa.DominantGender = label
|
||||||
|
fa.Gender = map[string]float32{label: 1.0}
|
||||||
|
}
|
||||||
|
out = append(out, fa)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeGender translates the engine's "M"/"F" code (case-insensitive,
// surrounding whitespace ignored) into the "Man"/"Woman" labels the proto
// documents. Blank input yields ""; any other code is returned exactly as it
// was passed in.
func normalizeGender(g string) string {
	code := strings.ToUpper(strings.TrimSpace(g))
	if code == "" {
		return ""
	}
	if code == "M" {
		return "Man"
	}
	if code == "F" {
		return "Woman"
	}
	return g
}
|
||||||
|
|
||||||
|
// materializeImage decodes a base64 image payload into a temp file and returns
|
||||||
|
// its path plus a cleanup func. As a convenience for callers that already pass a
|
||||||
|
// filesystem path (e.g. a test fixture), an existing path is used as-is with a
|
||||||
|
// no-op cleanup. data: URI prefixes are stripped before decoding.
|
||||||
|
func materializeImage(src string) (path string, cleanup func(), err error) {
|
||||||
|
noop := func() {}
|
||||||
|
if src == "" {
|
||||||
|
return "", noop, errors.New("face-detect: empty image input")
|
||||||
|
}
|
||||||
|
if _, statErr := os.Stat(src); statErr == nil {
|
||||||
|
return src, noop, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
payload := src
|
||||||
|
if i := strings.Index(payload, ","); strings.HasPrefix(payload, "data:") && i >= 0 {
|
||||||
|
payload = payload[i+1:]
|
||||||
|
}
|
||||||
|
data, decErr := base64.StdEncoding.DecodeString(strings.TrimSpace(payload))
|
||||||
|
if decErr != nil || len(data) == 0 {
|
||||||
|
return "", noop, errors.New("face-detect: image is neither an existing path nor valid base64")
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp, createErr := os.CreateTemp("", "face-detect-*.img")
|
||||||
|
if createErr != nil {
|
||||||
|
return "", noop, fmt.Errorf("face-detect: create temp image: %w", createErr)
|
||||||
|
}
|
||||||
|
cleanup = func() { _ = os.Remove(tmp.Name()) }
|
||||||
|
if _, wErr := tmp.Write(data); wErr != nil {
|
||||||
|
_ = tmp.Close()
|
||||||
|
cleanup()
|
||||||
|
return "", noop, fmt.Errorf("face-detect: write temp image: %w", wErr)
|
||||||
|
}
|
||||||
|
if cErr := tmp.Close(); cErr != nil {
|
||||||
|
cleanup()
|
||||||
|
return "", noop, fmt.Errorf("face-detect: close temp image: %w", cErr)
|
||||||
|
}
|
||||||
|
return tmp.Name(), cleanup, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||||
|
func (f *FaceDetect) lastErr(op, subject string) error {
|
||||||
|
msg := strings.TrimSpace(CppLastError(f.ctxPtr))
|
||||||
|
if msg == "" {
|
||||||
|
msg = "no error detail"
|
||||||
|
}
|
||||||
|
return fmt.Errorf("face-detect: %s failed for %q: %s", op, subject, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||||
|
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||||
|
//
|
||||||
|
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||||
|
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||||
|
func goStringFromCPtr(cptr uintptr) string {
|
||||||
|
if cptr == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||||
|
n := 0
|
||||||
|
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return string(unsafe.Slice((*byte)(p), n))
|
||||||
|
}
|
||||||
230
backend/go/face-detect/gofacedetect_test.go
Normal file
230
backend/go/face-detect/gofacedetect_test.go
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFaceDetect(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "face-detect Backend Suite")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
libLoadOnce sync.Once
|
||||||
|
libLoadErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||||
|
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||||
|
// specs skip themselves) when libfacedetect.so is not loadable from cwd
|
||||||
|
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||||
|
func ensureLibLoaded() error {
|
||||||
|
libLoadOnce.Do(func() {
|
||||||
|
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libfacedetect.so"
|
||||||
|
}
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
libLoadErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
purego.RegisterLibFunc(&CppAbiVersion, lib, "facedetect_capi_abi_version")
|
||||||
|
purego.RegisterLibFunc(&CppLoad, lib, "facedetect_capi_load")
|
||||||
|
purego.RegisterLibFunc(&CppFree, lib, "facedetect_capi_free")
|
||||||
|
purego.RegisterLibFunc(&CppLastError, lib, "facedetect_capi_last_error")
|
||||||
|
purego.RegisterLibFunc(&CppFreeString, lib, "facedetect_capi_free_string")
|
||||||
|
purego.RegisterLibFunc(&CppFreeVec, lib, "facedetect_capi_free_vec")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPath, lib, "facedetect_capi_embed_path")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedRGB, lib, "facedetect_capi_embed_rgb")
|
||||||
|
purego.RegisterLibFunc(&CppDetectJSON, lib, "facedetect_capi_detect_path_json")
|
||||||
|
purego.RegisterLibFunc(&CppVerifyPaths, lib, "facedetect_capi_verify_paths")
|
||||||
|
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "facedetect_capi_analyze_path_json")
|
||||||
|
})
|
||||||
|
return libLoadErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("parseOptions", func() {
|
||||||
|
It("defaults verify_threshold to 0.35", func() {
|
||||||
|
o := parseOptions(nil)
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||||
|
Expect(o.modelName).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0.4", "model_name:buffalo_l", "unknown:x"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||||
|
Expect(o.modelName).To(Equal("buffalo_l"))
|
||||||
|
|
||||||
|
o2 := parseOptions([]string{"threshold:0.3"})
|
||||||
|
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores non-positive thresholds and keeps the default", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("normalizeGender", func() {
|
||||||
|
It("maps M/F codes to Man/Woman", func() {
|
||||||
|
Expect(normalizeGender("M")).To(Equal("Man"))
|
||||||
|
Expect(normalizeGender("f")).To(Equal("Woman"))
|
||||||
|
Expect(normalizeGender(" m ")).To(Equal("Man"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("passes empty and unknown codes through", func() {
|
||||||
|
Expect(normalizeGender("")).To(Equal(""))
|
||||||
|
Expect(normalizeGender("nonbinary")).To(Equal("nonbinary"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("faceBox.xywh", func() {
|
||||||
|
It("converts an [x1,y1,x2,y2] box to x/y/width/height", func() {
|
||||||
|
b := faceBox{Box: []float32{10, 20, 50, 80}}
|
||||||
|
x, y, w, h := b.xywh()
|
||||||
|
Expect(x).To(Equal(float32(10)))
|
||||||
|
Expect(y).To(Equal(float32(20)))
|
||||||
|
Expect(w).To(Equal(float32(40)))
|
||||||
|
Expect(h).To(Equal(float32(60)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns zeros for a short box", func() {
|
||||||
|
x, y, w, h := faceBox{Box: []float32{1, 2}}.xywh()
|
||||||
|
Expect([]float32{x, y, w, h}).To(Equal([]float32{0, 0, 0, 0}))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("parseAnalyzeJSON", func() {
|
||||||
|
It("maps region, age and gender for each face", func() {
|
||||||
|
doc := `{"faces":[
|
||||||
|
{"score":0.997,"box":[10,20,50,80],"age":31,"gender":"M"},
|
||||||
|
{"score":0.81,"box":[0,0,40,40],"age":24,"gender":"F"}]}`
|
||||||
|
faces, err := parseAnalyzeJSON(doc)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(HaveLen(2))
|
||||||
|
|
||||||
|
Expect(faces[0].FaceConfidence).To(BeNumerically("~", 0.997, 1e-4))
|
||||||
|
Expect(faces[0].Age).To(BeNumerically("~", 31, 1e-4))
|
||||||
|
Expect(faces[0].DominantGender).To(Equal("Man"))
|
||||||
|
Expect(faces[0].Gender).To(HaveKeyWithValue("Man", float32(1.0)))
|
||||||
|
Expect(faces[0].Region.W).To(Equal(float32(40)))
|
||||||
|
Expect(faces[0].Region.H).To(Equal(float32(60)))
|
||||||
|
|
||||||
|
Expect(faces[1].DominantGender).To(Equal("Woman"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("tolerates a missing gender field", func() {
|
||||||
|
faces, err := parseAnalyzeJSON(`{"faces":[{"score":0.5,"box":[0,0,10,10],"age":40}]}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(HaveLen(1))
|
||||||
|
Expect(faces[0].DominantGender).To(Equal(""))
|
||||||
|
Expect(faces[0].Gender).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns no faces for an empty document", func() {
|
||||||
|
faces, err := parseAnalyzeJSON(`{"faces":[]}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error on malformed JSON", func() {
|
||||||
|
_, err := parseAnalyzeJSON(`{not-json`)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("materializeImage", func() {
|
||||||
|
It("decodes a base64 payload to a temp file", func() {
|
||||||
|
payload := base64.StdEncoding.EncodeToString([]byte("\xff\xd8\xff\xe0fake-jpeg"))
|
||||||
|
path, cleanup, err := materializeImage(payload)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
data, rerr := os.ReadFile(path)
|
||||||
|
Expect(rerr).ToNot(HaveOccurred())
|
||||||
|
Expect(data).To(Equal([]byte("\xff\xd8\xff\xe0fake-jpeg")))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("strips a data: URI prefix before decoding", func() {
|
||||||
|
payload := "data:image/png;base64," + base64.StdEncoding.EncodeToString([]byte("hello"))
|
||||||
|
path, cleanup, err := materializeImage(payload)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
data, rerr := os.ReadFile(path)
|
||||||
|
Expect(rerr).ToNot(HaveOccurred())
|
||||||
|
Expect(data).To(Equal([]byte("hello")))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("uses an existing path as-is", func() {
|
||||||
|
tmp, err := os.CreateTemp("", "face-detect-fixture-*.bin")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer func() { _ = os.Remove(tmp.Name()) }()
|
||||||
|
Expect(tmp.Close()).To(Succeed())
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(tmp.Name())
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
Expect(path).To(Equal(tmp.Name()))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("errors on input that is neither a path nor base64", func() {
|
||||||
|
_, _, err := materializeImage("not base64!!!")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// The specs below exercise the real C-API end to end. They run only when both a
|
||||||
|
// model GGUF and a test image are provided, and skip cleanly otherwise so the
|
||||||
|
// suite stays green without large assets.
|
||||||
|
var _ = Describe("FaceDetect end-to-end", Ordered, func() {
|
||||||
|
var (
|
||||||
|
f *FaceDetect
|
||||||
|
modelPath = os.Getenv("FACEDETECT_BACKEND_TEST_MODEL")
|
||||||
|
imagePath = os.Getenv("FACEDETECT_BACKEND_TEST_IMAGE")
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeAll(func() {
|
||||||
|
if modelPath == "" || imagePath == "" {
|
||||||
|
Skip("set FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE to run the e2e specs")
|
||||||
|
}
|
||||||
|
if err := ensureLibLoaded(); err != nil {
|
||||||
|
Skip("libfacedetect.so not loadable: " + err.Error())
|
||||||
|
}
|
||||||
|
f = &FaceDetect{}
|
||||||
|
Expect(f.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("embeds the primary face in an image", func() {
|
||||||
|
emb, err := f.Embeddings(&pb.PredictOptions{Images: []string{imagePath}})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(emb).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("detects at least one face", func() {
|
||||||
|
resp, err := f.Detect(&pb.DetectOptions{Src: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Detections).ToNot(BeEmpty())
|
||||||
|
Expect(resp.Detections[0].ClassName).To(Equal("face"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("verifies an image against itself as the same identity", func() {
|
||||||
|
resp, err := f.FaceVerify(&pb.FaceVerifyRequest{Img1: imagePath, Img2: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Verified).To(BeTrue())
|
||||||
|
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("analyzes age/gender for each face", func() {
|
||||||
|
resp, err := f.FaceAnalyze(&pb.FaceAnalyzeRequest{Img: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Faces).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
})
|
||||||
65
backend/go/face-detect/main.go
Normal file
65
backend/go/face-detect/main.go
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
|
//
|
||||||
|
// Loads libfacedetect.so via purego and registers the flat C-API entry points
|
||||||
|
// declared in facedetect_capi.h. The library name can be overridden with
|
||||||
|
// FACEDETECT_LIBRARY (mirrors the VOICEDETECT_LIBRARY / PARAKEET_LIBRARY
|
||||||
|
// convention in the sibling backends); the default looks for the .so next to
|
||||||
|
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
type LibFuncs struct {
|
||||||
|
FuncPtr any
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libfacedetect.so"
|
||||||
|
}
|
||||||
|
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("face-detect: dlopen %q: %w", libName, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bound 1:1 to facedetect_capi.h. char*/float* returns are registered as
|
||||||
|
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppAbiVersion, "facedetect_capi_abi_version"},
|
||||||
|
{&CppLoad, "facedetect_capi_load"},
|
||||||
|
{&CppFree, "facedetect_capi_free"},
|
||||||
|
{&CppLastError, "facedetect_capi_last_error"},
|
||||||
|
{&CppFreeString, "facedetect_capi_free_string"},
|
||||||
|
{&CppFreeVec, "facedetect_capi_free_vec"},
|
||||||
|
{&CppEmbedPath, "facedetect_capi_embed_path"},
|
||||||
|
{&CppEmbedRGB, "facedetect_capi_embed_rgb"},
|
||||||
|
{&CppDetectJSON, "facedetect_capi_detect_path_json"},
|
||||||
|
{&CppVerifyPaths, "facedetect_capi_verify_paths"},
|
||||||
|
{&CppAnalyzeJSON, "facedetect_capi_analyze_path_json"},
|
||||||
|
}
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "[face-detect] ABI=%d\n", CppAbiVersion())
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &FaceDetect{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
47
backend/go/face-detect/options.go
Normal file
47
backend/go/face-detect/options.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultVerifyThreshold is the cosine-distance cutoff used when a request does
// not set one. Matches the insightface buffalo_l ArcFace R50 default the Python
// face backend ships with so the two implementations agree on verdicts out of
// the box.
const defaultVerifyThreshold float32 = 0.35

// loadOptions holds the parsed model-level options for face-detect.
type loadOptions struct {
	// verifyThreshold is the default verify cutoff; always finite and > 0.
	verifyThreshold float32
	// modelName is the value of the model_name option, "" when not given.
	modelName string
}

// splitOption splits a "key:value" option at its first colon and trims
// whitespace from both halves. ok is false when the option has no colon.
// Later colons stay part of the value.
func splitOption(o string) (key, value string, ok bool) {
	i := strings.Index(o, ":")
	if i < 0 {
		return "", "", false
	}
	return strings.TrimSpace(o[:i]), strings.TrimSpace(o[i+1:]), true
}

// parseOptions reads the backend "key:value" option slice. Entries without a
// colon and unknown keys are ignored. Recognized keys:
//
//   - verify_threshold (alias: threshold): default verify cutoff. Values that
//     do not parse, are <= 0, or are not finite are ignored and the previous
//     value (initially 0.35) is kept.
//   - model_name: stored verbatim; left empty when the option is absent.
func parseOptions(opts []string) loadOptions {
	o := loadOptions{verifyThreshold: defaultVerifyThreshold}
	for _, oo := range opts {
		key, value, ok := splitOption(oo)
		if !ok {
			continue
		}
		switch key {
		case "verify_threshold", "threshold":
			f, err := strconv.ParseFloat(value, 32)
			if err != nil || !(f > 0) {
				continue
			}
			// ParseFloat accepts "inf"/"infinity" with a nil error. An
			// infinite cutoff would make every pair verify, so reject it.
			// f*0 is NaN (hence != 0) exactly when f is not finite.
			if f*0 != 0 {
				continue
			}
			o.verifyThreshold = float32(f)
		case "model_name":
			o.modelName = value
		}
	}
	return o
}
|
||||||
68
backend/go/face-detect/package.sh
Normal file
68
backend/go/face-detect/package.sh
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
#
# Bundle the face-detect-grpc binary, libfacedetect.so, the core runtime libs
# (libc/libstdc++/libgomp + ld.so) and the GPU runtime for the active BUILD_TYPE
# so the package is self-contained. Mirrors backend/go/voice-detect/package.sh;
# run.sh routes the (CGO_ENABLED=0) binary through lib/ld.so so the packaged libc
# is used instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/face-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libfacedetect.so + any soname symlinks. purego.Dlopen resolves it via
# LD_LIBRARY_PATH, which run.sh points at lib/.
cp -avf "$CURDIR"/libfacedetect.so* "$CURDIR/package/lib/" 2>/dev/null || {
    echo "ERROR: libfacedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
}

# copy_runtime_libs LOADER LIBDIR
# Copies the dynamic loader LOADER to lib/ld.so and the core runtime libraries
# from LIBDIR into lib/, dereferencing symlinks (-L) so real files are shipped.
copy_runtime_libs() {
    local loader="$1" libdir="$2" lib
    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 \
               libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$libdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

# Detect architecture and copy the core runtime libs libfacedetect.so links
# against, plus the matching dynamic loader as lib/ld.so.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_runtime_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_runtime_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    # Nothing is bundled on Darwin.
    echo "Detected Darwin"
else
    # Errors go to stderr, like the missing-library error above.
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# Package GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) based on
# BUILD_TYPE so the backend can reach the GPU without the runtime base image
# shipping those drivers.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||||
16
backend/go/face-detect/run.sh
Normal file
16
backend/go/face-detect/run.sh
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Launcher for the face-detect gRPC backend. Puts lib/ and the script directory
# in front of LD_LIBRARY_PATH, then replaces this shell with the backend binary.
set -e

HERE=$(dirname "$(realpath "$0")")
BACKEND_BIN="$HERE/face-detect-grpc"
BUNDLED_LOADER="$HERE/lib/ld.so"

export LD_LIBRARY_PATH="$HERE/lib:$HERE:${LD_LIBRARY_PATH:-}"

# Without a packaged ld.so, run the binary directly against the host loader.
if [ ! -f "$BUNDLED_LOADER" ]; then
    exec "$BACKEND_BIN" "$@"
fi

# A self-contained ld.so was packaged: go through it so the packaged libc /
# libstdc++ are used rather than the host's (same runtime layout as the
# voice-detect / whisper / parakeet backends).
echo "Using lib/ld.so"
exec "$BUNDLED_LOADER" "$BACKEND_BIN" "$@"
|
||||||
15
backend/go/face-detect/test.sh
Normal file
15
backend/go/face-detect/test.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
# Runs the face-detect Go test suite from the backend directory.
set -e

HERE=$(dirname "$(realpath "$0")")
cd "$HERE"

echo "Running face-detect backend tests..."

# The pure-Go parsing specs always run. The embed/detect/verify/analyze smoke
# specs need a model and an image, supplied through
# FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE; without them
# those specs skip themselves. The library path is set for this command only.
env LD_LIBRARY_PATH="$HERE:${LD_LIBRARY_PATH:-}" go test -v -timeout 1200s .

echo "face-detect tests completed."
|
||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=8caa3f908ae6d4a4bef531e73b9a969f266a3d1f
|
STABLEDIFFUSION_GGML_VERSION?=9956436c925a367daeab097598b1ea1f32d3503f
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
|
|||||||
18
backend/go/voice-detect/.gitignore
vendored
Normal file
18
backend/go/voice-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Fetched upstream sources
|
||||||
|
sources/
|
||||||
|
|
||||||
|
# CMake build directories
|
||||||
|
build*/
|
||||||
|
|
||||||
|
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||||
|
# symlinked for local dev; the real sources live in voice-detect.cpp upstream.
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
voicedetect_capi.h
|
||||||
|
compile_commands.json
|
||||||
|
|
||||||
|
# Compiled backend binary
|
||||||
|
voice-detect-grpc
|
||||||
|
|
||||||
|
# Packaging output
|
||||||
|
package/
|
||||||
107
backend/go/voice-detect/Makefile
Normal file
107
backend/go/voice-detect/Makefile
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
# voice-detect backend Makefile.
|
||||||
|
#
|
||||||
|
# Upstream pin lives below as VOICEDETECT_VERSION (.github/bump_deps.sh can find
# and update it - matches the parakeet.cpp / whisper.cpp / ds4 convention).
|
||||||
|
#
|
||||||
|
# Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
|
||||||
|
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||||
|
#
|
||||||
|
# ln -sf /path/to/voice-detect.cpp/build-shared/libvoicedetect.so .
|
||||||
|
# ln -sf /path/to/voice-detect.cpp/include/voicedetect_capi.h .
|
||||||
|
# go build -o voice-detect-grpc .
|
||||||
|
#
|
||||||
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
|
# not need a side-checkout.
|
||||||
|
|
||||||
|
VOICEDETECT_VERSION?=3d510772357538c5182808ac7de2278b84824e24
|
||||||
|
VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
GO_TAGS?=
|
||||||
|
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
|
||||||
|
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||||
|
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||||
|
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||||
|
|
||||||
|
# Build ggml statically into libvoicedetect.so (PIC) so the shared lib is
|
||||||
|
# self-contained: dlopen needs no libggml*.so alongside it, only system libs
|
||||||
|
# (libstdc++/libgomp/libc) that the runtime image already provides.
|
||||||
|
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DVOICEDETECT_SHARED=ON -DVOICEDETECT_BUILD_CLI=OFF -DVOICEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||||
|
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# voice-detect.cpp gates its GGML backends behind VOICEDETECT_GGML_* options and
|
||||||
|
# does set(GGML_CUDA ${VOICEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||||
|
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the VOICEDETECT_GGML_*
|
||||||
|
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_CUDA=ON
|
||||||
|
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, reaches
|
||||||
|
# torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||||
|
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||||
|
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||||
|
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||||
|
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_CUDNN=ON
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_HIP=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_VULKAN=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_METAL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: voice-detect-grpc package build clean purge test all
|
||||||
|
|
||||||
|
all: voice-detect-grpc
|
||||||
|
|
||||||
|
# Clone the upstream voice-detect.cpp source at the pinned commit. The directory
# is the make target, so the clone only runs when the directory is missing.
# After a VOICEDETECT_VERSION bump, run 'make purge && make' to refetch.
#
# If any step of the clone fails (network error, unknown commit, submodule
# failure) the partially initialised directory is removed. Otherwise the next
# 'make' would see the directory, consider the target up to date, and go on to
# run cmake against an empty or incomplete checkout.
sources/voice-detect.cpp:
	mkdir -p sources/voice-detect.cpp
	cd sources/voice-detect.cpp && \
	git init -q && \
	git remote add origin $(VOICEDETECT_REPO) && \
	git fetch --depth 1 origin $(VOICEDETECT_VERSION) && \
	git checkout FETCH_HEAD && \
	git submodule update --init --recursive --depth 1 --single-branch || \
	{ rm -rf "$(CURDIR)/sources/voice-detect.cpp"; exit 1; }
|
||||||
|
|
||||||
|
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||||
|
# sources so purego.Dlopen("libvoicedetect.so") and the cgo-less build both pick
|
||||||
|
# them up.
|
||||||
|
libvoicedetect.so: sources/voice-detect.cpp
|
||||||
|
cmake -B sources/voice-detect.cpp/build-shared -S sources/voice-detect.cpp $(CMAKE_ARGS)
|
||||||
|
cmake --build sources/voice-detect.cpp/build-shared --config Release -j$(JOBS) --target voicedetect
|
||||||
|
cp -fv sources/voice-detect.cpp/build-shared/libvoicedetect.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/voice-detect.cpp/include/voicedetect_capi.h ./
|
||||||
|
|
||||||
|
voice-detect-grpc: libvoicedetect.so main.go govoicedetect.go options.go
|
||||||
|
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o voice-detect-grpc .
|
||||||
|
|
||||||
|
package: voice-detect-grpc
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
build: package
|
||||||
|
|
||||||
|
# Test target. The embed/verify/analyze smoke specs are gated on
|
||||||
|
# VOICEDETECT_BACKEND_TEST_MODEL + VOICEDETECT_BACKEND_TEST_WAV; without them the
|
||||||
|
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||||
|
test:
|
||||||
|
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf libvoicedetect.so* voicedetect_capi.h package voice-detect-grpc
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf sources/voice-detect.cpp
|
||||||
273
backend/go/voice-detect/govoicedetect.go
Normal file
273
backend/go/voice-detect/govoicedetect.go
Normal file
@@ -0,0 +1,273 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// purego-bound entry points from libvoicedetect.so. Names match
|
||||||
|
// voicedetect_capi.h exactly so a `nm libvoicedetect.so | grep voicedetect_capi`
|
||||||
|
// is enough to spot drift.
|
||||||
|
//
|
||||||
|
// The opaque ctx and the malloc'd char*/float* return values are declared as
|
||||||
|
// uintptr so we get the raw pointer back and can release it via the matching
|
||||||
|
// capi free function. purego's native string/[]float32 returns would copy and
|
||||||
|
// forget the original pointer, leaking the C-owned buffer on every call.
|
||||||
|
var (
|
||||||
|
CppAbiVersion func() int32
|
||||||
|
CppLoad func(ggufPath string) uintptr
|
||||||
|
CppFree func(ctx uintptr)
|
||||||
|
CppLastError func(ctx uintptr) string
|
||||||
|
CppFreeString func(s uintptr)
|
||||||
|
CppFreeVec func(v uintptr)
|
||||||
|
CppEmbedPath func(ctx uintptr, wavPath string, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppEmbedPCM func(ctx uintptr, pcm []float32, nSamples, sampleRate int32, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, outDistance, outVerified unsafe.Pointer) int32
|
||||||
|
CppAnalyzeJSON func(ctx uintptr, wavPath string) uintptr
|
||||||
|
)
|
||||||
|
|
||||||
|
// VoiceDetect implements the speaker-recognition voice subset of the Backend
|
||||||
|
// gRPC service over libvoicedetect.so. The C side keeps a single loaded model
|
||||||
|
// plus a per-ctx last-error buffer and is not reentrant, so base.SingleThread
|
||||||
|
// serializes every call.
|
||||||
|
type VoiceDetect struct {
|
||||||
|
base.SingleThread
|
||||||
|
opts loadOptions
|
||||||
|
ctxPtr uintptr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *VoiceDetect) Load(opts *pb.ModelOptions) error {
|
||||||
|
model := opts.ModelFile
|
||||||
|
if model == "" {
|
||||||
|
model = opts.ModelPath
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||||
|
model = filepath.Join(opts.ModelPath, model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
return errors.New("voice-detect: ModelFile is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
v.opts = parseOptions(opts.Options)
|
||||||
|
if v.opts.modelName == "" {
|
||||||
|
v.opts.modelName = filepath.Base(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||||
|
// one backend process per model and serves requests concurrently, so the
|
||||||
|
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||||
|
// VOICEDETECT_THREADS is read by the engine at backend construction, so it
|
||||||
|
// must be set before the capi load. A non-positive Threads means "unset":
|
||||||
|
// leave the env alone so the engine keeps its sane default.
|
||||||
|
threads := opts.Threads
|
||||||
|
if threads > 0 {
|
||||||
|
if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||||
|
return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err)
|
||||||
|
}
|
||||||
|
xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Info("voice-detect: loading model", "model", model,
|
||||||
|
"verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||||
|
|
||||||
|
ctx := CppLoad(model)
|
||||||
|
if ctx == 0 {
|
||||||
|
// The last-error buffer lives on the ctx that was never returned, so
|
||||||
|
// surface the path the operator tried to load instead.
|
||||||
|
return fmt.Errorf("voice-detect: voicedetect_capi_load failed for %q", model)
|
||||||
|
}
|
||||||
|
v.ctxPtr = ctx
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceEmbed returns the L2-normalized speaker embedding for an audio clip.
|
||||||
|
// The request carries a filesystem PATH; the HTTP layer materializes
|
||||||
|
// base64/URL/data-URI inputs to a temp file before the gRPC call.
|
||||||
|
func (v *VoiceDetect) VoiceEmbed(req *pb.VoiceEmbedRequest) (pb.VoiceEmbedResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceEmbedResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio == "" {
|
||||||
|
return pb.VoiceEmbedResponse{}, errors.New("voice-detect: audio path is required")
|
||||||
|
}
|
||||||
|
emb, err := v.embedPath(req.Audio)
|
||||||
|
if err != nil {
|
||||||
|
return pb.VoiceEmbedResponse{}, err
|
||||||
|
}
|
||||||
|
return pb.VoiceEmbedResponse{Embedding: emb, Model: v.opts.modelName}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *VoiceDetect) embedPath(path string) ([]float32, error) {
|
||||||
|
var vec uintptr
|
||||||
|
var dim int32
|
||||||
|
rc := CppEmbedPath(v.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||||
|
if rc != 0 || vec == 0 || dim <= 0 {
|
||||||
|
return nil, v.lastErr("embed", path)
|
||||||
|
}
|
||||||
|
defer CppFreeVec(vec)
|
||||||
|
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||||
|
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||||
|
// nor moves this buffer and we copy immediately.
|
||||||
|
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||||
|
out := make([]float32, int(dim))
|
||||||
|
copy(out, src)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceVerify embeds two clips and reports whether they are the same speaker by
|
||||||
|
// cosine distance against a threshold. A request threshold <= 0 falls back to
|
||||||
|
// the model-configured default (verify_threshold option, 0.25 if unset).
|
||||||
|
func (v *VoiceDetect) VoiceVerify(req *pb.VoiceVerifyRequest) (pb.VoiceVerifyResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceVerifyResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio1 == "" || req.Audio2 == "" {
|
||||||
|
return pb.VoiceVerifyResponse{}, errors.New("voice-detect: audio1 and audio2 are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold := req.Threshold
|
||||||
|
if threshold <= 0 {
|
||||||
|
threshold = v.opts.verifyThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now()
|
||||||
|
var distance float32
|
||||||
|
var verified int32
|
||||||
|
rc := CppVerifyPaths(v.ctxPtr, req.Audio1, req.Audio2, threshold,
|
||||||
|
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||||
|
if rc != 0 {
|
||||||
|
return pb.VoiceVerifyResponse{}, v.lastErr("verify", req.Audio1+","+req.Audio2)
|
||||||
|
}
|
||||||
|
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||||
|
|
||||||
|
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||||
|
// matching the Python speaker-recognition backend's reporting.
|
||||||
|
confidence := float32(0)
|
||||||
|
if threshold > 0 {
|
||||||
|
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.VoiceVerifyResponse{
|
||||||
|
Verified: verified != 0,
|
||||||
|
Distance: distance,
|
||||||
|
Threshold: threshold,
|
||||||
|
Confidence: confidence,
|
||||||
|
Model: v.opts.modelName,
|
||||||
|
ProcessingTimeMs: elapsedMs,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceAnalyze runs the age/gender/emotion heads on a single clip. The C-API
|
||||||
|
// always evaluates every supported head, so the request's actions filter is
|
||||||
|
// advisory and the full analysis is returned as a single segment (the engine
|
||||||
|
// does not produce time-bounded segments).
|
||||||
|
func (v *VoiceDetect) VoiceAnalyze(req *pb.VoiceAnalyzeRequest) (pb.VoiceAnalyzeResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio == "" {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, errors.New("voice-detect: audio path is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr := CppAnalyzeJSON(v.ctxPtr, req.Audio)
|
||||||
|
if ptr == 0 {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, v.lastErr("analyze", req.Audio)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
seg, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||||
|
if err != nil {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, fmt.Errorf("voice-detect: analyze JSON for %q: %w", req.Audio, err)
|
||||||
|
}
|
||||||
|
return pb.VoiceAnalyzeResponse{Segments: []*pb.VoiceAnalysis{seg}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// analyzeJSON mirrors the document returned by voicedetect_capi_analyze_path_json:
|
||||||
|
//
|
||||||
|
// {"age":42.0,
|
||||||
|
// "gender":{"label":"female","female":0.88,"male":0.12},
|
||||||
|
// "emotion":{"label":"neutral","scores":{"neutral":0.7, ...}}}
|
||||||
|
//
|
||||||
|
// gender is a mixed object (a "label" string plus per-class float scores), so
|
||||||
|
// it is decoded into raw messages and split in parseAnalyzeJSON.
|
||||||
|
type analyzeJSON struct {
|
||||||
|
Age float32 `json:"age"`
|
||||||
|
Gender map[string]json.RawMessage `json:"gender"`
|
||||||
|
Emotion struct {
|
||||||
|
Label string `json:"label"`
|
||||||
|
Scores map[string]float32 `json:"scores"`
|
||||||
|
} `json:"emotion"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseAnalyzeJSON maps the engine's analyze document onto a VoiceAnalysis.
|
||||||
|
// start/end stay 0: the model emits a single whole-utterance result, not
|
||||||
|
// time-bounded segments.
|
||||||
|
func parseAnalyzeJSON(doc string) (*pb.VoiceAnalysis, error) {
|
||||||
|
var a analyzeJSON
|
||||||
|
if err := json.Unmarshal([]byte(doc), &a); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
seg := &pb.VoiceAnalysis{
|
||||||
|
Age: a.Age,
|
||||||
|
DominantEmotion: a.Emotion.Label,
|
||||||
|
Emotion: a.Emotion.Scores,
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(a.Gender) > 0 {
|
||||||
|
gender := make(map[string]float32, len(a.Gender))
|
||||||
|
for k, raw := range a.Gender {
|
||||||
|
if k == "label" {
|
||||||
|
_ = json.Unmarshal(raw, &seg.DominantGender)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var score float32
|
||||||
|
if err := json.Unmarshal(raw, &score); err == nil {
|
||||||
|
gender[k] = score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seg.Gender = gender
|
||||||
|
}
|
||||||
|
|
||||||
|
return seg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||||
|
func (v *VoiceDetect) lastErr(op, subject string) error {
|
||||||
|
msg := strings.TrimSpace(CppLastError(v.ctxPtr))
|
||||||
|
if msg == "" {
|
||||||
|
msg = "no error detail"
|
||||||
|
}
|
||||||
|
return fmt.Errorf("voice-detect: %s failed for %q: %s", op, subject, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||||
|
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||||
|
//
|
||||||
|
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||||
|
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||||
|
func goStringFromCPtr(cptr uintptr) string {
|
||||||
|
if cptr == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||||
|
n := 0
|
||||||
|
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return string(unsafe.Slice((*byte)(p), n))
|
||||||
|
}
|
||||||
144
backend/go/voice-detect/govoicedetect_test.go
Normal file
144
backend/go/voice-detect/govoicedetect_test.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVoiceDetect(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "voice-detect Backend Suite")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
libLoadOnce sync.Once
|
||||||
|
libLoadErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||||
|
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||||
|
// specs skip themselves) when libvoicedetect.so is not loadable from cwd
|
||||||
|
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||||
|
func ensureLibLoaded() error {
|
||||||
|
libLoadOnce.Do(func() {
|
||||||
|
libName := os.Getenv("VOICEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libvoicedetect.so"
|
||||||
|
}
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
libLoadErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
purego.RegisterLibFunc(&CppAbiVersion, lib, "voicedetect_capi_abi_version")
|
||||||
|
purego.RegisterLibFunc(&CppLoad, lib, "voicedetect_capi_load")
|
||||||
|
purego.RegisterLibFunc(&CppFree, lib, "voicedetect_capi_free")
|
||||||
|
purego.RegisterLibFunc(&CppLastError, lib, "voicedetect_capi_last_error")
|
||||||
|
purego.RegisterLibFunc(&CppFreeString, lib, "voicedetect_capi_free_string")
|
||||||
|
purego.RegisterLibFunc(&CppFreeVec, lib, "voicedetect_capi_free_vec")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPath, lib, "voicedetect_capi_embed_path")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPCM, lib, "voicedetect_capi_embed_pcm")
|
||||||
|
purego.RegisterLibFunc(&CppVerifyPaths, lib, "voicedetect_capi_verify_paths")
|
||||||
|
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "voicedetect_capi_analyze_path_json")
|
||||||
|
})
|
||||||
|
return libLoadErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("parseOptions", func() {
|
||||||
|
It("defaults verify_threshold to 0.25", func() {
|
||||||
|
o := parseOptions(nil)
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.25)))
|
||||||
|
Expect(o.modelName).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0.4", "model_name:ecapa", "unknown:x"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||||
|
Expect(o.modelName).To(Equal("ecapa"))
|
||||||
|
|
||||||
|
o2 := parseOptions([]string{"threshold:0.3"})
|
||||||
|
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores non-positive thresholds and keeps the default", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.25)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("parseAnalyzeJSON", func() {
|
||||||
|
It("maps age, gender label+scores and emotion label+scores", func() {
|
||||||
|
doc := `{"age":42.0,
|
||||||
|
"gender":{"label":"female","female":0.88,"male":0.12},
|
||||||
|
"emotion":{"label":"neutral","scores":{"neutral":0.7,"happy":0.2,"sad":0.1}}}`
|
||||||
|
seg, err := parseAnalyzeJSON(doc)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(seg.Age).To(BeNumerically("~", 42.0, 1e-4))
|
||||||
|
Expect(seg.Start).To(Equal(float32(0)))
|
||||||
|
Expect(seg.End).To(Equal(float32(0)))
|
||||||
|
|
||||||
|
Expect(seg.DominantGender).To(Equal("female"))
|
||||||
|
Expect(seg.Gender).To(HaveKeyWithValue("female", BeNumerically("~", 0.88, 1e-4)))
|
||||||
|
Expect(seg.Gender).To(HaveKeyWithValue("male", BeNumerically("~", 0.12, 1e-4)))
|
||||||
|
// The "label" entry is consumed into DominantGender, not the score map.
|
||||||
|
Expect(seg.Gender).ToNot(HaveKey("label"))
|
||||||
|
|
||||||
|
Expect(seg.DominantEmotion).To(Equal("neutral"))
|
||||||
|
Expect(seg.Emotion).To(HaveKeyWithValue("neutral", BeNumerically("~", 0.7, 1e-4)))
|
||||||
|
Expect(seg.Emotion).To(HaveKeyWithValue("happy", BeNumerically("~", 0.2, 1e-4)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("tolerates a missing gender block", func() {
|
||||||
|
seg, err := parseAnalyzeJSON(`{"age":30.0,"emotion":{"label":"happy","scores":{"happy":1.0}}}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(seg.DominantGender).To(Equal(""))
|
||||||
|
Expect(seg.DominantEmotion).To(Equal("happy"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error on malformed JSON", func() {
|
||||||
|
_, err := parseAnalyzeJSON(`{not-json`)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// The specs below exercise the real C-API end to end. They run only when both a
|
||||||
|
// model GGUF and a test WAV are provided, and skip cleanly otherwise so the
|
||||||
|
// suite stays green without large assets.
|
||||||
|
var _ = Describe("VoiceDetect end-to-end", Ordered, func() {
|
||||||
|
var (
|
||||||
|
v *VoiceDetect
|
||||||
|
modelPath = os.Getenv("VOICEDETECT_BACKEND_TEST_MODEL")
|
||||||
|
wavPath = os.Getenv("VOICEDETECT_BACKEND_TEST_WAV")
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeAll(func() {
|
||||||
|
if modelPath == "" || wavPath == "" {
|
||||||
|
Skip("set VOICEDETECT_BACKEND_TEST_MODEL and VOICEDETECT_BACKEND_TEST_WAV to run the e2e specs")
|
||||||
|
}
|
||||||
|
if err := ensureLibLoaded(); err != nil {
|
||||||
|
Skip("libvoicedetect.so not loadable: " + err.Error())
|
||||||
|
}
|
||||||
|
v = &VoiceDetect{}
|
||||||
|
Expect(v.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("embeds an audio clip", func() {
|
||||||
|
resp, err := v.VoiceEmbed(&pb.VoiceEmbedRequest{Audio: wavPath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Embedding).ToNot(BeEmpty())
|
||||||
|
Expect(resp.Model).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("verifies a clip against itself as the same speaker", func() {
|
||||||
|
resp, err := v.VoiceVerify(&pb.VoiceVerifyRequest{Audio1: wavPath, Audio2: wavPath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Verified).To(BeTrue())
|
||||||
|
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||||
|
})
|
||||||
|
})
|
||||||
64
backend/go/voice-detect/main.go
Normal file
64
backend/go/voice-detect/main.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
|
//
|
||||||
|
// Loads libvoicedetect.so via purego and registers the flat C-API entry points
|
||||||
|
// declared in voicedetect_capi.h. The library name can be overridden with
|
||||||
|
// VOICEDETECT_LIBRARY (mirrors the PARAKEET_LIBRARY / OMNIVOICE_LIBRARY
|
||||||
|
// convention in the sibling backends); the default looks for the .so next to
|
||||||
|
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
type LibFuncs struct {
|
||||||
|
FuncPtr any
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
libName := os.Getenv("VOICEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libvoicedetect.so"
|
||||||
|
}
|
||||||
|
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("voice-detect: dlopen %q: %w", libName, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bound 1:1 to voicedetect_capi.h. char*/float* returns are registered as
|
||||||
|
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppAbiVersion, "voicedetect_capi_abi_version"},
|
||||||
|
{&CppLoad, "voicedetect_capi_load"},
|
||||||
|
{&CppFree, "voicedetect_capi_free"},
|
||||||
|
{&CppLastError, "voicedetect_capi_last_error"},
|
||||||
|
{&CppFreeString, "voicedetect_capi_free_string"},
|
||||||
|
{&CppFreeVec, "voicedetect_capi_free_vec"},
|
||||||
|
{&CppEmbedPath, "voicedetect_capi_embed_path"},
|
||||||
|
{&CppEmbedPCM, "voicedetect_capi_embed_pcm"},
|
||||||
|
{&CppVerifyPaths, "voicedetect_capi_verify_paths"},
|
||||||
|
{&CppAnalyzeJSON, "voicedetect_capi_analyze_path_json"},
|
||||||
|
}
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "[voice-detect] ABI=%d\n", CppAbiVersion())
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &VoiceDetect{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
46
backend/go/voice-detect/options.go
Normal file
46
backend/go/voice-detect/options.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultVerifyThreshold is the cosine-distance cutoff used when a request does
|
||||||
|
// not set one. Matches the Python speaker-recognition backend's default so the
|
||||||
|
// two implementations agree on verdicts out of the box.
|
||||||
|
const defaultVerifyThreshold float32 = 0.25
|
||||||
|
|
||||||
|
// loadOptions holds the parsed model-level options for voice-detect.
|
||||||
|
type loadOptions struct {
|
||||||
|
verifyThreshold float32
|
||||||
|
modelName string
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitOption breaks a "key:value" option at its first colon and trims
// surrounding whitespace from both halves. Colons after the first stay in the
// value. ok is false, and both strings empty, when o contains no colon.
func splitOption(o string) (key, value string, ok bool) {
	rawKey, rawValue, found := strings.Cut(o, ":")
	if !found {
		return "", "", false
	}
	return strings.TrimSpace(rawKey), strings.TrimSpace(rawValue), true
}
|
||||||
|
|
||||||
|
// parseOptions reads the backend "key:value" option slice. Unknown keys are
|
||||||
|
// ignored. Defaults: verify_threshold 0.25, model_name derived from the file.
|
||||||
|
func parseOptions(opts []string) loadOptions {
|
||||||
|
o := loadOptions{verifyThreshold: defaultVerifyThreshold}
|
||||||
|
for _, oo := range opts {
|
||||||
|
key, value, ok := splitOption(oo)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch key {
|
||||||
|
case "verify_threshold", "threshold":
|
||||||
|
if f, err := strconv.ParseFloat(value, 32); err == nil && f > 0 {
|
||||||
|
o.verifyThreshold = float32(f)
|
||||||
|
}
|
||||||
|
case "model_name":
|
||||||
|
o.modelName = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
68
backend/go/voice-detect/package.sh
Executable file
68
backend/go/voice-detect/package.sh
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
#
# Assemble a self-contained package/ directory for the voice-detect backend:
# the voice-detect-grpc binary, run.sh, libvoicedetect.so, the core runtime
# libs (libc/libstdc++/libgomp + the dynamic loader as lib/ld.so) and the GPU
# runtime for the active BUILD_TYPE. Mirrors backend/go/parakeet-cpp/package.sh;
# run.sh routes the (CGO_ENABLED=0) binary through lib/ld.so so the packaged
# libc is used instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# Copy the dynamic loader (as lib/ld.so) and the core runtime libs that
# libvoicedetect.so links against into package/lib.
#   $1  path of the dynamic loader
#   $2  directory holding the runtime libraries for that architecture
stage_runtime_libs() {
    local loader="$1" libdir="$2" name
    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for name in \
        libc.so.6 \
        libgcc_s.so.1 \
        libstdc++.so.6 \
        libm.so.6 \
        libgomp.so.1 \
        libdl.so.2 \
        librt.so.1 \
        libpthread.so.0
    do
        cp -arfLv "$libdir/$name" "$CURDIR/package/lib/$name"
    done
}

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/voice-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libvoicedetect.so + any soname symlinks. purego.Dlopen resolves it via
# LD_LIBRARY_PATH, which run.sh points at lib/.
if ! cp -avf "$CURDIR"/libvoicedetect.so* "$CURDIR/package/lib/" 2>/dev/null; then
    echo "ERROR: libvoicedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
fi

# Pick the architecture by which dynamic loader exists on the build host.
# Darwin gets no bundled runtime libs; anything else is an error.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    stage_runtime_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    stage_runtime_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Package GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) based on
# BUILD_TYPE so the backend can reach the GPU without the runtime base image
# shipping those drivers. Skipped when the shared helper script is absent.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||||
16
backend/go/voice-detect/run.sh
Executable file
16
backend/go/voice-detect/run.sh
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||||
|
|
||||||
|
# If a self-contained ld.so was packaged, route through it so the packaged
|
||||||
|
# libc / libstdc++ are used instead of the host's (matches the whisper /
|
||||||
|
# parakeet backends' runtime layout).
|
||||||
|
if [ -f "$CURDIR/lib/ld.so" ]; then
|
||||||
|
echo "Using lib/ld.so"
|
||||||
|
exec "$CURDIR/lib/ld.so" "$CURDIR/voice-detect-grpc" "$@"
|
||||||
|
fi
|
||||||
|
|
||||||
|
exec "$CURDIR/voice-detect-grpc" "$@"
|
||||||
14
backend/go/voice-detect/test.sh
Executable file
14
backend/go/voice-detect/test.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
cd "$CURDIR"
|
||||||
|
|
||||||
|
echo "Running voice-detect backend tests..."
|
||||||
|
|
||||||
|
# The pure-Go parsing specs always run. The embed/verify/analyze smoke specs run
|
||||||
|
# only when a model + WAV are provided via VOICEDETECT_BACKEND_TEST_MODEL and
|
||||||
|
# VOICEDETECT_BACKEND_TEST_WAV; otherwise they auto-skip.
|
||||||
|
LD_LIBRARY_PATH="$CURDIR:${LD_LIBRARY_PATH:-}" go test -v -timeout 1200s .
|
||||||
|
|
||||||
|
echo "voice-detect tests completed."
|
||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
WHISPER_CPP_VERSION?=0ae02cdb2c7317b50991367c165736ce42ed96ac
|
||||||
SO_TARGET?=libgowhisper.so
|
SO_TARGET?=libgowhisper.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -13,8 +13,14 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$(uname)" = "Darwin" ]; then
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
# macOS: single dylib variant (Metal or Accelerate)
|
# macOS: single fallback variant (Metal/Accelerate). The cmake build emits a
|
||||||
|
# Mach-O named .so, but tolerate .dylib too — pick whichever exists so the Go
|
||||||
|
# loader doesn't panic on a hardcoded name that isn't on disk.
|
||||||
|
if [ -e "$CURDIR/libgowhisper-fallback.dylib" ]; then
|
||||||
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
|
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||||
|
fi
|
||||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
else
|
else
|
||||||
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||||
|
|||||||
@@ -209,6 +209,78 @@
|
|||||||
nvidia-cuda-12: "cuda12-ced"
|
nvidia-cuda-12: "cuda12-ced"
|
||||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-ced"
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-ced"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ced"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ced"
|
||||||
|
- &voicedetect
|
||||||
|
name: "voice-detect"
|
||||||
|
alias: "voice-detect"
|
||||||
|
license: mit
|
||||||
|
icon: https://avatars.githubusercontent.com/u/95302084
|
||||||
|
description: |
|
||||||
|
voice-detect speaker recognition and voice analysis.
|
||||||
|
voice-detect.cpp is a C++/ggml engine that produces L2-normalised
|
||||||
|
speaker embeddings (ECAPA-TDNN, WeSpeaker ResNet34, 3D-Speaker
|
||||||
|
ERes2Net, CAM++) for voice verification and 1:N identification, plus
|
||||||
|
a wav2vec2 age / gender / emotion analysis head. It replaces the
|
||||||
|
Python speaker-recognition backend and is exposed through the Voice*
|
||||||
|
gRPC rpcs and the /v1/voice/* REST endpoints. It runs on CPU, NVIDIA
|
||||||
|
CUDA, AMD ROCm/HIP, Intel SYCL, Vulkan and NVIDIA Jetson (L4T) targets.
|
||||||
|
urls:
|
||||||
|
- https://github.com/mudler/voice-detect.cpp
|
||||||
|
tags:
|
||||||
|
- voice-recognition
|
||||||
|
- speaker-verification
|
||||||
|
- speaker-embedding
|
||||||
|
- CPU
|
||||||
|
- GPU
|
||||||
|
- CUDA
|
||||||
|
- HIP
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-voice-detect"
|
||||||
|
nvidia: "cuda12-voice-detect"
|
||||||
|
intel: "intel-sycl-f16-voice-detect"
|
||||||
|
metal: "metal-voice-detect"
|
||||||
|
amd: "rocm-voice-detect"
|
||||||
|
vulkan: "vulkan-voice-detect"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
nvidia-cuda-13: "cuda13-voice-detect"
|
||||||
|
nvidia-cuda-12: "cuda12-voice-detect"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-voice-detect"
|
||||||
|
- &facedetect
|
||||||
|
name: "face-detect"
|
||||||
|
alias: "face-detect"
|
||||||
|
license: mit
|
||||||
|
icon: https://avatars.githubusercontent.com/u/95302084
|
||||||
|
description: |
|
||||||
|
face-detect face detection, embedding, verification and analysis.
|
||||||
|
face-detect.cpp is a C++/ggml engine that runs SCRFD / YuNet face
|
||||||
|
detection and ArcFace / SFace 512-d (or 128-d) L2-normalised face
|
||||||
|
embeddings for verification and 1:N identification, plus a landmark /
|
||||||
|
age / gender analysis head. It replaces the Python insightface backend
|
||||||
|
and is exposed through the Embedding, Detect and Face* gRPC rpcs and
|
||||||
|
the /v1/face/* REST endpoints. It runs on CPU, NVIDIA CUDA, AMD
|
||||||
|
ROCm/HIP, Intel SYCL, Vulkan and NVIDIA Jetson (L4T) targets.
|
||||||
|
urls:
|
||||||
|
- https://github.com/mudler/face-detect.cpp
|
||||||
|
tags:
|
||||||
|
- face-recognition
|
||||||
|
- face-verification
|
||||||
|
- face-embedding
|
||||||
|
- CPU
|
||||||
|
- GPU
|
||||||
|
- CUDA
|
||||||
|
- HIP
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-face-detect"
|
||||||
|
nvidia: "cuda12-face-detect"
|
||||||
|
intel: "intel-sycl-f16-face-detect"
|
||||||
|
metal: "metal-face-detect"
|
||||||
|
amd: "rocm-face-detect"
|
||||||
|
vulkan: "vulkan-face-detect"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-face-detect"
|
||||||
|
nvidia-cuda-13: "cuda13-face-detect"
|
||||||
|
nvidia-cuda-12: "cuda12-face-detect"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-face-detect"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-face-detect"
|
||||||
- &voxtral
|
- &voxtral
|
||||||
name: "voxtral"
|
name: "voxtral"
|
||||||
alias: "voxtral"
|
alias: "voxtral"
|
||||||
@@ -1356,7 +1428,6 @@
|
|||||||
intel: "intel-fish-speech"
|
intel: "intel-fish-speech"
|
||||||
amd: "rocm-fish-speech"
|
amd: "rocm-fish-speech"
|
||||||
nvidia-l4t: "nvidia-l4t-fish-speech"
|
nvidia-l4t: "nvidia-l4t-fish-speech"
|
||||||
metal: "metal-fish-speech"
|
|
||||||
default: "cpu-fish-speech"
|
default: "cpu-fish-speech"
|
||||||
nvidia-cuda-13: "cuda13-fish-speech"
|
nvidia-cuda-13: "cuda13-fish-speech"
|
||||||
nvidia-cuda-12: "cuda12-fish-speech"
|
nvidia-cuda-12: "cuda12-fish-speech"
|
||||||
@@ -2828,6 +2899,236 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ced"
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ced"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-ced
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-ced
|
||||||
|
## voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "voice-detect-development"
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-voice-detect-development"
|
||||||
|
nvidia: "cuda12-voice-detect-development"
|
||||||
|
intel: "intel-sycl-f16-voice-detect-development"
|
||||||
|
metal: "metal-voice-detect-development"
|
||||||
|
amd: "rocm-voice-detect-development"
|
||||||
|
vulkan: "vulkan-voice-detect-development"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
nvidia-cuda-13: "cuda13-voice-detect-development"
|
||||||
|
nvidia-cuda-12: "cuda12-voice-detect-development"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cpu-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-cpu-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cpu-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-cpu-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "metal-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "metal-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda12-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-12-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda12-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-12-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "rocm-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-rocm-hipblas-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "rocm-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-rocm-hipblas-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f32-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f32-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f32-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f32-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f16-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f16-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f16-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f16-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "vulkan-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-vulkan-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "vulkan-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-vulkan-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-13-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-voice-detect
|
||||||
|
## face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "face-detect-development"
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-face-detect-development"
|
||||||
|
nvidia: "cuda12-face-detect-development"
|
||||||
|
intel: "intel-sycl-f16-face-detect-development"
|
||||||
|
metal: "metal-face-detect-development"
|
||||||
|
amd: "rocm-face-detect-development"
|
||||||
|
vulkan: "vulkan-face-detect-development"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
nvidia-cuda-13: "cuda13-face-detect-development"
|
||||||
|
nvidia-cuda-12: "cuda12-face-detect-development"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-face-detect-development"
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "nvidia-l4t-arm64-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cpu-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-cpu-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cpu-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-cpu-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "metal-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "metal-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda12-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-12-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda12-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-12-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "rocm-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-rocm-hipblas-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "rocm-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-rocm-hipblas-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f32-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f32-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f32-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f32-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f16-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f16-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f16-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f16-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "vulkan-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-vulkan-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "vulkan-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-vulkan-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-13-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-face-detect
|
||||||
## stablediffusion-ggml
|
## stablediffusion-ggml
|
||||||
- !!merge <<: *stablediffusionggml
|
- !!merge <<: *stablediffusionggml
|
||||||
name: "cpu-stablediffusion-ggml"
|
name: "cpu-stablediffusion-ggml"
|
||||||
@@ -4870,7 +5171,6 @@
|
|||||||
intel: "intel-fish-speech-development"
|
intel: "intel-fish-speech-development"
|
||||||
amd: "rocm-fish-speech-development"
|
amd: "rocm-fish-speech-development"
|
||||||
nvidia-l4t: "nvidia-l4t-fish-speech-development"
|
nvidia-l4t: "nvidia-l4t-fish-speech-development"
|
||||||
metal: "metal-fish-speech-development"
|
|
||||||
default: "cpu-fish-speech-development"
|
default: "cpu-fish-speech-development"
|
||||||
nvidia-cuda-13: "cuda13-fish-speech-development"
|
nvidia-cuda-13: "cuda13-fish-speech-development"
|
||||||
nvidia-cuda-12: "cuda12-fish-speech-development"
|
nvidia-cuda-12: "cuda12-fish-speech-development"
|
||||||
@@ -4946,16 +5246,6 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech"
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech
|
||||||
- !!merge <<: *fish-speech
|
|
||||||
name: "metal-fish-speech"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-fish-speech"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:latest-metal-darwin-arm64-fish-speech
|
|
||||||
- !!merge <<: *fish-speech
|
|
||||||
name: "metal-fish-speech-development"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-fish-speech"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:master-metal-darwin-arm64-fish-speech
|
|
||||||
## faster-qwen3-tts
|
## faster-qwen3-tts
|
||||||
- !!merge <<: *faster-qwen3-tts
|
- !!merge <<: *faster-qwen3-tts
|
||||||
name: "faster-qwen3-tts-development"
|
name: "faster-qwen3-tts-development"
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
torch
|
|
||||||
torchaudio
|
|
||||||
@@ -7,3 +7,7 @@ setuptools
|
|||||||
six
|
six
|
||||||
scipy
|
scipy
|
||||||
numpy
|
numpy
|
||||||
|
# fish-speech is installed editable with --no-build-isolation, so the build
|
||||||
|
# backends of its transitive deps must already be in the venv. One of them
|
||||||
|
# builds a Rust extension and needs setuptools-rust present at metadata time.
|
||||||
|
setuptools-rust
|
||||||
|
|||||||
@@ -4,3 +4,4 @@ certifi
|
|||||||
packaging==24.1
|
packaging==24.1
|
||||||
pip
|
pip
|
||||||
chardet
|
chardet
|
||||||
|
click
|
||||||
|
|||||||
@@ -11,14 +11,31 @@ fi
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
# Fetch convert_hf_to_gguf.py from llama.cpp
|
# Fetch convert_hf_to_gguf.py from llama.cpp.
|
||||||
|
# Upstream split the model-specific logic out of the single file into a
|
||||||
|
# sibling `conversion/` package (convert_hf_to_gguf.py now does
|
||||||
|
# `from conversion import ...`), so a single-file download no longer runs —
|
||||||
|
# it fails with `ModuleNotFoundError: No module named 'conversion'`. We clone
|
||||||
|
# the repo and copy both the script and the package; Python puts the script's
|
||||||
|
# own directory on sys.path[0], so the package resolves when placed beside it.
|
||||||
LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
|
LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
|
||||||
|
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
||||||
CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
|
CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
|
||||||
if [ ! -f "${CONVERT_SCRIPT}" ]; then
|
|
||||||
echo "Downloading convert_hf_to_gguf.py from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
cloneLlamaCpp() {
|
||||||
curl -L --fail --retry 3 \
|
if [ ! -d "${LLAMA_CPP_SRC}/.git" ]; then
|
||||||
"https://raw.githubusercontent.com/ggml-org/llama.cpp/${LLAMA_CPP_CONVERT_VERSION}/convert_hf_to_gguf.py" \
|
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
||||||
-o "${CONVERT_SCRIPT}" || echo "Warning: Failed to download convert_hf_to_gguf.py."
|
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
||||||
|
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ ! -f "${CONVERT_SCRIPT}" ] || [ ! -d "${EDIR}/conversion" ]; then
|
||||||
|
echo "Fetching convert_hf_to_gguf.py + conversion/ from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||||
|
cloneLlamaCpp
|
||||||
|
cp "${LLAMA_CPP_SRC}/convert_hf_to_gguf.py" "${CONVERT_SCRIPT}"
|
||||||
|
rm -rf "${EDIR}/conversion"
|
||||||
|
cp -r "${LLAMA_CPP_SRC}/conversion" "${EDIR}/conversion"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install gguf package from the same llama.cpp commit to keep them in sync
|
# Install gguf package from the same llama.cpp commit to keep them in sync
|
||||||
@@ -41,12 +58,7 @@ QUANTIZE_BIN="${EDIR}/llama-quantize"
|
|||||||
if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
|
if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
|
||||||
if command -v cmake &>/dev/null; then
|
if command -v cmake &>/dev/null; then
|
||||||
echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||||
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
cloneLlamaCpp # reuses the clone fetched for convert_hf_to_gguf.py
|
||||||
if [ ! -d "${LLAMA_CPP_SRC}" ]; then
|
|
||||||
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
|
||||||
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
|
||||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
|
||||||
fi
|
|
||||||
cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
|
cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
|
||||||
cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
|
cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
|
||||||
cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
|
cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
|
||||||
|
|||||||
@@ -85,9 +85,15 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
|||||||
# The resulting binary still requires an AVX-512 capable CPU at runtime,
|
# The resulting binary still requires an AVX-512 capable CPU at runtime,
|
||||||
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
||||||
|
|
||||||
|
# Pin the source build to the same release the GPU path floors on
|
||||||
|
# (0.5.11, see requirements-cublas12-after.txt). An unpinned master clone
|
||||||
|
# pulls in newer CPU kernels (e.g. mamba/fla.cpp) that fail to compile
|
||||||
|
# (constexpr non-constant + kineto_LIBRARY-NOTFOUND). Bump deliberately.
|
||||||
|
SGLANG_VERSION="${SGLANG_VERSION:-v0.5.11}"
|
||||||
_sgl_src=$(mktemp -d)
|
_sgl_src=$(mktemp -d)
|
||||||
trap 'rm -rf "${_sgl_src}"' EXIT
|
trap 'rm -rf "${_sgl_src}"' EXIT
|
||||||
git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
git clone --depth 1 --branch "${SGLANG_VERSION}" \
|
||||||
|
https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||||
|
|
||||||
# Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
|
# Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
|
||||||
sed -i 's/-march=native/-march=sapphirerapids/g' \
|
sed -i 's/-march=native/-march=sapphirerapids/g' \
|
||||||
|
|||||||
@@ -570,6 +570,43 @@ impl Backend for KokorosService {
|
|||||||
) -> Result<Response<backend::Result>, Status> {
|
) -> Result<Response<backend::Result>, Status> {
|
||||||
Err(Status::unimplemented("Not supported"))
|
Err(Status::unimplemented("Not supported"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn sound_detection(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::SoundDetectionRequest>,
|
||||||
|
) -> Result<Response<backend::SoundDetectionResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn depth(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::DepthRequest>,
|
||||||
|
) -> Result<Response<backend::DepthResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn token_classify(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::TokenClassifyRequest>,
|
||||||
|
) -> Result<Response<backend::TokenClassifyResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn score(
|
||||||
|
&self,
|
||||||
|
_: Request<backend::ScoreRequest>,
|
||||||
|
) -> Result<Response<backend::ScoreResponse>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
|
|
||||||
|
type ForwardStream = ReceiverStream<Result<backend::ForwardReply, Status>>;
|
||||||
|
|
||||||
|
async fn forward(
|
||||||
|
&self,
|
||||||
|
_: Request<tonic::Streaming<backend::ForwardRequest>>,
|
||||||
|
) -> Result<Response<Self::ForwardStream>, Status> {
|
||||||
|
Err(Status::unimplemented("Not supported"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -429,7 +429,7 @@ func (l *Launcher) CheckForUpdates() (bool, string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DownloadUpdate downloads the latest version
|
// DownloadUpdate downloads the latest version
|
||||||
func (l *Launcher) DownloadUpdate(version string, progressCallback func(float64)) error {
|
func (l *Launcher) DownloadUpdate(version string, progressCallback func(downloaded, total int64)) error {
|
||||||
return l.releaseManager.DownloadRelease(version, progressCallback)
|
return l.releaseManager.DownloadRelease(version, progressCallback)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -486,7 +486,6 @@ func (l *Launcher) showDownloadLocalAIDialog() {
|
|||||||
fyne.DoAndWait(func() {
|
fyne.DoAndWait(func() {
|
||||||
// Create a standalone window for the download dialog
|
// Create a standalone window for the download dialog
|
||||||
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
|
dialogWindow := l.app.NewWindow("LocalAI Installation Required")
|
||||||
dialogWindow.Resize(fyne.NewSize(500, 350))
|
|
||||||
dialogWindow.CenterOnScreen()
|
dialogWindow.CenterOnScreen()
|
||||||
dialogWindow.SetCloseIntercept(func() {
|
dialogWindow.SetCloseIntercept(func() {
|
||||||
dialogWindow.Close()
|
dialogWindow.Close()
|
||||||
@@ -548,6 +547,7 @@ func (l *Launcher) showDownloadLocalAIDialog() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
dialogWindow.SetContent(content)
|
dialogWindow.SetContent(content)
|
||||||
|
resizeToContent(dialogWindow, content)
|
||||||
dialogWindow.Show()
|
dialogWindow.Show()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -621,88 +621,134 @@ func (l *Launcher) showDownloadError(title, message string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a standalone progress window for downloading LocalAI
|
// showDownloadProgress shows a standalone progress window for downloading LocalAI
|
||||||
|
// after a fresh install (no LocalAI binary present yet).
|
||||||
func (l *Launcher) showDownloadProgress(version, title string) {
|
func (l *Launcher) showDownloadProgress(version, title string) {
|
||||||
|
l.showDownloadProgressWindow(version, title, func(win fyne.Window) {
|
||||||
|
dialog.ShowConfirm("Installation Complete",
|
||||||
|
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
||||||
|
func(bool) {
|
||||||
|
win.Close()
|
||||||
|
l.updateStatus("LocalAI installed successfully")
|
||||||
|
if l.systray != nil {
|
||||||
|
l.systray.recreateMenu()
|
||||||
|
}
|
||||||
|
}, win)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// showDownloadProgressWindow renders the download progress popup shared by every
|
||||||
|
// "download/upgrade LocalAI" entry point. It owns the progress bar, the
|
||||||
|
// human-readable byte readout, resume-aware retry, and content-fit window
|
||||||
|
// sizing so the behaviour stays identical everywhere. onSuccess runs (on the UI
|
||||||
|
// goroutine) once the download verifies, and is responsible for the success
|
||||||
|
// dialog and any follow-up; the window is passed in so it can be parented/closed.
|
||||||
|
func (l *Launcher) showDownloadProgressWindow(version, title string, onSuccess func(win fyne.Window)) {
|
||||||
fyne.DoAndWait(func() {
|
fyne.DoAndWait(func() {
|
||||||
// Create progress window
|
|
||||||
progressWindow := l.app.NewWindow("Downloading LocalAI")
|
progressWindow := l.app.NewWindow("Downloading LocalAI")
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
progressWindow.CenterOnScreen()
|
||||||
progressWindow.SetCloseIntercept(func() {
|
progressWindow.SetCloseIntercept(func() {
|
||||||
progressWindow.Close()
|
progressWindow.Close()
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
progressBar := widget.NewProgressBar()
|
||||||
progressBar.SetValue(0)
|
progressBar.SetValue(0)
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
// Status label. Truncate with an ellipsis so a long "Download failed:
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
// <url>" message can't stretch the window (and progress bar) to fit the
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
// whole error on one line.
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
statusLabel := widget.NewLabel("Preparing download...")
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
||||||
releaseNotesURL, err := l.githubReleaseNotesURL(version)
|
releaseNotesURL, err := l.githubReleaseNotesURL(version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
log.Printf("Failed to parse URL: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
l.app.OpenURL(releaseNotesURL)
|
l.app.OpenURL(releaseNotesURL)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Progress container
|
// Retry button: hidden until a download fails. GitHub downloads are
|
||||||
progressContainer := container.NewVBox(
|
// flaky, and the underlying download resumes from the partial file, so
|
||||||
|
// a retry continues where it left off rather than starting over.
|
||||||
|
retryButton := widget.NewButton("Retry", nil)
|
||||||
|
retryButton.Importance = widget.HighImportance
|
||||||
|
retryButton.Hide()
|
||||||
|
|
||||||
|
buttonRow := container.NewHBox(releaseNotesButton, retryButton)
|
||||||
|
content := container.NewVBox(
|
||||||
widget.NewLabel(title),
|
widget.NewLabel(title),
|
||||||
progressBar,
|
progressBar,
|
||||||
statusLabel,
|
statusLabel,
|
||||||
widget.NewSeparator(),
|
widget.NewSeparator(),
|
||||||
releaseNotesButton,
|
buttonRow,
|
||||||
)
|
)
|
||||||
|
progressWindow.SetContent(content)
|
||||||
|
resizeToContent(progressWindow, content)
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
var startDownload func()
|
||||||
progressWindow.Show()
|
startDownload = func() {
|
||||||
|
retryButton.Hide()
|
||||||
|
progressBar.SetValue(0)
|
||||||
|
statusLabel.SetText("Preparing download...")
|
||||||
|
resizeToContent(progressWindow, content)
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
go func() {
|
||||||
err := l.DownloadUpdate(version, func(progress float64) {
|
err := l.DownloadUpdate(version, func(downloaded, total int64) {
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
progressBar.SetValue(progress)
|
if total > 0 {
|
||||||
percentage := int(progress * 100)
|
progressBar.SetValue(float64(downloaded) / float64(total))
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
statusLabel.SetText(fmt.Sprintf("Downloading… %s / %s", formatBytes(downloaded), formatBytes(total)))
|
||||||
|
} else {
|
||||||
|
statusLabel.SetText(fmt.Sprintf("Downloading… %s", formatBytes(downloaded)))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
||||||
// Show error dialog
|
retryButton.Show()
|
||||||
dialog.ShowError(err, progressWindow)
|
resizeToContent(progressWindow, content)
|
||||||
} else {
|
return
|
||||||
statusLabel.SetText("Download completed successfully!")
|
}
|
||||||
progressBar.SetValue(1.0)
|
progressBar.SetValue(1.0)
|
||||||
|
statusLabel.SetText("Download complete")
|
||||||
// Show success dialog
|
onSuccess(progressWindow)
|
||||||
dialog.ShowConfirm("Installation Complete",
|
|
||||||
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
|
||||||
func(close bool) {
|
|
||||||
progressWindow.Close()
|
|
||||||
// Update status and refresh systray menu
|
|
||||||
l.updateStatus("LocalAI installed successfully")
|
|
||||||
|
|
||||||
if l.systray != nil {
|
|
||||||
l.systray.recreateMenu()
|
|
||||||
}
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
}()
|
}()
|
||||||
|
}
|
||||||
|
retryButton.OnTapped = startDownload
|
||||||
|
|
||||||
|
progressWindow.Show()
|
||||||
|
startDownload()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resizeToContent sizes a window to fit its content (with a sane minimum width)
|
||||||
|
// so the dialog doesn't show a large blank gap below the last widget.
|
||||||
|
func resizeToContent(w fyne.Window, content fyne.CanvasObject) {
|
||||||
|
size := content.MinSize()
|
||||||
|
if size.Width < 400 {
|
||||||
|
size.Width = 400
|
||||||
|
}
|
||||||
|
w.Resize(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatBytes renders a byte count as a human-readable size (e.g. "12.3 MB").
|
||||||
|
func formatBytes(b int64) string {
|
||||||
|
const unit = 1024
|
||||||
|
if b < unit {
|
||||||
|
return fmt.Sprintf("%d B", b)
|
||||||
|
}
|
||||||
|
div, exp := int64(unit), 0
|
||||||
|
for n := b / unit; n >= unit; n /= unit {
|
||||||
|
div *= unit
|
||||||
|
exp++
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
|
||||||
|
}
|
||||||
|
|
||||||
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
|
// monitorLogs monitors the output of LocalAI and adds it to the log buffer
|
||||||
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
|
func (l *Launcher) monitorLogs(reader io.Reader, prefix string) {
|
||||||
scanner := bufio.NewScanner(reader)
|
scanner := bufio.NewScanner(reader)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -50,6 +51,12 @@ type ReleaseManager struct {
|
|||||||
ChecksumsPath string
|
ChecksumsPath string
|
||||||
// MetadataPath is where version metadata is stored
|
// MetadataPath is where version metadata is stored
|
||||||
MetadataPath string
|
MetadataPath string
|
||||||
|
// BaseDownloadURL is the base URL release assets are downloaded from
|
||||||
|
// (defaults to https://github.com; overridable for testing)
|
||||||
|
BaseDownloadURL string
|
||||||
|
// RetryBackoff is the base wait between download attempts; the Nth retry
|
||||||
|
// waits N*RetryBackoff (defaults to 1s; lowered in tests)
|
||||||
|
RetryBackoff time.Duration
|
||||||
// HTTPClient is the HTTP client used for downloads
|
// HTTPClient is the HTTP client used for downloads
|
||||||
HTTPClient *http.Client
|
HTTPClient *http.Client
|
||||||
}
|
}
|
||||||
@@ -68,22 +75,88 @@ func NewReleaseManager() *ReleaseManager {
|
|||||||
CurrentVersion: internal.PrintableVersion(),
|
CurrentVersion: internal.PrintableVersion(),
|
||||||
ChecksumsPath: checksumsPath,
|
ChecksumsPath: checksumsPath,
|
||||||
MetadataPath: metadataPath,
|
MetadataPath: metadataPath,
|
||||||
|
BaseDownloadURL: "https://github.com",
|
||||||
|
RetryBackoff: 1 * time.Second,
|
||||||
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
|
HTTPClient: httpclient.NewWithTimeout(30*time.Second, httpclient.WithFollowRedirects()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLatestRelease fetches the latest release information from GitHub
|
// GetLatestRelease resolves the latest LocalAI release.
|
||||||
|
//
|
||||||
|
// It first follows the github.com "releases/latest" redirect, which reveals the
|
||||||
|
// latest tag in the final URL and—crucially—is NOT subject to the
|
||||||
|
// 60-requests/hour unauthenticated rate limit of api.github.com. That limit is
|
||||||
|
// per-IP, so on shared/NAT/CGNAT/cloud addresses the API returns 403 almost
|
||||||
|
// immediately (e.g. on a fresh install with no LocalAI present yet). The
|
||||||
|
// redirect avoids that entirely. The richer JSON API is kept only as a fallback.
|
||||||
|
//
|
||||||
|
// Only the version is consumed by callers, so the redirect's tag is sufficient.
|
||||||
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
||||||
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
|
version, redirectErr := rm.latestVersionFromRedirect()
|
||||||
|
if redirectErr == nil {
|
||||||
|
return &Release{Version: version}, nil
|
||||||
|
}
|
||||||
|
log.Printf("Could not resolve latest version via release redirect (%v); falling back to GitHub API", redirectErr)
|
||||||
|
|
||||||
|
release, apiErr := rm.latestReleaseFromAPI()
|
||||||
|
if apiErr != nil {
|
||||||
|
// Surface both failures so a rate-limited API doesn't mask the (usually
|
||||||
|
// more relevant) redirect error.
|
||||||
|
return nil, fmt.Errorf("failed to fetch latest release: %v (redirect: %v)", apiErr, redirectErr)
|
||||||
|
}
|
||||||
|
return release, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestVersionFromRedirect returns the latest tag by following the github.com
|
||||||
|
// "releases/latest" redirect to ".../releases/tag/<tag>".
|
||||||
|
func (rm *ReleaseManager) latestVersionFromRedirect() (string, error) {
|
||||||
|
url := fmt.Sprintf("%s/%s/%s/releases/latest", rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo)
|
||||||
|
|
||||||
resp, err := rm.HTTPClient.Get(url)
|
resp, err := rm.HTTPClient.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("unexpected status %s", resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// After the redirect is followed, the final request URL is the tag page.
|
||||||
|
version := path.Base(resp.Request.URL.Path)
|
||||||
|
if version == "" || version == "." || version == "latest" {
|
||||||
|
return "", fmt.Errorf("could not determine version from %s", resp.Request.URL.String())
|
||||||
|
}
|
||||||
|
return version, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestReleaseFromAPI fetches the latest release JSON from api.github.com. This
|
||||||
|
// is the fallback path; it is rate-limited unless GITHUB_TOKEN is set.
|
||||||
|
func (rm *ReleaseManager) latestReleaseFromAPI() (*Release, error) {
|
||||||
|
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/releases/latest", rm.GitHubOwner, rm.GitHubRepo)
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Accept", "application/vnd.github+json")
|
||||||
|
// An optional token lifts the unauthenticated 60/hour limit to 5000/hour.
|
||||||
|
if token := os.Getenv("GITHUB_TOKEN"); token != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := rm.HTTPClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
|
return nil, fmt.Errorf("failed to fetch latest release: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
return nil, fmt.Errorf("failed to fetch latest release: status %d", resp.StatusCode)
|
if (resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusTooManyRequests) &&
|
||||||
|
resp.Header.Get("X-RateLimit-Remaining") == "0" {
|
||||||
|
return nil, fmt.Errorf("GitHub API rate limit exceeded (status %d); retry later or set GITHUB_TOKEN to raise the limit", resp.StatusCode)
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the JSON response properly
|
// Parse the JSON response properly
|
||||||
@@ -106,7 +179,7 @@ func (rm *ReleaseManager) GetLatestRelease() (*Release, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DownloadRelease downloads a specific version of LocalAI
|
// DownloadRelease downloads a specific version of LocalAI
|
||||||
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(float64)) error {
|
func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(downloaded, total int64)) error {
|
||||||
// Ensure the binary directory exists
|
// Ensure the binary directory exists
|
||||||
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
|
if err := os.MkdirAll(rm.BinaryPath, 0755); err != nil {
|
||||||
return fmt.Errorf("failed to create binary directory: %w", err)
|
return fmt.Errorf("failed to create binary directory: %w", err)
|
||||||
@@ -117,16 +190,16 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
|
|||||||
localPath := filepath.Join(rm.BinaryPath, "local-ai")
|
localPath := filepath.Join(rm.BinaryPath, "local-ai")
|
||||||
|
|
||||||
// Download the binary
|
// Download the binary
|
||||||
downloadURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/%s",
|
downloadURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/%s",
|
||||||
rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
|
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, binaryName)
|
||||||
|
|
||||||
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
|
if err := rm.downloadFile(downloadURL, localPath, progressCallback); err != nil {
|
||||||
return fmt.Errorf("failed to download binary: %w", err)
|
return fmt.Errorf("failed to download binary: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download and verify checksums
|
// Download and verify checksums
|
||||||
checksumURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
|
checksumURL := fmt.Sprintf("%s/%s/%s/releases/download/%s/LocalAI-%s-checksums.txt",
|
||||||
rm.GitHubOwner, rm.GitHubRepo, version, version)
|
rm.BaseDownloadURL, rm.GitHubOwner, rm.GitHubRepo, version, version)
|
||||||
|
|
||||||
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
|
checksumPath := filepath.Join(rm.BinaryPath, "checksums.txt")
|
||||||
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
|
manualChecksumPath := filepath.Join(rm.ChecksumsPath, fmt.Sprintf("checksums-%s.txt", version))
|
||||||
@@ -154,6 +227,10 @@ func (rm *ReleaseManager) DownloadRelease(version string, progressCallback func(
|
|||||||
// Verify the checksum if we have a checksum file
|
// Verify the checksum if we have a checksum file
|
||||||
if _, err := os.Stat(checksumPath); err == nil {
|
if _, err := os.Stat(checksumPath); err == nil {
|
||||||
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
|
if err := rm.VerifyChecksum(localPath, checksumPath, binaryName); err != nil {
|
||||||
|
// Discard the corrupt binary (and any leftover partial) so the next
|
||||||
|
// retry starts from a clean slate rather than resuming corruption.
|
||||||
|
os.Remove(localPath)
|
||||||
|
os.Remove(localPath + ".part")
|
||||||
return fmt.Errorf("checksum verification failed: %w", err)
|
return fmt.Errorf("checksum verification failed: %w", err)
|
||||||
}
|
}
|
||||||
log.Printf("Checksum verification successful")
|
log.Printf("Checksum verification successful")
|
||||||
@@ -196,44 +273,88 @@ func (rm *ReleaseManager) GetBinaryName(version string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// downloadFile downloads a file from a URL to a local path with optional progress callback
|
// downloadFile downloads a file from a URL to a local path with optional progress callback
|
||||||
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(float64)) error {
|
func (rm *ReleaseManager) downloadFile(url, filepath string, progressCallback func(downloaded, total int64)) error {
|
||||||
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
|
return rm.downloadFileWithRetry(url, filepath, progressCallback, 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
// downloadFileWithRetry downloads a file from a URL with retry logic
|
// downloadFileWithRetry downloads a file with retry and HTTP Range resume.
|
||||||
func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCallback func(float64), maxRetries int) error {
|
//
|
||||||
|
// The body is streamed to "<dest>.part" and only renamed to dest on success, so
|
||||||
|
// a dropped connection leaves a partial file that the next attempt continues via
|
||||||
|
// a "Range: bytes=N-" request instead of restarting from zero. This matters for
|
||||||
|
// GitHub release downloads, which are large and flaky.
|
||||||
|
func (rm *ReleaseManager) downloadFileWithRetry(url, dest string, progressCallback func(downloaded, total int64), maxRetries int) error {
|
||||||
|
partPath := dest + ".part"
|
||||||
var lastErr error
|
var lastErr error
|
||||||
|
|
||||||
for attempt := 1; attempt <= maxRetries; attempt++ {
|
for attempt := 1; attempt <= maxRetries; attempt++ {
|
||||||
if attempt > 1 {
|
if attempt > 1 {
|
||||||
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
|
log.Printf("Retrying download (attempt %d/%d): %s", attempt, maxRetries, url)
|
||||||
time.Sleep(time.Duration(attempt) * time.Second)
|
time.Sleep(time.Duration(attempt) * rm.RetryBackoff)
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := rm.HTTPClient.Get(url)
|
// Resume from however much we already have on disk.
|
||||||
|
var offset int64
|
||||||
|
if fi, err := os.Stat(partPath); err == nil {
|
||||||
|
offset = fi.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if offset > 0 {
|
||||||
|
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := rm.HTTPClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastErr = err
|
lastErr = err
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
switch resp.StatusCode {
|
||||||
|
case http.StatusOK:
|
||||||
|
// Server ignored the Range (or we had nothing): start fresh.
|
||||||
|
offset = 0
|
||||||
|
case http.StatusPartialContent:
|
||||||
|
// Resume: append to the existing partial file.
|
||||||
|
case http.StatusRequestedRangeNotSatisfiable:
|
||||||
|
// Stale or already-complete partial: discard and restart fresh.
|
||||||
|
resp.Body.Close()
|
||||||
|
os.Remove(partPath)
|
||||||
|
lastErr = fmt.Errorf("partial download no longer valid (status %s), restarting", resp.Status)
|
||||||
|
continue
|
||||||
|
default:
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
lastErr = fmt.Errorf("bad status: %s", resp.Status)
|
lastErr = fmt.Errorf("bad status: %s", resp.Status)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
out, err := os.Create(filepath)
|
var out *os.File
|
||||||
|
if offset > 0 {
|
||||||
|
out, err = os.OpenFile(partPath, os.O_WRONLY|os.O_APPEND, 0644)
|
||||||
|
} else {
|
||||||
|
out, err = os.Create(partPath)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a progress reader if callback is provided
|
// On a 206 the Content-Length is the remaining bytes, so the full size
|
||||||
|
// is what we already have plus what's still to come.
|
||||||
|
total := resp.ContentLength
|
||||||
|
if offset > 0 && total > 0 {
|
||||||
|
total += offset
|
||||||
|
}
|
||||||
|
|
||||||
var reader io.Reader = resp.Body
|
var reader io.Reader = resp.Body
|
||||||
if progressCallback != nil && resp.ContentLength > 0 {
|
if progressCallback != nil && total > 0 {
|
||||||
reader = &progressReader{
|
reader = &progressReader{
|
||||||
Reader: resp.Body,
|
Reader: resp.Body,
|
||||||
Total: resp.ContentLength,
|
Total: total,
|
||||||
|
Current: offset,
|
||||||
Callback: progressCallback,
|
Callback: progressCallback,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -243,11 +364,14 @@ func (rm *ReleaseManager) downloadFileWithRetry(url, filepath string, progressCa
|
|||||||
out.Close()
|
out.Close()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Keep the partial file so the next attempt can resume from it.
|
||||||
lastErr = err
|
lastErr = err
|
||||||
os.Remove(filepath)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := os.Rename(partPath, dest); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -322,20 +446,21 @@ func (rm *ReleaseManager) saveVersionMetadata(version string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// progressReader wraps an io.Reader to provide download progress
|
// progressReader wraps an io.Reader to provide download progress as a
|
||||||
|
// (downloaded, total) byte count so callers can render both a progress bar and
|
||||||
|
// a human-readable size.
|
||||||
type progressReader struct {
|
type progressReader struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
Total int64
|
Total int64
|
||||||
Current int64
|
Current int64
|
||||||
Callback func(float64)
|
Callback func(downloaded, total int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pr *progressReader) Read(p []byte) (int, error) {
|
func (pr *progressReader) Read(p []byte) (int, error) {
|
||||||
n, err := pr.Reader.Read(p)
|
n, err := pr.Reader.Read(p)
|
||||||
pr.Current += int64(n)
|
pr.Current += int64(n)
|
||||||
if pr.Callback != nil {
|
if pr.Callback != nil {
|
||||||
progress := float64(pr.Current) / float64(pr.Total)
|
pr.Callback(pr.Current, pr.Total)
|
||||||
pr.Callback(progress)
|
|
||||||
}
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,17 @@
|
|||||||
package launcher_test
|
package launcher_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
@@ -178,4 +186,221 @@ var _ = Describe("ReleaseManager", func() {
|
|||||||
Expect(err.Error()).To(ContainSubstring("checksum not found"))
|
Expect(err.Error()).To(ContainSubstring("checksum not found"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("DownloadRelease resume and retry", func() {
|
||||||
|
var (
|
||||||
|
version string
|
||||||
|
binaryName string
|
||||||
|
content []byte
|
||||||
|
checksums string
|
||||||
|
finalPath string
|
||||||
|
partPath string
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
version = "v9.9.9"
|
||||||
|
binaryName = rm.GetBinaryName(version)
|
||||||
|
|
||||||
|
// Deterministic, non-trivial content so resume/append bugs surface.
|
||||||
|
content = make([]byte, 4096)
|
||||||
|
for i := range content {
|
||||||
|
content[i] = byte(i % 251)
|
||||||
|
}
|
||||||
|
sum := sha256.Sum256(content)
|
||||||
|
checksums = fmt.Sprintf("%s %s\n", hex.EncodeToString(sum[:]), binaryName)
|
||||||
|
|
||||||
|
finalPath = filepath.Join(tempDir, "local-ai")
|
||||||
|
partPath = finalPath + ".part"
|
||||||
|
|
||||||
|
// Isolate the persistent checksum/metadata dirs to the temp dir so
|
||||||
|
// the test never touches the real ~/.localai and existing checksum
|
||||||
|
// files don't short-circuit the download.
|
||||||
|
rm.ChecksumsPath = filepath.Join(tempDir, "checksums")
|
||||||
|
rm.MetadataPath = filepath.Join(tempDir, "metadata")
|
||||||
|
rm.GitHubOwner = "owner"
|
||||||
|
rm.GitHubRepo = "repo"
|
||||||
|
rm.RetryBackoff = time.Millisecond
|
||||||
|
|
||||||
|
Expect(os.MkdirAll(tempDir, 0755)).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("resumes from a partial .part file using a Range request", func() {
|
||||||
|
Expect(os.WriteFile(partPath, content[:1024], 0644)).To(Succeed())
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
sawRange := false
|
||||||
|
binBytesServed := 0
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
|
||||||
|
var start int
|
||||||
|
_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
|
||||||
|
mu.Lock()
|
||||||
|
sawRange = true
|
||||||
|
mu.Unlock()
|
||||||
|
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
|
||||||
|
w.WriteHeader(http.StatusPartialContent)
|
||||||
|
n, _ := w.Write(content[start:])
|
||||||
|
mu.Lock()
|
||||||
|
binBytesServed += n
|
||||||
|
mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
n, _ := w.Write(content)
|
||||||
|
mu.Lock()
|
||||||
|
binBytesServed += n
|
||||||
|
mu.Unlock()
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := os.ReadFile(finalPath)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).To(Equal(content))
|
||||||
|
Expect(sawRange).To(BeTrue(), "expected the download to resume with a Range request")
|
||||||
|
Expect(binBytesServed).To(Equal(len(content)-1024), "expected only the remaining bytes to be served")
|
||||||
|
Expect(partPath).ToNot(BeAnExistingFile())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("starts fresh when the server ignores the Range header (200)", func() {
|
||||||
|
// A stale/garbage partial that must NOT be appended to.
|
||||||
|
Expect(os.WriteFile(partPath, []byte("garbage-garbage-garbage"), 0644)).To(Succeed())
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Ignore any Range and always serve the full body.
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(content)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := os.ReadFile(finalPath)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).To(Equal(content))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("restarts the download when the partial is stale (416)", func() {
|
||||||
|
// Oversized partial -> requested Range start is beyond the content.
|
||||||
|
Expect(os.WriteFile(partPath, make([]byte, len(content)+10), 0644)).To(Succeed())
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if rangeHdr := r.Header.Get("Range"); rangeHdr != "" {
|
||||||
|
var start int
|
||||||
|
_, _ = fmt.Sscanf(rangeHdr, "bytes=%d-", &start)
|
||||||
|
if start >= len(content) {
|
||||||
|
w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, len(content)-1, len(content)))
|
||||||
|
w.WriteHeader(http.StatusPartialContent)
|
||||||
|
_, _ = w.Write(content[start:])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(content)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := os.ReadFile(finalPath)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).To(Equal(content))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("removes the downloaded file when checksum verification fails", func() {
|
||||||
|
bad := []byte("this is definitely not the expected binary content")
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
// Checksums are for `content`, but we serve `bad`.
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(bad)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
err := rm.DownloadRelease(version, nil)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(err.Error()).To(ContainSubstring("checksum"))
|
||||||
|
Expect(finalPath).ToNot(BeAnExistingFile())
|
||||||
|
Expect(partPath).ToNot(BeAnExistingFile())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("reports progress as downloaded and total byte counts", func() {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if strings.HasSuffix(r.URL.Path, "checksums.txt") {
|
||||||
|
_, _ = w.Write([]byte(checksums))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Length", strconv.Itoa(len(content)))
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write(content)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
var lastDownloaded, lastTotal int64
|
||||||
|
err := rm.DownloadRelease(version, func(downloaded, total int64) {
|
||||||
|
mu.Lock()
|
||||||
|
lastDownloaded = downloaded
|
||||||
|
lastTotal = total
|
||||||
|
mu.Unlock()
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(lastTotal).To(Equal(int64(len(content))))
|
||||||
|
Expect(lastDownloaded).To(Equal(int64(len(content))))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("GetLatestRelease", func() {
|
||||||
|
It("resolves the latest version from the releases/latest redirect", func() {
|
||||||
|
// The github.com redirect path must be preferred over the
|
||||||
|
// rate-limited api.github.com, so a working redirect yields the tag
|
||||||
|
// without ever needing the API.
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.HasSuffix(r.URL.Path, "/releases/latest"):
|
||||||
|
http.Redirect(w, r, "/owner/repo/releases/tag/v9.9.9", http.StatusFound)
|
||||||
|
case strings.HasSuffix(r.URL.Path, "/releases/tag/v9.9.9"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
rm.BaseDownloadURL = srv.URL
|
||||||
|
rm.GitHubOwner = "owner"
|
||||||
|
rm.GitHubRepo = "repo"
|
||||||
|
|
||||||
|
release, err := rm.GetLatestRelease()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(release.Version).To(Equal("v9.9.9"))
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -443,84 +443,23 @@ func (sm *SystrayManager) showStartupErrorDialog(err error) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a progress window for downloading updates
|
// showDownloadProgress shows a progress window for downloading updates. The
|
||||||
|
// progress UI (byte readout, resume-aware retry, sizing) is shared with the
|
||||||
|
// other download entry points via the launcher; only the post-success behaviour
|
||||||
|
// (restart prompt + systray refresh) is specific to the update flow.
|
||||||
func (sm *SystrayManager) showDownloadProgress(version string) {
|
func (sm *SystrayManager) showDownloadProgress(version string) {
|
||||||
// Create a new window for download progress
|
sm.launcher.showDownloadProgressWindow(version, fmt.Sprintf("Downloading LocalAI version %s", version), func(win fyne.Window) {
|
||||||
progressWindow := sm.app.NewWindow("Downloading LocalAI Update")
|
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
|
||||||
progressBar.SetValue(0)
|
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
|
||||||
releaseNotesURL, err := sm.launcher.githubReleaseNotesURL(version)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
sm.app.OpenURL(releaseNotesURL)
|
|
||||||
})
|
|
||||||
|
|
||||||
// Progress container
|
|
||||||
progressContainer := container.NewVBox(
|
|
||||||
widget.NewLabel(fmt.Sprintf("Downloading LocalAI version %s", version)),
|
|
||||||
progressBar,
|
|
||||||
statusLabel,
|
|
||||||
widget.NewSeparator(),
|
|
||||||
releaseNotesButton,
|
|
||||||
)
|
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
|
||||||
progressWindow.Show()
|
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
|
||||||
err := sm.launcher.DownloadUpdate(version, func(progress float64) {
|
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
|
||||||
progressBar.SetValue(progress)
|
|
||||||
percentage := int(progress * 100)
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
|
||||||
if err != nil {
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
|
||||||
// Show error dialog
|
|
||||||
dialog.ShowError(err, progressWindow)
|
|
||||||
} else {
|
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
|
||||||
|
|
||||||
// Show restart dialog
|
|
||||||
dialog.ShowConfirm("Update Downloaded",
|
dialog.ShowConfirm("Update Downloaded",
|
||||||
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
|
"LocalAI has been updated successfully. Please restart the launcher to use the new version.",
|
||||||
func(restart bool) {
|
func(restart bool) {
|
||||||
if restart {
|
if restart {
|
||||||
sm.app.Quit()
|
sm.app.Quit()
|
||||||
}
|
}
|
||||||
progressWindow.Close()
|
win.Close()
|
||||||
}, progressWindow)
|
}, win)
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Update systray menu
|
|
||||||
if err == nil {
|
|
||||||
sm.hasUpdateAvailable = false
|
sm.hasUpdateAvailable = false
|
||||||
sm.latestVersion = ""
|
sm.latestVersion = ""
|
||||||
sm.recreateMenu()
|
sm.recreateMenu()
|
||||||
}
|
})
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -490,14 +490,19 @@ func (ui *LauncherUI) downloadUpdate() {
|
|||||||
ui.UpdateStatus("Downloading update " + version + "...")
|
ui.UpdateStatus("Downloading update " + version + "...")
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
|
err := ui.launcher.DownloadUpdate(version, func(downloaded, total int64) {
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
ui.progressBar.SetValue(progress)
|
if total > 0 {
|
||||||
|
ui.progressBar.SetValue(float64(downloaded) / float64(total))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
// Update status with percentage
|
// The progress bar already shows the percentage, so report the
|
||||||
percentage := int(progress * 100)
|
// human-readable size here instead of repeating the percent.
|
||||||
ui.UpdateStatus(fmt.Sprintf("Downloading update %s... %d%%", version, percentage))
|
if total > 0 {
|
||||||
|
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s / %s", version, formatBytes(downloaded), formatBytes(total)))
|
||||||
|
} else {
|
||||||
|
ui.UpdateStatus(fmt.Sprintf("Downloading update %s… %s", version, formatBytes(downloaded)))
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
@@ -598,82 +603,6 @@ func (ui *LauncherUI) LoadConfiguration() {
|
|||||||
log.Printf("UI LoadConfiguration: configuration loaded successfully")
|
log.Printf("UI LoadConfiguration: configuration loaded successfully")
|
||||||
}
|
}
|
||||||
|
|
||||||
// showDownloadProgress shows a progress window for downloading LocalAI
|
|
||||||
func (ui *LauncherUI) showDownloadProgress(version, title string) {
|
|
||||||
fyne.DoAndWait(func() {
|
|
||||||
// Create progress window using the launcher's app
|
|
||||||
progressWindow := ui.launcher.app.NewWindow("Downloading LocalAI")
|
|
||||||
progressWindow.Resize(fyne.NewSize(400, 250))
|
|
||||||
progressWindow.CenterOnScreen()
|
|
||||||
|
|
||||||
// Progress bar
|
|
||||||
progressBar := widget.NewProgressBar()
|
|
||||||
progressBar.SetValue(0)
|
|
||||||
|
|
||||||
// Status label. Truncate with an ellipsis so a long "Download failed:
|
|
||||||
// <url>" message can't stretch the window (and progress bar) to fit the
|
|
||||||
// whole error on one line; the full error is shown in the dialog below.
|
|
||||||
statusLabel := widget.NewLabel("Preparing download...")
|
|
||||||
statusLabel.Truncation = fyne.TextTruncateEllipsis
|
|
||||||
|
|
||||||
// Release notes button
|
|
||||||
releaseNotesButton := widget.NewButton("View Release Notes", func() {
|
|
||||||
releaseNotesURL, err := ui.launcher.githubReleaseNotesURL(version)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to parse URL: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
ui.launcher.app.OpenURL(releaseNotesURL)
|
|
||||||
})
|
|
||||||
|
|
||||||
// Progress container
|
|
||||||
progressContainer := container.NewVBox(
|
|
||||||
widget.NewLabel(title),
|
|
||||||
progressBar,
|
|
||||||
statusLabel,
|
|
||||||
widget.NewSeparator(),
|
|
||||||
releaseNotesButton,
|
|
||||||
)
|
|
||||||
|
|
||||||
progressWindow.SetContent(progressContainer)
|
|
||||||
progressWindow.Show()
|
|
||||||
|
|
||||||
// Start download in background
|
|
||||||
go func() {
|
|
||||||
err := ui.launcher.DownloadUpdate(version, func(progress float64) {
|
|
||||||
// Update progress bar
|
|
||||||
fyne.Do(func() {
|
|
||||||
progressBar.SetValue(progress)
|
|
||||||
percentage := int(progress * 100)
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Downloading... %d%%", percentage))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle completion
|
|
||||||
fyne.Do(func() {
|
|
||||||
if err != nil {
|
|
||||||
statusLabel.SetText(fmt.Sprintf("Download failed: %v", err))
|
|
||||||
// Show error dialog
|
|
||||||
dialog.ShowError(err, progressWindow)
|
|
||||||
} else {
|
|
||||||
statusLabel.SetText("Download completed successfully!")
|
|
||||||
progressBar.SetValue(1.0)
|
|
||||||
|
|
||||||
// Show success dialog
|
|
||||||
dialog.ShowConfirm("Installation Complete",
|
|
||||||
"LocalAI has been downloaded and installed successfully. You can now start LocalAI from the launcher.",
|
|
||||||
func(close bool) {
|
|
||||||
progressWindow.Close()
|
|
||||||
// Update status
|
|
||||||
ui.UpdateStatus("LocalAI installed successfully")
|
|
||||||
}, progressWindow)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateRunningState updates UI based on LocalAI running state
|
// UpdateRunningState updates UI based on LocalAI running state
|
||||||
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
|
func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
|
||||||
fyne.Do(func() {
|
fyne.Do(func() {
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ func (a *Application) RestartAgentJobService() error {
|
|||||||
if d.JobStore != nil {
|
if d.JobStore != nil {
|
||||||
agentJobService.SetDistributedJobStore(d.JobStore)
|
agentJobService.SetDistributedJobStore(d.JobStore)
|
||||||
}
|
}
|
||||||
|
// Keep agent tasks consistent across replicas (same client the dispatcher uses).
|
||||||
|
agentJobService.SetTaskSyncNATS(d.Nats)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the service
|
// Start the service
|
||||||
|
|||||||
@@ -604,6 +604,10 @@ func (a *Application) StartAgentPool() {
|
|||||||
usm.SetJobDBStore(s)
|
usm.SetJobDBStore(s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Keep per-user agent tasks consistent across replicas (nil in standalone).
|
||||||
|
if d := a.Distributed(); d != nil {
|
||||||
|
usm.SetJobSyncNATS(d.Nats)
|
||||||
|
}
|
||||||
aps.SetUserServicesManager(usm)
|
aps.SetUserServicesManager(usm)
|
||||||
|
|
||||||
a.agentPoolService.Store(aps)
|
a.agentPoolService.Store(aps)
|
||||||
|
|||||||
@@ -355,6 +355,7 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
|||||||
PrefixProvider: prefixProvider,
|
PrefixProvider: prefixProvider,
|
||||||
PrefixConfig: prefixCfg,
|
PrefixConfig: prefixCfg,
|
||||||
Pressure: pressure,
|
Pressure: pressure,
|
||||||
|
SharedModels: cfg.Distributed.SharedModels,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wire staging-progress broadcasting so file-staging shows up on every
|
// Wire staging-progress broadcasting so file-staging shows up on every
|
||||||
|
|||||||
@@ -280,6 +280,9 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
if application.agentJobService != nil {
|
if application.agentJobService != nil {
|
||||||
application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
|
application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
|
||||||
application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
|
application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
|
||||||
|
// Keep agent tasks consistent across replicas (jobs already sync via the
|
||||||
|
// dispatcher + DB read-through). Same NATS client the dispatcher uses.
|
||||||
|
application.agentJobService.SetTaskSyncNATS(distSvc.Nats)
|
||||||
}
|
}
|
||||||
// Wire skill store into AgentPoolService (wired at pool start time via closure)
|
// Wire skill store into AgentPoolService (wired at pool start time via closure)
|
||||||
// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
|
// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
|
||||||
|
|||||||
@@ -160,6 +160,7 @@ type RunCMD struct {
|
|||||||
RegistrationRequireAuth bool `env:"LOCALAI_REGISTRATION_REQUIRE_AUTH" default:"false" help:"Fail startup when distributed mode is enabled but LOCALAI_REGISTRATION_TOKEN is empty (node endpoints and worker file-transfer server would otherwise be unauthenticated)" group:"distributed"`
|
RegistrationRequireAuth bool `env:"LOCALAI_REGISTRATION_REQUIRE_AUTH" default:"false" help:"Fail startup when distributed mode is enabled but LOCALAI_REGISTRATION_TOKEN is empty (node endpoints and worker file-transfer server would otherwise be unauthenticated)" group:"distributed"`
|
||||||
DistributedRequireAuth bool `env:"LOCALAI_DISTRIBUTED_REQUIRE_AUTH" default:"false" help:"Umbrella switch: require BOTH NATS JWT credentials and a registration token when distributed mode is enabled (implies --nats-require-auth and --registration-require-auth)" group:"distributed"`
|
DistributedRequireAuth bool `env:"LOCALAI_DISTRIBUTED_REQUIRE_AUTH" default:"false" help:"Umbrella switch: require BOTH NATS JWT credentials and a registration token when distributed mode is enabled (implies --nats-require-auth and --registration-require-auth)" group:"distributed"`
|
||||||
AutoApproveNodes bool `env:"LOCALAI_AUTO_APPROVE_NODES" default:"false" help:"Auto-approve new worker nodes (skip admin approval)" group:"distributed"`
|
AutoApproveNodes bool `env:"LOCALAI_AUTO_APPROVE_NODES" default:"false" help:"Auto-approve new worker nodes (skip admin approval)" group:"distributed"`
|
||||||
|
DistributedSharedModels bool `env:"LOCALAI_DISTRIBUTED_SHARED_MODELS" default:"false" help:"Assert that every node mounts the SAME models directory at the SAME path (shared volume). When true, the router skips staging model files to workers and loads them directly from the shared path, avoiding re-downloads." group:"distributed"`
|
||||||
DistributedPrefixCache bool `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE" default:"true" help:"Enable prefix-cache-aware routing in distributed mode (default true). When false, routing falls back to round-robin." group:"distributed"`
|
DistributedPrefixCache bool `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE" default:"true" help:"Enable prefix-cache-aware routing in distributed mode (default true). When false, routing falls back to round-robin." group:"distributed"`
|
||||||
DistributedPrefixCacheTTL string `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE_TTL" help:"Idle-timeout for prefix-cache index entries; also drives the background eviction cadence (every TTL/2). Default 5m." group:"distributed"`
|
DistributedPrefixCacheTTL string `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE_TTL" help:"Idle-timeout for prefix-cache index entries; also drives the background eviction cadence (every TTL/2). Default 5m." group:"distributed"`
|
||||||
BackendInstallTimeout string `env:"LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT" help:"NATS round-trip timeout for backend.install requests sent to worker nodes (default 15m). Increase for slow links pulling multi-GB images." group:"distributed"`
|
BackendInstallTimeout string `env:"LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT" help:"NATS round-trip timeout for backend.install requests sent to worker nodes (default 15m). Increase for slow links pulling multi-GB images." group:"distributed"`
|
||||||
@@ -310,6 +311,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
if r.DistributedRequireAuth {
|
if r.DistributedRequireAuth {
|
||||||
opts = append(opts, config.EnableDistributedRequireAuth)
|
opts = append(opts, config.EnableDistributedRequireAuth)
|
||||||
}
|
}
|
||||||
|
if r.DistributedSharedModels {
|
||||||
|
opts = append(opts, config.EnableDistributedSharedModels)
|
||||||
|
}
|
||||||
if r.NatsAccountSeed != "" {
|
if r.NatsAccountSeed != "" {
|
||||||
opts = append(opts, config.WithNatsAccountSeed(r.NatsAccountSeed))
|
opts = append(opts, config.WithNatsAccountSeed(r.NatsAccountSeed))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -542,6 +542,19 @@ var BackendCapabilities = map[string]BackendCapability{
|
|||||||
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
Description: "Speaker recognition — voice identity verification and analysis",
|
Description: "Speaker recognition — voice identity verification and analysis",
|
||||||
},
|
},
|
||||||
|
"voice-detect": {
|
||||||
|
GRPCMethods: []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
|
||||||
|
PossibleUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
|
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
|
Description: "voice-detect.cpp: C++/ggml speaker embedding, verification and voice analysis (age/gender/emotion)",
|
||||||
|
},
|
||||||
|
"face-detect": {
|
||||||
|
GRPCMethods: []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
|
||||||
|
PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
|
||||||
|
DefaultUsecases: []string{UsecaseFaceRecognition},
|
||||||
|
AcceptsImages: true,
|
||||||
|
Description: "face-detect.cpp: C++/ggml face detection, embedding, verification and attribute analysis",
|
||||||
|
},
|
||||||
"silero-vad": {
|
"silero-vad": {
|
||||||
GRPCMethods: []GRPCMethod{MethodVAD},
|
GRPCMethods: []GRPCMethod{MethodVAD},
|
||||||
PossibleUsecases: []string{UsecaseVAD},
|
PossibleUsecases: []string{UsecaseVAD},
|
||||||
|
|||||||
@@ -12,14 +12,12 @@ package config
|
|||||||
// these; config never imports backend.
|
// these; config never imports backend.
|
||||||
const (
|
const (
|
||||||
// DefaultContextSize is the fallback context window when none is configured
|
// DefaultContextSize is the fallback context window when none is configured
|
||||||
// or estimable from the model.
|
// or estimable from the model. It is also the fallback for a GGUF whose
|
||||||
|
// metadata yields no usable estimate or that the parser cannot read at all
|
||||||
|
// (e.g. a quant type it does not know, such as NVFP4): a model-agnostic
|
||||||
|
// safe default beats a tiny, surprising window that truncates real prompts.
|
||||||
DefaultContextSize = 4096
|
DefaultContextSize = 4096
|
||||||
|
|
||||||
// GGUFFallbackContextSize is the context window for a GGUF model whose
|
|
||||||
// metadata yields no usable estimate (see guessGGUFFromFile). Deliberately
|
|
||||||
// smaller than DefaultContextSize to stay conservative on memory there.
|
|
||||||
GGUFFallbackContextSize = 1024
|
|
||||||
|
|
||||||
// DefaultNGPULayers means "offload all layers"; the backend (fit_params)
|
// DefaultNGPULayers means "offload all layers"; the backend (fit_params)
|
||||||
// clamps to what actually fits in device memory.
|
// clamps to what actually fits in device memory.
|
||||||
DefaultNGPULayers = 99999999
|
DefaultNGPULayers = 99999999
|
||||||
|
|||||||
@@ -31,6 +31,14 @@ type DistributedConfig struct {
|
|||||||
// available to enforce just one layer.
|
// available to enforce just one layer.
|
||||||
RequireAuth bool // LOCALAI_DISTRIBUTED_REQUIRE_AUTH
|
RequireAuth bool // LOCALAI_DISTRIBUTED_REQUIRE_AUTH
|
||||||
AutoApproveNodes bool // --auto-approve-nodes / LOCALAI_AUTO_APPROVE_NODES (skip admin approval for new workers)
|
AutoApproveNodes bool // --auto-approve-nodes / LOCALAI_AUTO_APPROVE_NODES (skip admin approval for new workers)
|
||||||
|
// SharedModels asserts that every node (frontend and workers) mounts the
|
||||||
|
// SAME models directory at the SAME path (e.g. a shared volume, as in
|
||||||
|
// docker-compose.distributed.yaml). When true, the router skips staging
|
||||||
|
// model files to workers entirely: the frontend's absolute model paths are
|
||||||
|
// already valid on the worker, so re-uploading them into a per-model
|
||||||
|
// subdirectory only re-downloads what is already present (#10556). Default
|
||||||
|
// false preserves the historical per-node staging behavior.
|
||||||
|
SharedModels bool // --distributed-shared-models / LOCALAI_DISTRIBUTED_SHARED_MODELS
|
||||||
|
|
||||||
// NATS JWT auth (optional; see pkg/natsauth and docs/features/distributed-mode.md)
|
// NATS JWT auth (optional; see pkg/natsauth and docs/features/distributed-mode.md)
|
||||||
NatsAccountSeed string // LOCALAI_NATS_ACCOUNT_SEED — account signing seed to mint per-node worker JWTs
|
NatsAccountSeed string // LOCALAI_NATS_ACCOUNT_SEED — account signing seed to mint per-node worker JWTs
|
||||||
@@ -282,6 +290,13 @@ var EnableAutoApproveNodes = func(o *ApplicationConfig) {
|
|||||||
o.Distributed.AutoApproveNodes = true
|
o.Distributed.AutoApproveNodes = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EnableDistributedSharedModels marks the cluster as sharing one models
|
||||||
|
// directory across all nodes, so the router skips staging model files to
|
||||||
|
// workers (see DistributedConfig.SharedModels).
|
||||||
|
var EnableDistributedSharedModels = func(o *ApplicationConfig) {
|
||||||
|
o.Distributed.SharedModels = true
|
||||||
|
}
|
||||||
|
|
||||||
// DisablePrefixCache turns off prefix-cache-aware routing (falls back to
|
// DisablePrefixCache turns off prefix-cache-aware routing (falls back to
|
||||||
// round-robin). Prefix-cache routing is enabled by default in distributed mode.
|
// round-robin). Prefix-cache routing is enabled by default in distributed mode.
|
||||||
var DisablePrefixCache = func(o *ApplicationConfig) {
|
var DisablePrefixCache = func(o *ApplicationConfig) {
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
|
|||||||
cSize := int(ctxSize)
|
cSize := int(ctxSize)
|
||||||
cfg.ContextSize = &cSize
|
cfg.ContextSize = &cSize
|
||||||
} else {
|
} else {
|
||||||
defaultCtx = GGUFFallbackContextSize
|
defaultCtx = DefaultContextSize
|
||||||
cfg.ContextSize = &defaultCtx
|
cfg.ContextSize = &defaultCtx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func llamaCppDefaults(cfg *ModelConfig, modelPath string) {
|
|||||||
// Default context size if not set, regardless of whether GGUF parsing succeeds
|
// Default context size if not set, regardless of whether GGUF parsing succeeds
|
||||||
defer func() {
|
defer func() {
|
||||||
if cfg.ContextSize == nil {
|
if cfg.ContextSize == nil {
|
||||||
ctx := GGUFFallbackContextSize
|
ctx := DefaultContextSize
|
||||||
cfg.ContextSize = &ctx
|
cfg.ContextSize = &ctx
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|||||||
@@ -248,7 +248,11 @@ var _ = Describe("Backend hooks and parser defaults", func() {
|
|||||||
}
|
}
|
||||||
cfg.SetDefaults(ModelPath(dir))
|
cfg.SetDefaults(ModelPath(dir))
|
||||||
|
|
||||||
|
// An unreadable/unparseable GGUF (e.g. a quant type the parser does
|
||||||
|
// not know, such as NVFP4) yields no estimate, so the hook must fall
|
||||||
|
// back to DefaultContextSize rather than a tiny, surprising value.
|
||||||
Expect(cfg.ContextSize).NotTo(BeNil())
|
Expect(cfg.ContextSize).NotTo(BeNil())
|
||||||
|
Expect(*cfg.ContextSize).To(Equal(DefaultContextSize))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -293,7 +293,7 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
|
|||||||
for _, pref := range prefs {
|
for _, pref := range prefs {
|
||||||
lower := strings.ToLower(pref)
|
lower := strings.ToLower(pref)
|
||||||
for i := range groups {
|
for i := range groups {
|
||||||
if strings.Contains(strings.ToLower(groups[i].Base), lower) {
|
if quantTokenMatches(strings.ToLower(groups[i].Base), lower) {
|
||||||
return &groups[i]
|
return &groups[i]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -301,6 +301,39 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
|
|||||||
return &groups[len(groups)-1]
|
return &groups[len(groups)-1]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// quantTokenMatches reports whether pref appears in base as a whole token
|
||||||
|
// rather than as a substring of a larger alphanumeric run. Both arguments
|
||||||
|
// must already be lowercased.
|
||||||
|
//
|
||||||
|
// A plain strings.Contains is wrong here: `f16` is a substring of `bf16`, so
|
||||||
|
// asking for the `F16` quant used to wrongly select a `BF16` file (#10559).
|
||||||
|
// Only the OUTER edges of the matched preference must hit a boundary — a
|
||||||
|
// non-alphanumeric char (or the start/end of base). Separators inside the
|
||||||
|
// preference itself (e.g. `ud-q4_k_xl`) are intentionally left untouched.
|
||||||
|
func quantTokenMatches(base, pref string) bool {
|
||||||
|
if pref == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for start := strings.Index(base, pref); start != -1; {
|
||||||
|
end := start + len(pref)
|
||||||
|
leftOK := start == 0 || !isAlphaNum(base[start-1])
|
||||||
|
rightOK := end == len(base) || !isAlphaNum(base[end])
|
||||||
|
if leftOK && rightOK {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
next := strings.Index(base[start+1:], pref)
|
||||||
|
if next == -1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
start += next + 1
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func isAlphaNum(b byte) bool {
|
||||||
|
return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
|
||||||
|
}
|
||||||
|
|
||||||
// maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over
|
// maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over
|
||||||
// HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction
|
// HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction
|
||||||
// head, appends the auto-MTP option keys to modelConfig.Options. Failures
|
// head, appends the auto-MTP option keys to modelConfig.Options. Failures
|
||||||
|
|||||||
@@ -374,6 +374,104 @@ var _ = Describe("LlamaCPPImporter", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("quant token boundary matching", func() {
|
||||||
|
// Regression for #10559: the quant preference must match as a whole
|
||||||
|
// token, not as a substring. Asking for `F16` used to select a
|
||||||
|
// `BF16` mmproj because strings.Contains("...bf16.gguf", "f16") is
|
||||||
|
// true — the leading `b` was ignored.
|
||||||
|
|
||||||
|
const repoBase = "https://huggingface.co/acme/example-GGUF/resolve/main/"
|
||||||
|
|
||||||
|
hfFile := func(path, sha string) hfapi.ModelFile {
|
||||||
|
return hfapi.ModelFile{
|
||||||
|
Path: path,
|
||||||
|
SHA256: sha,
|
||||||
|
URL: repoBase + path,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
withHF := func(preferences string, files ...hfapi.ModelFile) Details {
|
||||||
|
d := Details{
|
||||||
|
URI: "https://huggingface.co/acme/example-GGUF",
|
||||||
|
HuggingFace: &hfapi.ModelDetails{
|
||||||
|
ModelID: "acme/example-GGUF",
|
||||||
|
Files: files,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if preferences != "" {
|
||||||
|
d.Preferences = json.RawMessage(preferences)
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
It("selects the F16 mmproj over BF16 (BF16 listed first)", func() {
|
||||||
|
details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
|
||||||
|
hfFile("model-Q4_K_M.gguf", "model"),
|
||||||
|
hfFile("mmproj-x-BF16.gguf", "bf16"),
|
||||||
|
hfFile("mmproj-x-F16.gguf", "f16"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("selects the F16 mmproj over BF16 (F16 listed first)", func() {
|
||||||
|
details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
|
||||||
|
hfFile("model-Q4_K_M.gguf", "model"),
|
||||||
|
hfFile("mmproj-x-F16.gguf", "f16"),
|
||||||
|
hfFile("mmproj-x-BF16.gguf", "bf16"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("selects BF16 when BF16 is the requested mmproj quant", func() {
|
||||||
|
details := withHF(`{"name":"VL","mmproj_quantizations":"BF16"}`,
|
||||||
|
hfFile("model-Q4_K_M.gguf", "model"),
|
||||||
|
hfFile("mmproj-x-F16.gguf", "f16"),
|
||||||
|
hfFile("mmproj-x-BF16.gguf", "bf16"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-BF16.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("still matches a normal model quant with internal separators", func() {
|
||||||
|
// ud-q4_k_xl contains `-`/`_` internally; only the outer edges
|
||||||
|
// must hit a token boundary.
|
||||||
|
details := withHF(`{"name":"M","quantizations":"ud-q4_k_xl"}`,
|
||||||
|
hfFile("model-UD-Q4_K_XL.gguf", "xl"),
|
||||||
|
hfFile("model-Q3_K_M.gguf", "q3"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-UD-Q4_K_XL.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("falls back to the last group when no preference matches", func() {
|
||||||
|
details := withHF(`{"name":"M","quantizations":"Q2_K"}`,
|
||||||
|
hfFile("model-Q8_0.gguf", "q8"),
|
||||||
|
hfFile("model-Q3_K_M.gguf", "q3"),
|
||||||
|
)
|
||||||
|
|
||||||
|
modelConfig, err := importer.Import(details)
|
||||||
|
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-Q3_K_M.gguf"), fmt.Sprintf("%+v", modelConfig))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Context("AdditionalBackends", func() {
|
Context("AdditionalBackends", func() {
|
||||||
It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() {
|
It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() {
|
||||||
entries := importer.AdditionalBackends()
|
entries := importer.AdditionalBackends()
|
||||||
|
|||||||
@@ -23,8 +23,10 @@ import (
|
|||||||
|
|
||||||
"github.com/mudler/LocalAI/core/application"
|
"github.com/mudler/LocalAI/core/application"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
"github.com/mudler/LocalAI/core/services/finetune"
|
"github.com/mudler/LocalAI/core/services/finetune"
|
||||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes"
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
"github.com/mudler/LocalAI/core/services/quantization"
|
"github.com/mudler/LocalAI/core/services/quantization"
|
||||||
|
|
||||||
@@ -400,25 +402,45 @@ func API(application *application.Application) (*echo.Echo, error) {
|
|||||||
routes.RegisterAgentPoolRoutes(e, application, agentsMw, skillsMw, collectionsMw)
|
routes.RegisterAgentPoolRoutes(e, application, agentsMw, skillsMw, collectionsMw)
|
||||||
// Fine-tuning routes
|
// Fine-tuning routes
|
||||||
fineTuningMw := auth.RequireFeature(application.AuthDB(), auth.FeatureFineTuning)
|
fineTuningMw := auth.RequireFeature(application.AuthDB(), auth.FeatureFineTuning)
|
||||||
|
// In distributed mode pass the shared NATS client + PostgreSQL store so
|
||||||
|
// fine-tune jobs stay consistent across replicas (the SyncedMap broadcasts
|
||||||
|
// mutations and hydrates from the DB); standalone passes nil for both.
|
||||||
|
var ftNats messaging.MessagingClient
|
||||||
|
var ftStore *distributed.FineTuneStore
|
||||||
|
if d := application.Distributed(); d != nil {
|
||||||
|
ftNats = d.Nats
|
||||||
|
if d.DistStores != nil && d.DistStores.FineTune != nil {
|
||||||
|
ftStore = d.DistStores.FineTune
|
||||||
|
}
|
||||||
|
}
|
||||||
ftService := finetune.NewFineTuneService(
|
ftService := finetune.NewFineTuneService(
|
||||||
application.ApplicationConfig(),
|
application.ApplicationConfig(),
|
||||||
application.ModelLoader(),
|
application.ModelLoader(),
|
||||||
application.ModelConfigLoader(),
|
application.ModelConfigLoader(),
|
||||||
|
ftNats,
|
||||||
|
ftStore,
|
||||||
)
|
)
|
||||||
if d := application.Distributed(); d != nil {
|
|
||||||
ftService.SetNATSClient(d.Nats)
|
|
||||||
if d.DistStores != nil && d.DistStores.FineTune != nil {
|
|
||||||
ftService.SetFineTuneStore(d.DistStores.FineTune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
routes.RegisterFineTuningRoutes(e, ftService, application.ApplicationConfig(), fineTuningMw)
|
routes.RegisterFineTuningRoutes(e, ftService, application.ApplicationConfig(), fineTuningMw)
|
||||||
|
|
||||||
// Quantization routes
|
// Quantization routes
|
||||||
quantizationMw := auth.RequireFeature(application.AuthDB(), auth.FeatureQuantization)
|
quantizationMw := auth.RequireFeature(application.AuthDB(), auth.FeatureQuantization)
|
||||||
|
// In distributed mode pass the shared NATS client + PostgreSQL store so
|
||||||
|
// quantization jobs stay consistent across replicas (the SyncedMap broadcasts
|
||||||
|
// mutations and hydrates from the DB); standalone passes nil for both.
|
||||||
|
var quantNats messaging.MessagingClient
|
||||||
|
var quantStore *distributed.QuantStore
|
||||||
|
if d := application.Distributed(); d != nil {
|
||||||
|
quantNats = d.Nats
|
||||||
|
if d.DistStores != nil && d.DistStores.Quant != nil {
|
||||||
|
quantStore = d.DistStores.Quant
|
||||||
|
}
|
||||||
|
}
|
||||||
qService := quantization.NewQuantizationService(
|
qService := quantization.NewQuantizationService(
|
||||||
application.ApplicationConfig(),
|
application.ApplicationConfig(),
|
||||||
application.ModelLoader(),
|
application.ModelLoader(),
|
||||||
application.ModelConfigLoader(),
|
application.ModelConfigLoader(),
|
||||||
|
quantNats,
|
||||||
|
quantStore,
|
||||||
)
|
)
|
||||||
routes.RegisterQuantizationRoutes(e, qService, application.ApplicationConfig(), quantizationMw)
|
routes.RegisterQuantizationRoutes(e, qService, application.ApplicationConfig(), quantizationMw)
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/http/auth"
|
"github.com/mudler/LocalAI/core/http/auth"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes"
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
|
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
|
||||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||||
@@ -550,12 +551,23 @@ func DeleteBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerF
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS.
|
// ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS.
|
||||||
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerFunc {
|
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender, registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
|
nodeID := c.Param("id")
|
||||||
|
// Agent-type workers don't run backends and never subscribe to the
|
||||||
|
// nodes.<id>.backend.list NATS subject, so the request would hang
|
||||||
|
// until timeout with "no responders". Their backend list is simply
|
||||||
|
// empty. Mirror the aggregate-list guard in managers_distributed.go
|
||||||
|
// (skip nodes whose NodeType is set and not "backend") so the
|
||||||
|
// single-node and cluster-wide views stay consistent.
|
||||||
|
if node, err := registry.Get(c.Request().Context(), nodeID); err == nil {
|
||||||
|
if node.NodeType != "" && node.NodeType != nodes.NodeTypeBackend {
|
||||||
|
return c.JSON(http.StatusOK, []messaging.NodeBackendInfo{})
|
||||||
|
}
|
||||||
|
}
|
||||||
if unloader == nil {
|
if unloader == nil {
|
||||||
return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured"))
|
return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured"))
|
||||||
}
|
}
|
||||||
nodeID := c.Param("id")
|
|
||||||
reply, err := unloader.ListBackends(nodeID)
|
reply, err := unloader.ListBackends(nodeID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
xlog.Error("Failed to list backends on node", "node", nodeID, "error", err)
|
xlog.Error("Failed to list backends on node", "node", nodeID, "error", err)
|
||||||
|
|||||||
103
core/http/endpoints/localai/nodes_backends_list_test.go
Normal file
103
core/http/endpoints/localai/nodes_backends_list_test.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package localai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
|
||||||
|
"github.com/labstack/echo/v4"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
// stubNodeCommandSender records whether ListBackends was invoked so the test can
|
||||||
|
// assert the endpoint short-circuits (no NATS request) for agent-type nodes.
|
||||||
|
type stubNodeCommandSender struct {
|
||||||
|
listBackendsCalled bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) InstallBackend(_, _, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendInstallReply, error) {
|
||||||
|
return &messaging.BackendInstallReply{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) UpgradeBackend(_, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendUpgradeReply, error) {
|
||||||
|
return &messaging.BackendUpgradeReply{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) DeleteBackend(_, _ string) (*messaging.BackendDeleteReply, error) {
|
||||||
|
return &messaging.BackendDeleteReply{Success: true}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) ListBackends(_ string) (*messaging.BackendListReply, error) {
|
||||||
|
s.listBackendsCalled = true
|
||||||
|
return &messaging.BackendListReply{Backends: []messaging.NodeBackendInfo{{Name: "llama-cpp"}}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) StopBackend(_, _ string) error { return nil }
|
||||||
|
|
||||||
|
func (s *stubNodeCommandSender) UnloadModelOnNode(_, _ string) error { return nil }
|
||||||
|
|
||||||
|
var _ = Describe("ListBackendsOnNodeEndpoint", func() {
|
||||||
|
var registry *nodes.NodeRegistry
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
db := testutil.SetupTestDB()
|
||||||
|
var err error
|
||||||
|
registry, err = nodes.NewNodeRegistry(db)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
callEndpoint := func(unloader nodes.NodeCommandSender, nodeID string) *httptest.ResponseRecorder {
|
||||||
|
e := echo.New()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
c := e.NewContext(req, rec)
|
||||||
|
c.SetParamNames("id")
|
||||||
|
c.SetParamValues(nodeID)
|
||||||
|
handler := ListBackendsOnNodeEndpoint(unloader, registry)
|
||||||
|
Expect(handler(c)).To(Succeed())
|
||||||
|
return rec
|
||||||
|
}
|
||||||
|
|
||||||
|
It("returns an empty list for an agent node without issuing a NATS request", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
node := &nodes.BackendNode{Name: "agent-1", NodeType: nodes.NodeTypeAgent}
|
||||||
|
Expect(registry.Register(ctx, node, true)).To(Succeed())
|
||||||
|
|
||||||
|
stub := &stubNodeCommandSender{}
|
||||||
|
rec := callEndpoint(stub, node.ID)
|
||||||
|
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||||
|
Expect(stub.listBackendsCalled).To(BeFalse(),
|
||||||
|
"agent workers don't subscribe to backend.list; the endpoint must not issue the doomed NATS request")
|
||||||
|
|
||||||
|
var list []messaging.NodeBackendInfo
|
||||||
|
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||||
|
Expect(list).To(BeEmpty())
|
||||||
|
// Must be `[]`, not `null`, so the UI can render it.
|
||||||
|
Expect(rec.Body.String()).To(ContainSubstring("[]"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("consults the unloader (NATS) for a backend node", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
node := &nodes.BackendNode{Name: "backend-1", NodeType: nodes.NodeTypeBackend, Address: "10.0.0.1:50051"}
|
||||||
|
Expect(registry.Register(ctx, node, true)).To(Succeed())
|
||||||
|
|
||||||
|
stub := &stubNodeCommandSender{}
|
||||||
|
rec := callEndpoint(stub, node.ID)
|
||||||
|
|
||||||
|
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||||
|
Expect(stub.listBackendsCalled).To(BeTrue(),
|
||||||
|
"backend nodes must still be queried over NATS")
|
||||||
|
|
||||||
|
var list []messaging.NodeBackendInfo
|
||||||
|
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||||
|
Expect(list).To(HaveLen(1))
|
||||||
|
Expect(list[0].Name).To(Equal("llama-cpp"))
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -3,6 +3,7 @@ package openresponses
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -10,6 +11,7 @@ import (
|
|||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/http/auth"
|
||||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||||
openaiEndpoint "github.com/mudler/LocalAI/core/http/endpoints/openai"
|
openaiEndpoint "github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
"github.com/mudler/LocalAI/core/http/middleware"
|
||||||
@@ -246,8 +248,11 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
|
|||||||
// Create cancellable context for background execution
|
// Create cancellable context for background execution
|
||||||
bgCtx, bgCancel := context.WithCancel(context.Background())
|
bgCtx, bgCancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
// Store the background response
|
// Store the background response and stamp its owner before the ID
|
||||||
|
// is returned to the client, so later GET/cancel/resume can verify
|
||||||
|
// the caller owns it.
|
||||||
store.StoreBackground(responseID, input, queuedResponse, bgCancel, input.Stream)
|
store.StoreBackground(responseID, input, queuedResponse, bgCancel, input.Stream)
|
||||||
|
store.SetOwner(responseID, ownerFromContext(c))
|
||||||
|
|
||||||
// Start background processing goroutine
|
// Start background processing goroutine
|
||||||
go func() {
|
go func() {
|
||||||
@@ -1587,6 +1592,7 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i
|
|||||||
if shouldStore {
|
if shouldStore {
|
||||||
store := GetGlobalStore()
|
store := GetGlobalStore()
|
||||||
store.Store(responseID, input, response)
|
store.Store(responseID, input, response)
|
||||||
|
store.SetOwner(responseID, ownerFromContext(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
return c.JSON(200, response)
|
return c.JSON(200, response)
|
||||||
@@ -2322,6 +2328,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
|
|||||||
if shouldStore {
|
if shouldStore {
|
||||||
store := GetGlobalStore()
|
store := GetGlobalStore()
|
||||||
store.Store(responseID, input, responseCompleted)
|
store.Store(responseID, input, responseCompleted)
|
||||||
|
store.SetOwner(responseID, ownerFromContext(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send [DONE]
|
// Send [DONE]
|
||||||
@@ -2966,6 +2973,18 @@ func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.T
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ownerFromContext returns the identity (user ID) of the authenticated
|
||||||
|
// caller, or empty string when no authentication was performed (single-key /
|
||||||
|
// no-auth deployments). It is the value stamped on a response at creation and
|
||||||
|
// compared on read/cancel/resume to prevent one caller from accessing
|
||||||
|
// another's response by guessing its ID.
|
||||||
|
func ownerFromContext(c echo.Context) string {
|
||||||
|
if u := auth.GetUser(c); u != nil {
|
||||||
|
return u.ID
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// GetResponseEndpoint returns a handler for GET /responses/:id
|
// GetResponseEndpoint returns a handler for GET /responses/:id
|
||||||
// This endpoint is used for polling background responses or resuming streaming
|
// This endpoint is used for polling background responses or resuming streaming
|
||||||
// @Summary Get a response by ID
|
// @Summary Get a response by ID
|
||||||
@@ -2991,6 +3010,12 @@ func GetResponseEndpoint() func(c echo.Context) error {
|
|||||||
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enforce response ownership. Return 404 (not 403) on mismatch so the
|
||||||
|
// existence of another caller's response is not leaked.
|
||||||
|
if !accessAllowed(stored, ownerFromContext(c)) {
|
||||||
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
}
|
||||||
|
|
||||||
// Check if streaming resume is requested
|
// Check if streaming resume is requested
|
||||||
streamParam := c.QueryParam("stream")
|
streamParam := c.QueryParam("stream")
|
||||||
if streamParam == "true" {
|
if streamParam == "true" {
|
||||||
@@ -3022,16 +3047,21 @@ func GetResponseEndpoint() func(c echo.Context) error {
|
|||||||
|
|
||||||
// handleStreamResume handles resuming a streaming response from a specific sequence number
|
// handleStreamResume handles resuming a streaming response from a specific sequence number
|
||||||
func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, stored *StoredResponse, startingAfter int) error {
|
func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, stored *StoredResponse, startingAfter int) error {
|
||||||
|
// Fetch buffered events before committing to an SSE response so an
|
||||||
|
// offset-lost gap can be reported as a clean HTTP status rather than a
|
||||||
|
// silently truncated event stream.
|
||||||
|
events, err := store.GetEventsAfter(responseID, startingAfter)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, ErrOffsetLost) {
|
||||||
|
return sendOpenResponsesError(c, 409, "invalid_request_error", fmt.Sprintf("starting_after=%d is older than the oldest retained event; the resume buffer evicted those events and the stream cannot be resumed from that point", startingAfter), "starting_after")
|
||||||
|
}
|
||||||
|
return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get events: %v", err), "")
|
||||||
|
}
|
||||||
|
|
||||||
c.Response().Header().Set("Content-Type", "text/event-stream")
|
c.Response().Header().Set("Content-Type", "text/event-stream")
|
||||||
c.Response().Header().Set("Cache-Control", "no-cache")
|
c.Response().Header().Set("Cache-Control", "no-cache")
|
||||||
c.Response().Header().Set("Connection", "keep-alive")
|
c.Response().Header().Set("Connection", "keep-alive")
|
||||||
|
|
||||||
// Get buffered events after the starting point
|
|
||||||
events, err := store.GetEventsAfter(responseID, startingAfter)
|
|
||||||
if err != nil {
|
|
||||||
return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get events: %v", err), "")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send all buffered events
|
// Send all buffered events
|
||||||
for _, event := range events {
|
for _, event := range events {
|
||||||
fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.EventType, string(event.Data))
|
fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.EventType, string(event.Data))
|
||||||
@@ -3126,6 +3156,17 @@ func CancelResponseEndpoint() func(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
store := GetGlobalStore()
|
store := GetGlobalStore()
|
||||||
|
|
||||||
|
// Look up first so ownership can be checked before any mutation.
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
if err != nil {
|
||||||
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
}
|
||||||
|
// Return 404 (not 403) on owner mismatch so existence is not leaked.
|
||||||
|
if !accessAllowed(stored, ownerFromContext(c)) {
|
||||||
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
}
|
||||||
|
|
||||||
response, err := store.Cancel(responseID)
|
response, err := store.Cancel(responseID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
return sendOpenResponsesError(c, 404, "not_found", fmt.Sprintf("response not found: %s", responseID), "id")
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package openresponses
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -11,6 +12,30 @@ import (
|
|||||||
"github.com/mudler/xlog"
|
"github.com/mudler/xlog"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// defaultMaxStreamEvents bounds how many resume-buffer events a single
|
||||||
|
// background response retains. Without a cap, a long-running or abandoned
|
||||||
|
// background generation grows StreamEvents without limit and can exhaust
|
||||||
|
// process memory. When the cap is exceeded the oldest events are evicted
|
||||||
|
// from the front (see AppendEvent). Mirrors llama.cpp's byte-capped slot
|
||||||
|
// ring used for resumable /slots state.
|
||||||
|
defaultMaxStreamEvents = 8192
|
||||||
|
|
||||||
|
// defaultMaxStreamBytes caps the total serialized size of retained
|
||||||
|
// resume-buffer events, evicting oldest-first when exceeded. This guards
|
||||||
|
// against a handful of very large events defeating the count cap. 0
|
||||||
|
// disables the byte cap (count cap still applies).
|
||||||
|
defaultMaxStreamBytes = 64 << 20 // 64 MiB
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrOffsetLost is returned by GetEventsAfter when the requested
|
||||||
|
// starting_after sequence number is older than the oldest event still
|
||||||
|
// retained in the resume buffer (i.e. the events between the requested
|
||||||
|
// offset and the current watermark were evicted by the cap). Callers should
|
||||||
|
// surface this to clients as a distinct error instead of silently returning
|
||||||
|
// a truncated stream that omits the dropped events.
|
||||||
|
var ErrOffsetLost = errors.New("resume offset lost: requested events were evicted from the buffer")
|
||||||
|
|
||||||
// ResponseStore provides thread-safe storage for Open Responses API responses
|
// ResponseStore provides thread-safe storage for Open Responses API responses
|
||||||
type ResponseStore struct {
|
type ResponseStore struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
@@ -18,6 +43,12 @@ type ResponseStore struct {
|
|||||||
ttl time.Duration // Time-to-live for stored responses (0 = no expiration)
|
ttl time.Duration // Time-to-live for stored responses (0 = no expiration)
|
||||||
cleanupCtx context.Context
|
cleanupCtx context.Context
|
||||||
cleanupCancel context.CancelFunc
|
cleanupCancel context.CancelFunc
|
||||||
|
|
||||||
|
// maxStreamEvents / maxStreamBytes bound the per-response resume buffer.
|
||||||
|
// Set once at construction from the default constants; tests may lower
|
||||||
|
// them. A value <= 0 disables that particular cap.
|
||||||
|
maxStreamEvents int
|
||||||
|
maxStreamBytes int
|
||||||
}
|
}
|
||||||
|
|
||||||
// StreamedEvent represents a buffered SSE event for streaming resume
|
// StreamedEvent represents a buffered SSE event for streaming resume
|
||||||
@@ -35,6 +66,12 @@ type StoredResponse struct {
|
|||||||
StoredAt time.Time
|
StoredAt time.Time
|
||||||
ExpiresAt *time.Time // nil if no expiration
|
ExpiresAt *time.Time // nil if no expiration
|
||||||
|
|
||||||
|
// Owner is the identity (user ID) that created this response. It is set
|
||||||
|
// once at creation and never mutated, so it can be read without holding
|
||||||
|
// mu. Empty means "no owner" (single-key / no-auth deployments), in which
|
||||||
|
// case ownership checks are skipped for backward compatibility.
|
||||||
|
Owner string
|
||||||
|
|
||||||
// Background execution support
|
// Background execution support
|
||||||
CancelFunc context.CancelFunc // For cancellation of background tasks
|
CancelFunc context.CancelFunc // For cancellation of background tasks
|
||||||
StreamEvents []StreamedEvent // Buffered events for streaming resume
|
StreamEvents []StreamedEvent // Buffered events for streaming resume
|
||||||
@@ -42,6 +79,14 @@ type StoredResponse struct {
|
|||||||
IsBackground bool // Was created with background=true
|
IsBackground bool // Was created with background=true
|
||||||
EventsChan chan struct{} // Signals new events for live subscribers
|
EventsChan chan struct{} // Signals new events for live subscribers
|
||||||
mu sync.RWMutex // Protect concurrent access to this response
|
mu sync.RWMutex // Protect concurrent access to this response
|
||||||
|
|
||||||
|
// streamBytes tracks the total serialized size of the events currently
|
||||||
|
// retained in StreamEvents, used to enforce the byte cap. droppedThrough
|
||||||
|
// is the highest sequence number evicted from the front of the buffer
|
||||||
|
// (-1 = nothing evicted); it is the watermark GetEventsAfter compares
|
||||||
|
// against to detect a lost resume offset. Both are guarded by mu.
|
||||||
|
streamBytes int
|
||||||
|
droppedThrough int
|
||||||
}
|
}
|
||||||
|
|
||||||
var getGlobalStore = sync.OnceValue(func() *ResponseStore {
|
var getGlobalStore = sync.OnceValue(func() *ResponseStore {
|
||||||
@@ -83,6 +128,8 @@ func NewResponseStore(ttl time.Duration) *ResponseStore {
|
|||||||
store := &ResponseStore{
|
store := &ResponseStore{
|
||||||
responses: make(map[string]*StoredResponse),
|
responses: make(map[string]*StoredResponse),
|
||||||
ttl: ttl,
|
ttl: ttl,
|
||||||
|
maxStreamEvents: defaultMaxStreamEvents,
|
||||||
|
maxStreamBytes: defaultMaxStreamBytes,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start cleanup goroutine if TTL is set
|
// Start cleanup goroutine if TTL is set
|
||||||
@@ -114,6 +161,7 @@ func (s *ResponseStore) Store(responseID string, request *schema.OpenResponsesRe
|
|||||||
Items: items,
|
Items: items,
|
||||||
StoredAt: time.Now(),
|
StoredAt: time.Now(),
|
||||||
ExpiresAt: nil,
|
ExpiresAt: nil,
|
||||||
|
droppedThrough: -1,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set expiration if TTL is configured
|
// Set expiration if TTL is configured
|
||||||
@@ -266,6 +314,7 @@ func (s *ResponseStore) StoreBackground(responseID string, request *schema.OpenR
|
|||||||
StreamEnabled: streamEnabled,
|
StreamEnabled: streamEnabled,
|
||||||
IsBackground: true,
|
IsBackground: true,
|
||||||
EventsChan: make(chan struct{}, 100), // Buffered channel for event notifications
|
EventsChan: make(chan struct{}, 100), // Buffered channel for event notifications
|
||||||
|
droppedThrough: -1,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set expiration if TTL is configured
|
// Set expiration if TTL is configured
|
||||||
@@ -349,6 +398,25 @@ func (s *ResponseStore) AppendEvent(responseID string, event *schema.ORStreamEve
|
|||||||
EventType: event.Type,
|
EventType: event.Type,
|
||||||
Data: data,
|
Data: data,
|
||||||
})
|
})
|
||||||
|
stored.streamBytes += len(data)
|
||||||
|
|
||||||
|
// Evict oldest events from the front once either cap is exceeded. The
|
||||||
|
// byte cap never evicts the only remaining event (a single oversized
|
||||||
|
// event is still served once). Each eviction advances droppedThrough so
|
||||||
|
// a later resume below the watermark is reported as ErrOffsetLost rather
|
||||||
|
// than silently skipping the dropped events.
|
||||||
|
for (s.maxStreamEvents > 0 && len(stored.StreamEvents) > s.maxStreamEvents) ||
|
||||||
|
(s.maxStreamBytes > 0 && stored.streamBytes > s.maxStreamBytes && len(stored.StreamEvents) > 1) {
|
||||||
|
evicted := stored.StreamEvents[0]
|
||||||
|
stored.streamBytes -= len(evicted.Data)
|
||||||
|
if evicted.SequenceNumber > stored.droppedThrough {
|
||||||
|
stored.droppedThrough = evicted.SequenceNumber
|
||||||
|
}
|
||||||
|
// Release the evicted payload so it can be GC'd even though the
|
||||||
|
// backing array element is still owned by the slice until reuse.
|
||||||
|
stored.StreamEvents[0].Data = nil
|
||||||
|
stored.StreamEvents = stored.StreamEvents[1:]
|
||||||
|
}
|
||||||
stored.mu.Unlock()
|
stored.mu.Unlock()
|
||||||
|
|
||||||
// Notify any subscribers of new event
|
// Notify any subscribers of new event
|
||||||
@@ -374,6 +442,14 @@ func (s *ResponseStore) GetEventsAfter(responseID string, startingAfter int) ([]
|
|||||||
stored.mu.RLock()
|
stored.mu.RLock()
|
||||||
defer stored.mu.RUnlock()
|
defer stored.mu.RUnlock()
|
||||||
|
|
||||||
|
// If the requested offset is older than the watermark, the events the
|
||||||
|
// client expects next (those in (startingAfter, droppedThrough]) were
|
||||||
|
// evicted by the cap. Signal the gap rather than returning a stream that
|
||||||
|
// silently skips them.
|
||||||
|
if startingAfter < stored.droppedThrough {
|
||||||
|
return nil, ErrOffsetLost
|
||||||
|
}
|
||||||
|
|
||||||
var result []StreamedEvent
|
var result []StreamedEvent
|
||||||
for _, event := range stored.StreamEvents {
|
for _, event := range stored.StreamEvents {
|
||||||
if event.SequenceNumber > startingAfter {
|
if event.SequenceNumber > startingAfter {
|
||||||
@@ -447,3 +523,30 @@ func (s *ResponseStore) IsStreamEnabled(responseID string) (bool, error) {
|
|||||||
|
|
||||||
return stored.StreamEnabled, nil
|
return stored.StreamEnabled, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetOwner records the identity that owns a stored response. It is called
|
||||||
|
// once, right after the response is stored and before its ID is handed back
|
||||||
|
// to any client, so no lock on the stored response is required. A no-op for
|
||||||
|
// an empty owner or unknown response ID.
|
||||||
|
func (s *ResponseStore) SetOwner(responseID, owner string) {
|
||||||
|
if owner == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
if !exists {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.Owner = owner
|
||||||
|
}
|
||||||
|
|
||||||
|
// accessAllowed reports whether a caller identified by callerID may read or
|
||||||
|
// mutate the given stored response. An empty owner (single-key / no-auth
|
||||||
|
// deployments) is accessible by anyone, preserving backward compatibility;
|
||||||
|
// otherwise the caller identity must match the recorded owner.
|
||||||
|
func accessAllowed(stored *StoredResponse, callerID string) bool {
|
||||||
|
return stored.Owner == "" || stored.Owner == callerID
|
||||||
|
}
|
||||||
|
|||||||
@@ -585,6 +585,86 @@ var _ = Describe("ResponseStore", func() {
|
|||||||
Expect(enabled2).To(BeFalse())
|
Expect(enabled2).To(BeFalse())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should bound the resume buffer and evict oldest events past the cap", func() {
	const (
		// eventCap is the lowered per-response event cap used by this spec.
		eventCap = 5
		// total is the number of events appended, well past the cap.
		total = 20
		// firstKept is the lowest sequence number that survives eviction.
		firstKept = total - eventCap
	)

	// Lower the caps so the test stays fast; production defaults are
	// large. Same-package access to the unexported fields is fine.
	store.maxStreamEvents = eventCap
	store.maxStreamBytes = 0 // count cap only for this test

	responseID := "resp_buffer_cap"
	request := &schema.OpenResponsesRequest{Model: "test"}
	response := &schema.ORResponseResource{
		ID:     responseID,
		Object: "response",
		Status: schema.ORStatusInProgress,
	}

	_, cancel := context.WithCancel(context.Background())
	defer cancel()

	store.StoreBackground(responseID, request, response, cancel, true)

	// Append well past the cap.
	for i := range total {
		err := store.AppendEvent(responseID, &schema.ORStreamEvent{
			Type:           "response.output_text.delta",
			SequenceNumber: i,
		})
		Expect(err).ToNot(HaveOccurred())
	}

	stored, err := store.Get(responseID)
	Expect(err).ToNot(HaveOccurred())

	// (a) Buffer length stays bounded by the cap.
	Expect(stored.StreamEvents).To(HaveLen(eventCap))

	// (b) Oldest events were evicted: only the last eventCap sequence
	// numbers remain (15..19).
	Expect(stored.StreamEvents[0].SequenceNumber).To(Equal(firstKept))
	Expect(stored.StreamEvents[len(stored.StreamEvents)-1].SequenceNumber).To(Equal(total - 1))

	// Resuming right after the newest evicted event (seq 14) still works
	// and yields every retained event.
	retained, err := store.GetEventsAfter(responseID, firstKept-1)
	Expect(err).ToNot(HaveOccurred())
	Expect(retained).To(HaveLen(eventCap))

	// (c) Asking below the dropped watermark returns ErrOffsetLost.
	_, err = store.GetEventsAfter(responseID, 0)
	Expect(err).To(MatchError(ErrOffsetLost))

	_, err = store.GetEventsAfter(responseID, -1)
	Expect(err).To(MatchError(ErrOffsetLost))
})
|
||||||
|
|
||||||
|
It("should record and enforce response ownership", func() {
|
||||||
|
responseID := "resp_owner_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response", Status: schema.ORStatusCompleted}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
store.SetOwner(responseID, "userA")
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored.Owner).To(Equal("userA"))
|
||||||
|
|
||||||
|
// Owner matches -> allowed; different identity -> denied.
|
||||||
|
Expect(accessAllowed(stored, "userA")).To(BeTrue())
|
||||||
|
Expect(accessAllowed(stored, "userB")).To(BeFalse())
|
||||||
|
|
||||||
|
// Backward compatibility: a response with no owner is accessible
|
||||||
|
// by any caller (single-key / no-auth deployments).
|
||||||
|
noOwnerID := "resp_no_owner"
|
||||||
|
store.Store(noOwnerID, request, &schema.ORResponseResource{ID: noOwnerID, Object: "response"})
|
||||||
|
noOwner, err := store.Get(noOwnerID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(noOwner.Owner).To(BeEmpty())
|
||||||
|
Expect(accessAllowed(noOwner, "anyone")).To(BeTrue())
|
||||||
|
Expect(accessAllowed(noOwner, "")).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
It("should notify subscribers of new events", func() {
|
It("should notify subscribers of new events", func() {
|
||||||
responseID := "resp_events_chan"
|
responseID := "resp_events_chan"
|
||||||
request := &schema.OpenResponsesRequest{Model: "test"}
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade
|
|||||||
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
|
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
|
||||||
|
|
||||||
// Backend management on workers
|
// Backend management on workers
|
||||||
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader))
|
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader, registry))
|
||||||
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
|
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
|
||||||
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))
|
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))
|
||||||
|
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ import (
|
|||||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/core/services/jobs"
|
"github.com/mudler/LocalAI/core/services/jobs"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
"github.com/mudler/LocalAI/core/templates"
|
"github.com/mudler/LocalAI/core/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
@@ -43,8 +45,18 @@ type AgentJobService struct {
|
|||||||
configLoader *config.ModelConfigLoader
|
configLoader *config.ModelConfigLoader
|
||||||
evaluator *templates.Evaluator
|
evaluator *templates.Evaluator
|
||||||
|
|
||||||
|
// tasks is the cross-replica task store: an in-memory map kept consistent
|
||||||
|
// across replicas via NATS, with read-through to the configured persister
|
||||||
|
// (file in standalone, PostgreSQL in distributed). Unlike jobs - which already
|
||||||
|
// converge via the dispatcher + DB read-through - tasks previously read
|
||||||
|
// in-memory only, so ListTasks went stale on non-originating replicas.
|
||||||
|
tasks *syncstate.SyncedMap[string, schema.Task]
|
||||||
|
// taskNats is the distributed NATS client backing the tasks SyncedMap. It is
|
||||||
|
// not available at construction time, so it is injected via SetTaskSyncNATS
|
||||||
|
// during distributed wiring; nil keeps tasks in-memory-only (standalone).
|
||||||
|
taskNats messaging.MessagingClient
|
||||||
|
|
||||||
// Storage (in-memory primary, persister for secondary persistence)
|
// Storage (in-memory primary, persister for secondary persistence)
|
||||||
tasks *xsync.SyncedMap[string, schema.Task]
|
|
||||||
jobs *xsync.SyncedMap[string, schema.Job]
|
jobs *xsync.SyncedMap[string, schema.Job]
|
||||||
persister JobPersister
|
persister JobPersister
|
||||||
userID string // Scoping: empty for global (main service), set for per-user instances
|
userID string // Scoping: empty for global (main service), set for per-user instances
|
||||||
@@ -96,6 +108,31 @@ func (s *AgentJobService) SetDistributedJobStore(store *jobs.JobStore) {
|
|||||||
s.persister = &dbJobPersister{store: store}
|
s.persister = &dbJobPersister{store: store}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetTaskSyncNATS wires the distributed NATS client used to keep agent *tasks*
|
||||||
|
// consistent across replicas (jobs already converge via the dispatcher + DB
|
||||||
|
// read-through, so they are left untouched). The client is not available when the
|
||||||
|
// service is constructed, so it is injected here during distributed wiring and the
|
||||||
|
// tasks SyncedMap is rebuilt to pick it up. It is always called before Start /
|
||||||
|
// hydrate, while the map is still empty, so rebuilding loses no state. Passing nil
|
||||||
|
// (standalone) keeps the map in-memory-only with no broadcast.
|
||||||
|
func (s *AgentJobService) SetTaskSyncNATS(nats messaging.MessagingClient) {
|
||||||
|
s.taskNats = nats
|
||||||
|
s.buildTasksMap()
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildTasksMap (re)constructs the cross-replica tasks SyncedMap from the current
|
||||||
|
// taskNats. The Store adapter reads s.persister/s.userID live, so a persister swap
|
||||||
|
// (SetDistributedJobStore) needs no rebuild; only the NATS client, fixed at
|
||||||
|
// New-time, forces one - hence SetTaskSyncNATS calls this.
|
||||||
|
func (s *AgentJobService) buildTasksMap() {
|
||||||
|
s.tasks = syncstate.New(syncstate.Config[string, schema.Task]{
|
||||||
|
Name: "agent.tasks",
|
||||||
|
Key: func(t schema.Task) string { return t.ID },
|
||||||
|
Nats: s.taskNats,
|
||||||
|
Store: &taskStoreAdapter{svc: s},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// Dispatcher returns the distributed dispatcher (nil if not in distributed mode).
|
// Dispatcher returns the distributed dispatcher (nil if not in distributed mode).
|
||||||
func (s *AgentJobService) Dispatcher() DistributedDispatcher {
|
func (s *AgentJobService) Dispatcher() DistributedDispatcher {
|
||||||
return s.dispatcher
|
return s.dispatcher
|
||||||
@@ -106,13 +143,6 @@ func (s *AgentJobService) DBStore() *jobs.JobStore {
|
|||||||
return s.rawDBStore
|
return s.rawDBStore
|
||||||
}
|
}
|
||||||
|
|
||||||
// saveTasks persists tasks via the configured persister (file or DB).
|
|
||||||
func (s *AgentJobService) saveTasks(task schema.Task) {
|
|
||||||
if err := s.persister.SaveTask(s.userID, task); err != nil {
|
|
||||||
xlog.Warn("Failed to persist task", "error", err, "task_id", task.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// saveJobs persists jobs via the configured persister (file or DB).
|
// saveJobs persists jobs via the configured persister (file or DB).
|
||||||
func (s *AgentJobService) saveJobs(job schema.Job) {
|
func (s *AgentJobService) saveJobs(job schema.Job) {
|
||||||
if err := s.persister.SaveJob(s.userID, job); err != nil {
|
if err := s.persister.SaveJob(s.userID, job); err != nil {
|
||||||
@@ -129,18 +159,8 @@ func (s *AgentJobService) LoadFromDB() {
|
|||||||
|
|
||||||
// loadFromPersister loads tasks and jobs from the configured persister into memory.
|
// loadFromPersister loads tasks and jobs from the configured persister into memory.
|
||||||
func (s *AgentJobService) loadFromPersister() {
|
func (s *AgentJobService) loadFromPersister() {
|
||||||
if tasks, err := s.persister.LoadTasks(s.userID); err != nil {
|
if err := s.hydrateTasks(s.appConfig.Context); err != nil {
|
||||||
xlog.Warn("Failed to load tasks from persister", "error", err)
|
xlog.Warn("Failed to load tasks from persister", "error", err)
|
||||||
} else {
|
|
||||||
for _, task := range tasks {
|
|
||||||
s.tasks.Set(task.ID, task)
|
|
||||||
if task.Enabled && task.Cron != "" {
|
|
||||||
if err := s.ScheduleCronTask(task); err != nil {
|
|
||||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
xlog.Info("Loaded tasks from persister", "count", len(tasks))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if loadedJobs, err := s.persister.LoadJobs(s.userID); err != nil {
|
if loadedJobs, err := s.persister.LoadJobs(s.userID); err != nil {
|
||||||
@@ -153,6 +173,27 @@ func (s *AgentJobService) loadFromPersister() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hydrateTasks loads tasks into the cross-replica SyncedMap and (re)schedules
|
||||||
|
// cron entries for enabled tasks. Hydration goes through the SyncedMap's Store
|
||||||
|
// read-through (Start), not Set, so it neither re-persists nor re-broadcasts the
|
||||||
|
// loaded tasks. Each service instance hydrates exactly once: the main service via
|
||||||
|
// Start -> loadFromPersister, per-user services via LoadFromDB or LoadTasksFromFile.
|
||||||
|
func (s *AgentJobService) hydrateTasks(ctx context.Context) error {
|
||||||
|
if err := s.tasks.Start(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tasks := s.tasks.List()
|
||||||
|
for _, task := range tasks {
|
||||||
|
if task.Enabled && task.Cron != "" {
|
||||||
|
if err := s.ScheduleCronTask(task); err != nil {
|
||||||
|
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xlog.Info("Loaded tasks from persister", "count", len(tasks))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// JobExecution represents a job to be executed
|
// JobExecution represents a job to be executed
|
||||||
type JobExecution struct {
|
type JobExecution struct {
|
||||||
Job schema.Job
|
Job schema.Job
|
||||||
@@ -200,21 +241,19 @@ func NewAgentJobServiceWithPaths(
|
|||||||
) *AgentJobService {
|
) *AgentJobService {
|
||||||
retentionDays := cmp.Or(appConfig.AgentJobRetentionDays, 30)
|
retentionDays := cmp.Or(appConfig.AgentJobRetentionDays, 30)
|
||||||
|
|
||||||
tasks := xsync.NewSyncedMap[string, schema.Task]()
|
|
||||||
jobsMap := xsync.NewSyncedMap[string, schema.Job]()
|
jobsMap := xsync.NewSyncedMap[string, schema.Job]()
|
||||||
|
|
||||||
return &AgentJobService{
|
s := &AgentJobService{
|
||||||
appConfig: appConfig,
|
appConfig: appConfig,
|
||||||
modelLoader: modelLoader,
|
modelLoader: modelLoader,
|
||||||
configLoader: configLoader,
|
configLoader: configLoader,
|
||||||
evaluator: evaluator,
|
evaluator: evaluator,
|
||||||
tasks: tasks,
|
|
||||||
jobs: jobsMap,
|
jobs: jobsMap,
|
||||||
persister: &fileJobPersister{
|
persister: &fileJobPersister{
|
||||||
tasks: tasks,
|
|
||||||
jobs: jobsMap,
|
jobs: jobsMap,
|
||||||
tasksFile: tasksFile,
|
tasksFile: tasksFile,
|
||||||
jobsFile: jobsFile,
|
jobsFile: jobsFile,
|
||||||
|
taskSet: make(map[string]schema.Task),
|
||||||
},
|
},
|
||||||
jobQueue: make(chan JobExecution, 100), // Buffer for 100 jobs
|
jobQueue: make(chan JobExecution, 100), // Buffer for 100 jobs
|
||||||
cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
|
cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
|
||||||
@@ -222,25 +261,17 @@ func NewAgentJobServiceWithPaths(
|
|||||||
cronEntries: xsync.NewSyncedMap[string, cron.EntryID](),
|
cronEntries: xsync.NewSyncedMap[string, cron.EntryID](),
|
||||||
retentionDays: retentionDays,
|
retentionDays: retentionDays,
|
||||||
}
|
}
|
||||||
|
// Build the cross-replica tasks map standalone (nil NATS); SetTaskSyncNATS
|
||||||
|
// rebuilds it with the distributed client once that is available, before Start.
|
||||||
|
s.buildTasksMap()
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadTasksFromFile loads tasks from the persister into the in-memory map
|
// LoadTasksFromFile loads tasks from the persister into the in-memory map
|
||||||
// and schedules cron entries. Named "FromFile" for backward compat; in DB
|
// and schedules cron entries. Named "FromFile" for backward compat; in DB
|
||||||
// mode it loads from the database.
|
// mode it loads from the database.
|
||||||
func (s *AgentJobService) LoadTasksFromFile() error {
|
func (s *AgentJobService) LoadTasksFromFile() error {
|
||||||
tasks, err := s.persister.LoadTasks(s.userID)
|
return s.hydrateTasks(s.appConfig.Context)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, task := range tasks {
|
|
||||||
s.tasks.Set(task.ID, task)
|
|
||||||
if task.Enabled && task.Cron != "" {
|
|
||||||
if err := s.ScheduleCronTask(task); err != nil {
|
|
||||||
xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SaveTasksToFile flushes the current tasks map via the persister. File
|
// SaveTasksToFile flushes the current tasks map via the persister. File
|
||||||
@@ -293,8 +324,12 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
|
|||||||
task.Enabled = true // Default to enabled
|
task.Enabled = true // Default to enabled
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store task
|
// Store task: Set updates the in-memory map, write-throughs to the persister
|
||||||
s.tasks.Set(id, task)
|
// (file or DB), and broadcasts the create to peer replicas. Background ctx
|
||||||
|
// because CreateTask carries no request ctx (mirrors the finetune service).
|
||||||
|
if err := s.tasks.Set(context.Background(), task); err != nil {
|
||||||
|
return "", fmt.Errorf("failed to persist task: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Schedule cron if enabled and has cron expression
|
// Schedule cron if enabled and has cron expression
|
||||||
if task.Enabled && task.Cron != "" {
|
if task.Enabled && task.Cron != "" {
|
||||||
@@ -303,16 +338,15 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.saveTasks(task)
|
|
||||||
return id, nil
|
return id, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateTask updates an existing task
|
// UpdateTask updates an existing task
|
||||||
func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
||||||
if !s.tasks.Exists(id) {
|
existing, ok := s.tasks.Get(id)
|
||||||
|
if !ok {
|
||||||
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||||
}
|
}
|
||||||
existing := s.tasks.Get(id)
|
|
||||||
|
|
||||||
// Preserve ID and CreatedAt
|
// Preserve ID and CreatedAt
|
||||||
task.ID = id
|
task.ID = id
|
||||||
@@ -324,8 +358,10 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
|||||||
s.UnscheduleCronTask(id)
|
s.UnscheduleCronTask(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store updated task
|
// Store updated task: write-through + broadcast (see CreateTask).
|
||||||
s.tasks.Set(id, task)
|
if err := s.tasks.Set(context.Background(), task); err != nil {
|
||||||
|
return fmt.Errorf("failed to persist task: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Schedule new cron if enabled and has cron expression
|
// Schedule new cron if enabled and has cron expression
|
||||||
if task.Enabled && task.Cron != "" {
|
if task.Enabled && task.Cron != "" {
|
||||||
@@ -334,24 +370,22 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.saveTasks(task)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteTask deletes a task
|
// DeleteTask deletes a task
|
||||||
func (s *AgentJobService) DeleteTask(id string) error {
|
func (s *AgentJobService) DeleteTask(id string) error {
|
||||||
if !s.tasks.Exists(id) {
|
if _, ok := s.tasks.Get(id); !ok {
|
||||||
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unschedule cron
|
// Unschedule cron
|
||||||
s.UnscheduleCronTask(id)
|
s.UnscheduleCronTask(id)
|
||||||
|
|
||||||
// Remove from memory
|
// Delete removes from the in-memory map, deletes from the persister, and
|
||||||
s.tasks.Delete(id)
|
// broadcasts the removal to peer replicas.
|
||||||
|
if err := s.tasks.Delete(context.Background(), id); err != nil {
|
||||||
if err := s.persister.DeleteTask(id); err != nil {
|
xlog.Warn("Failed to delete task from store", "error", err, "task_id", id)
|
||||||
xlog.Warn("Failed to delete task from persister", "error", err, "task_id", id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -359,8 +393,8 @@ func (s *AgentJobService) DeleteTask(id string) error {
|
|||||||
|
|
||||||
// GetTask retrieves a task by ID
|
// GetTask retrieves a task by ID
|
||||||
func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
||||||
task := s.tasks.Get(id)
|
task, ok := s.tasks.Get(id)
|
||||||
if task.ID == "" {
|
if !ok {
|
||||||
return nil, fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
return nil, fmt.Errorf("%w: %s", ErrTaskNotFound, id)
|
||||||
}
|
}
|
||||||
return &task, nil
|
return &task, nil
|
||||||
@@ -368,7 +402,7 @@ func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
|
|||||||
|
|
||||||
// ListTasks returns all tasks, sorted by creation date (newest first)
|
// ListTasks returns all tasks, sorted by creation date (newest first)
|
||||||
func (s *AgentJobService) ListTasks() []schema.Task {
|
func (s *AgentJobService) ListTasks() []schema.Task {
|
||||||
tasks := s.tasks.Values()
|
tasks := s.tasks.List()
|
||||||
// Sort by CreatedAt descending (newest first), then by Name for stability
|
// Sort by CreatedAt descending (newest first), then by Name for stability
|
||||||
slices.SortFunc(tasks, func(a, b schema.Task) int {
|
slices.SortFunc(tasks, func(a, b schema.Task) int {
|
||||||
if a.CreatedAt.Equal(b.CreatedAt) {
|
if a.CreatedAt.Equal(b.CreatedAt) {
|
||||||
@@ -397,8 +431,8 @@ func (s *AgentJobService) buildPrompt(templateStr string, params map[string]stri
|
|||||||
// ExecuteJob creates and queues a job for execution
|
// ExecuteJob creates and queues a job for execution
|
||||||
// multimedia can be nil for backward compatibility
|
// multimedia can be nil for backward compatibility
|
||||||
func (s *AgentJobService) ExecuteJob(taskID string, params map[string]string, triggeredBy string, multimedia *schema.MultimediaAttachment) (string, error) {
|
func (s *AgentJobService) ExecuteJob(taskID string, params map[string]string, triggeredBy string, multimedia *schema.MultimediaAttachment) (string, error) {
|
||||||
task := s.tasks.Get(taskID)
|
task, ok := s.tasks.Get(taskID)
|
||||||
if task.ID == "" {
|
if !ok {
|
||||||
return "", fmt.Errorf("%w: %s", ErrTaskNotFound, taskID)
|
return "", fmt.Errorf("%w: %s", ErrTaskNotFound, taskID)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1451,6 +1485,12 @@ func (s *AgentJobService) Stop() error {
|
|||||||
if s.cronScheduler != nil {
|
if s.cronScheduler != nil {
|
||||||
s.cronScheduler.Stop()
|
s.cronScheduler.Stop()
|
||||||
}
|
}
|
||||||
|
// Release the tasks SyncedMap subscription / background workers.
|
||||||
|
if s.tasks != nil {
|
||||||
|
if err := s.tasks.Close(); err != nil {
|
||||||
|
xlog.Warn("Error closing tasks sync map", "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
xlog.Info("AgentJobService stopped")
|
xlog.Info("AgentJobService stopped")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,24 +14,38 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// fileJobPersister persists tasks and jobs to JSON files.
|
// fileJobPersister persists tasks and jobs to JSON files.
|
||||||
// It holds references to the service's syncmaps and serializes the entire
|
//
|
||||||
// map contents on each save (bulk write). Reads at runtime return nil
|
// Jobs serialize the service's in-memory jobs syncmap on each save (bulk write).
|
||||||
// (the in-memory map is the authoritative source); LoadTasks/LoadJobs
|
// Tasks are kept in this persister's own taskSet map instead: the tasks SyncedMap
|
||||||
// are used only at startup to bootstrap the syncmaps.
|
// calls SaveTask/DeleteTask while holding its internal lock (write-through), so
|
||||||
|
// reading back the SyncedMap here would re-enter that lock and deadlock. The
|
||||||
|
// self-contained taskSet, seeded by LoadTasks, lets a per-task write rewrite the
|
||||||
|
// whole bulk file without touching the SyncedMap.
|
||||||
|
//
|
||||||
|
// Runtime reads (GetJob/ListJobs) return nil (the in-memory state is the
|
||||||
|
// authoritative source); LoadTasks/LoadJobs bootstrap state at startup.
|
||||||
type fileJobPersister struct {
|
type fileJobPersister struct {
|
||||||
tasks *xsync.SyncedMap[string, schema.Task]
|
|
||||||
jobs *xsync.SyncedMap[string, schema.Job]
|
jobs *xsync.SyncedMap[string, schema.Job]
|
||||||
tasksFile string
|
tasksFile string
|
||||||
jobsFile string
|
jobsFile string
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
// taskSet is the persister's own view of all tasks, seeded by LoadTasks and
|
||||||
|
// updated by SaveTask/DeleteTask. The bulk JSON file is rewritten from it.
|
||||||
|
taskSet map[string]schema.Task
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) SaveTask(_ string, _ schema.Task) error {
|
func (p *fileJobPersister) SaveTask(_ string, task schema.Task) error {
|
||||||
return p.saveTasksToFile()
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
p.taskSet[task.ID] = task
|
||||||
|
return p.writeTasksLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) DeleteTask(_ string) error {
|
func (p *fileJobPersister) DeleteTask(taskID string) error {
|
||||||
return p.saveTasksToFile()
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
delete(p.taskSet, taskID)
|
||||||
|
return p.writeTasksLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) SaveJob(_ string, _ schema.Job) error {
|
func (p *fileJobPersister) SaveJob(_ string, _ schema.Job) error {
|
||||||
@@ -43,7 +57,9 @@ func (p *fileJobPersister) DeleteJob(_ string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) FlushTasks() error {
|
func (p *fileJobPersister) FlushTasks() error {
|
||||||
return p.saveTasksToFile()
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
return p.writeTasksLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *fileJobPersister) FlushJobs() error {
|
func (p *fileJobPersister) FlushJobs() error {
|
||||||
@@ -83,6 +99,12 @@ func (p *fileJobPersister) LoadTasks(_ string) ([]schema.Task, error) {
|
|||||||
return nil, fmt.Errorf("failed to parse tasks file: %w", err)
|
return nil, fmt.Errorf("failed to parse tasks file: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Seed the in-memory set so subsequent per-task SaveTask/DeleteTask merge into
|
||||||
|
// (rather than overwrite) the persisted tasks when the bulk file is rewritten.
|
||||||
|
for _, t := range tf.Tasks {
|
||||||
|
p.taskSet[t.ID] = t
|
||||||
|
}
|
||||||
|
|
||||||
xlog.Info("Loaded tasks from file", "count", len(tf.Tasks))
|
xlog.Info("Loaded tasks from file", "count", len(tf.Tasks))
|
||||||
return tf.Tasks, nil
|
return tf.Tasks, nil
|
||||||
}
|
}
|
||||||
@@ -118,19 +140,20 @@ func (p *fileJobPersister) CleanupOldJobs(_ time.Duration) (int64, error) {
|
|||||||
return 0, nil // cleanup handled via in-memory filtering
|
return 0, nil // cleanup handled via in-memory filtering
|
||||||
}
|
}
|
||||||
|
|
||||||
// saveTasksToFile serializes the entire tasks map to the JSON file.
|
// writeTasksLocked serializes the persister's task set to the JSON file. Callers
|
||||||
func (p *fileJobPersister) saveTasksToFile() error {
|
// must hold p.mu.
|
||||||
|
func (p *fileJobPersister) writeTasksLocked() error {
|
||||||
if p.tasksFile == "" {
|
if p.tasksFile == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
p.mu.Lock()
|
tasks := make([]schema.Task, 0, len(p.taskSet))
|
||||||
defer p.mu.Unlock()
|
for _, t := range p.taskSet {
|
||||||
|
tasks = append(tasks, t)
|
||||||
tf := schema.TasksFile{
|
|
||||||
Tasks: p.tasks.Values(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tf := schema.TasksFile{Tasks: tasks}
|
||||||
|
|
||||||
data, err := json.MarshalIndent(tf, "", " ")
|
data, err := json.MarshalIndent(tf, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to marshal tasks: %w", err)
|
return fmt.Errorf("failed to marshal tasks: %w", err)
|
||||||
|
|||||||
@@ -20,28 +20,26 @@ var _ = Describe("JobPersister", func() {
|
|||||||
Context("fileJobPersister", func() {
|
Context("fileJobPersister", func() {
|
||||||
var (
|
var (
|
||||||
p *fileJobPersister
|
p *fileJobPersister
|
||||||
tasks *xsync.SyncedMap[string, schema.Task]
|
|
||||||
jobsMap *xsync.SyncedMap[string, schema.Job]
|
jobsMap *xsync.SyncedMap[string, schema.Job]
|
||||||
tmpDir string
|
tmpDir string
|
||||||
)
|
)
|
||||||
|
|
||||||
BeforeEach(func() {
|
BeforeEach(func() {
|
||||||
tmpDir = GinkgoT().TempDir()
|
tmpDir = GinkgoT().TempDir()
|
||||||
tasks = xsync.NewSyncedMap[string, schema.Task]()
|
|
||||||
jobsMap = xsync.NewSyncedMap[string, schema.Job]()
|
jobsMap = xsync.NewSyncedMap[string, schema.Job]()
|
||||||
p = &fileJobPersister{
|
p = &fileJobPersister{
|
||||||
tasks: tasks,
|
|
||||||
jobs: jobsMap,
|
jobs: jobsMap,
|
||||||
tasksFile: filepath.Join(tmpDir, "tasks.json"),
|
tasksFile: filepath.Join(tmpDir, "tasks.json"),
|
||||||
jobsFile: filepath.Join(tmpDir, "jobs.json"),
|
jobsFile: filepath.Join(tmpDir, "jobs.json"),
|
||||||
|
// taskSet is the persister's own task view (decoupled from the tasks
|
||||||
|
// SyncedMap to avoid re-entering its lock during write-through).
|
||||||
|
taskSet: make(map[string]schema.Task),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
It("SaveTask writes all tasks to file", func() {
|
It("SaveTask writes all tasks to file", func() {
|
||||||
tasks.Set("t1", schema.Task{ID: "t1", Name: "Task One", Model: "m", Prompt: "p"})
|
Expect(p.SaveTask("", schema.Task{ID: "t1", Name: "Task One", Model: "m", Prompt: "p"})).To(Succeed())
|
||||||
tasks.Set("t2", schema.Task{ID: "t2", Name: "Task Two", Model: "m", Prompt: "p"})
|
Expect(p.SaveTask("", schema.Task{ID: "t2", Name: "Task Two", Model: "m", Prompt: "p"})).To(Succeed())
|
||||||
|
|
||||||
Expect(p.SaveTask("", schema.Task{})).To(Succeed())
|
|
||||||
|
|
||||||
// Verify file contents
|
// Verify file contents
|
||||||
data, err := os.ReadFile(p.tasksFile)
|
data, err := os.ReadFile(p.tasksFile)
|
||||||
@@ -52,11 +50,9 @@ var _ = Describe("JobPersister", func() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
It("DeleteTask writes updated tasks to file", func() {
|
It("DeleteTask writes updated tasks to file", func() {
|
||||||
tasks.Set("t1", schema.Task{ID: "t1", Name: "Keep"})
|
Expect(p.SaveTask("", schema.Task{ID: "t1", Name: "Keep"})).To(Succeed())
|
||||||
tasks.Set("t2", schema.Task{ID: "t2", Name: "Delete"})
|
Expect(p.SaveTask("", schema.Task{ID: "t2", Name: "Delete"})).To(Succeed())
|
||||||
|
|
||||||
// Simulate deletion from memory (caller does this before calling persister)
|
|
||||||
tasks.Delete("t2")
|
|
||||||
Expect(p.DeleteTask("t2")).To(Succeed())
|
Expect(p.DeleteTask("t2")).To(Succeed())
|
||||||
|
|
||||||
data, err := os.ReadFile(p.tasksFile)
|
data, err := os.ReadFile(p.tasksFile)
|
||||||
|
|||||||
152
core/services/agentpool/task_sync_test.go
Normal file
152
core/services/agentpool/task_sync_test.go
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
package agentpool
|
||||||
|
|
||||||
|
// White-box tests (package agentpool) so a spec can build two AgentJobService
|
||||||
|
// instances sharing one in-memory bus and assert that agent *tasks* converge
|
||||||
|
// across replicas - the bug this migration fixes (ListTasks used to read
|
||||||
|
// in-memory only, so a task created on replica A was invisible on replica B).
|
||||||
|
// Jobs are deliberately untouched here: they already converge via the dispatcher
|
||||||
|
// + DB read-through.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
"github.com/mudler/LocalAI/pkg/system"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTaskSyncService builds an AgentJobService wired to the given bus and a
|
||||||
|
// throwaway data dir (so the file persister has somewhere to write). Model/config
|
||||||
|
// loaders are nil because the task sync paths under test never touch them.
|
||||||
|
func newTaskSyncService(bus messaging.MessagingClient) *AgentJobService {
|
||||||
|
tmpDir := GinkgoT().TempDir()
|
||||||
|
sysState := &system.SystemState{}
|
||||||
|
sysState.Model.ModelsPath = tmpDir
|
||||||
|
appConfig := config.NewApplicationConfig(
|
||||||
|
config.WithDynamicConfigDir(tmpDir),
|
||||||
|
config.WithContext(context.Background()),
|
||||||
|
)
|
||||||
|
appConfig.SystemState = sysState
|
||||||
|
|
||||||
|
svc := NewAgentJobServiceWithPaths(appConfig, nil, nil, nil,
|
||||||
|
// Distinct per-replica files so the file persister write-through never
|
||||||
|
// crosses replicas: convergence here must be proven via the bus alone.
|
||||||
|
tmpDir+"/tasks.json", tmpDir+"/jobs.json")
|
||||||
|
svc.SetTaskSyncNATS(bus)
|
||||||
|
return svc
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("AgentJobService task cross-replica sync", func() {
|
||||||
|
Describe("two replicas sharing one bus", func() {
|
||||||
|
var (
|
||||||
|
bus *testutil.FakeBus
|
||||||
|
a, b *AgentJobService
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
// One shared bus, two replicas: exactly the distributed topology where a
|
||||||
|
// round-robin request may land on a replica that did not originate the
|
||||||
|
// change.
|
||||||
|
bus = testutil.NewFakeBus()
|
||||||
|
a = newTaskSyncService(bus)
|
||||||
|
b = newTaskSyncService(bus)
|
||||||
|
// Start hydrates (empty here) and subscribes both replicas to deltas.
|
||||||
|
Expect(a.Start(context.Background())).To(Succeed())
|
||||||
|
Expect(b.Start(context.Background())).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
Expect(a.Stop()).To(Succeed())
|
||||||
|
Expect(b.Stop()).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("makes a task created on A visible via B's GetTask and ListTasks", func() {
|
||||||
|
id, err := a.CreateTask(schema.Task{Name: "Shared", Model: "m", Prompt: "p"})
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
got, err := b.GetTask(id)
|
||||||
|
Expect(err).NotTo(HaveOccurred(), "B must see a task A just created")
|
||||||
|
Expect(got.Name).To(Equal("Shared"))
|
||||||
|
|
||||||
|
listed := b.ListTasks()
|
||||||
|
Expect(listed).To(HaveLen(1))
|
||||||
|
Expect(listed[0].ID).To(Equal(id))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("propagates a task update from A to B", func() {
|
||||||
|
id, err := a.CreateTask(schema.Task{Name: "Before", Model: "m", Prompt: "p"})
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
Expect(a.UpdateTask(id, schema.Task{Name: "After", Model: "m", Prompt: "p"})).To(Succeed())
|
||||||
|
|
||||||
|
got, err := b.GetTask(id)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
Expect(got.Name).To(Equal("After"), "an update on A must be visible on B")
|
||||||
|
})
|
||||||
|
|
||||||
|
It("removes a task from B when it is deleted on A", func() {
|
||||||
|
id, err := a.CreateTask(schema.Task{Name: "Doomed", Model: "m", Prompt: "p"})
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
_, err = b.GetTask(id)
|
||||||
|
Expect(err).NotTo(HaveOccurred(), "precondition: B must have the task before the delete")
|
||||||
|
|
||||||
|
Expect(a.DeleteTask(id)).To(Succeed())
|
||||||
|
|
||||||
|
_, err = b.GetTask(id)
|
||||||
|
Expect(err).To(HaveOccurred(), "a delete on A must remove the task from B")
|
||||||
|
Expect(b.ListTasks()).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("does not re-broadcast a delta it received (echo-loop guard)", func() {
|
||||||
|
subject := messaging.SubjectSyncStateDelta("agent.tasks")
|
||||||
|
|
||||||
|
_, err := a.CreateTask(schema.Task{Name: "Once", Model: "m", Prompt: "p"})
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
// Exactly one publish: A's create. B applies it without re-publishing,
|
||||||
|
// otherwise this would be 2+ and a real bus would storm.
|
||||||
|
Expect(bus.PublishCount(subject)).To(Equal(1))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("ListTasks ordering and scoping", func() {
|
||||||
|
var svc *AgentJobService
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
svc = newTaskSyncService(testutil.NewFakeBus())
|
||||||
|
Expect(svc.Start(context.Background())).To(Succeed())
|
||||||
|
})
|
||||||
|
AfterEach(func() { Expect(svc.Stop()).To(Succeed()) })
|
||||||
|
|
||||||
|
It("sorts newest-first, breaking ties by name", func() {
|
||||||
|
// CreateTask stamps CreatedAt with time.Now(); space them out so ordering
|
||||||
|
// is deterministic rather than relying on the sub-millisecond gap.
|
||||||
|
oldID, err := svc.CreateTask(schema.Task{Name: "Old", Model: "m", Prompt: "p"})
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
time.Sleep(5 * time.Millisecond)
|
||||||
|
newID, err := svc.CreateTask(schema.Task{Name: "New", Model: "m", Prompt: "p"})
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
listed := svc.ListTasks()
|
||||||
|
Expect(listed).To(HaveLen(2))
|
||||||
|
Expect(listed[0].ID).To(Equal(newID), "newest first")
|
||||||
|
Expect(listed[1].ID).To(Equal(oldID))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("compile-time adapter contract", func() {
|
||||||
|
It("satisfies syncstate.Store for tasks", func() {
|
||||||
|
// Mirrors the var assertion in task_syncstore.go; keeps the type
|
||||||
|
// referenced from a spec so drift surfaces here too.
|
||||||
|
var _ syncstate.Store[string, schema.Task] = (*taskStoreAdapter)(nil)
|
||||||
|
Expect(&taskStoreAdapter{}).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
47
core/services/agentpool/task_syncstore.go
Normal file
47
core/services/agentpool/task_syncstore.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package agentpool
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
|
)
|
||||||
|
|
||||||
|
// taskStoreAdapter bridges the existing JobPersister (file- or DB-backed) to the
|
||||||
|
// generic syncstate.Store the tasks SyncedMap consumes. Only tasks are migrated:
|
||||||
|
// jobs already converge across replicas via the dispatcher (NATS) plus the DB
|
||||||
|
// read-through in ListJobs/GetJob, whereas ListTasks read in-memory only and so
|
||||||
|
// went stale on replicas that did not originate the change.
|
||||||
|
//
|
||||||
|
// The adapter reads svc.persister and svc.userID live (rather than capturing
|
||||||
|
// them) because both are configured by setters - SetDistributedJobStore swaps the
|
||||||
|
// file persister for the DB one, SetUserID scopes per-user queries - AFTER the
|
||||||
|
// service, and thus this adapter, is constructed. Reading them at call time means
|
||||||
|
// the SyncedMap never has to be rebuilt when the persister is swapped.
|
||||||
|
//
|
||||||
|
// The SyncedMap value type is schema.Task: the exact shape ListTasks returns, so
|
||||||
|
// reads need no conversion and REST responses are provably unchanged.
|
||||||
|
type taskStoreAdapter struct {
|
||||||
|
svc *AgentJobService
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile-time assertion that the adapter satisfies the component's Store.
|
||||||
|
var _ syncstate.Store[string, schema.Task] = (*taskStoreAdapter)(nil)
|
||||||
|
|
||||||
|
// List hydrates the map from durable storage on Start/reconnect: the file's task
|
||||||
|
// list (standalone) or every task row (DB / distributed).
|
||||||
|
func (a *taskStoreAdapter) List(_ context.Context) ([]schema.Task, error) {
|
||||||
|
return a.svc.persister.LoadTasks(a.svc.userID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert write-through persists a single task created/updated locally; the
|
||||||
|
// SyncedMap then broadcasts the delta to peers.
|
||||||
|
func (a *taskStoreAdapter) Upsert(_ context.Context, task schema.Task) error {
|
||||||
|
return a.svc.persister.SaveTask(a.svc.userID, task)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete write-through removes a task locally; the SyncedMap then broadcasts the
|
||||||
|
// removal to peers.
|
||||||
|
func (a *taskStoreAdapter) Delete(_ context.Context, id string) error {
|
||||||
|
return a.svc.persister.DeleteTask(id)
|
||||||
|
}
|
||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/mudler/LocalAGI/webui/collections"
|
"github.com/mudler/LocalAGI/webui/collections"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/services/jobs"
|
"github.com/mudler/LocalAI/core/services/jobs"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
"github.com/mudler/LocalAI/core/templates"
|
"github.com/mudler/LocalAI/core/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/xlog"
|
"github.com/mudler/xlog"
|
||||||
@@ -28,6 +29,9 @@ type UserServicesManager struct {
|
|||||||
// Shared distributed backends (set once, inherited by per-user job services)
|
// Shared distributed backends (set once, inherited by per-user job services)
|
||||||
jobDispatcher DistributedDispatcher
|
jobDispatcher DistributedDispatcher
|
||||||
jobDBStore *jobs.JobStore
|
jobDBStore *jobs.JobStore
|
||||||
|
// jobNats keeps per-user agent tasks consistent across replicas (nil in
|
||||||
|
// standalone). Inherited by each per-user AgentJobService.
|
||||||
|
jobNats messaging.MessagingClient
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewUserServicesManager creates a new UserServicesManager.
|
// NewUserServicesManager creates a new UserServicesManager.
|
||||||
@@ -162,6 +166,10 @@ func (m *UserServicesManager) GetJobs(userID string) (*AgentJobService, error) {
|
|||||||
if m.jobDispatcher != nil {
|
if m.jobDispatcher != nil {
|
||||||
svc.SetDistributedBackends(m.jobDispatcher)
|
svc.SetDistributedBackends(m.jobDispatcher)
|
||||||
}
|
}
|
||||||
|
// Inherit the NATS client so per-user tasks broadcast across replicas. Must be
|
||||||
|
// set before the hydrate below (LoadFromDB / LoadTasksFromFile) so the tasks
|
||||||
|
// SyncedMap is rebuilt with the client while it is still empty.
|
||||||
|
svc.SetTaskSyncNATS(m.jobNats)
|
||||||
if m.jobDBStore != nil {
|
if m.jobDBStore != nil {
|
||||||
svc.SetDistributedJobStore(m.jobDBStore)
|
svc.SetDistributedJobStore(m.jobDBStore)
|
||||||
// Load tasks/jobs from DB immediately (per-user services skip Start())
|
// Load tasks/jobs from DB immediately (per-user services skip Start())
|
||||||
@@ -189,6 +197,12 @@ func (m *UserServicesManager) SetJobDBStore(s *jobs.JobStore) {
|
|||||||
m.jobDBStore = s
|
m.jobDBStore = s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetJobSyncNATS sets the NATS client used to keep per-user agent tasks consistent
|
||||||
|
// across replicas.
|
||||||
|
func (m *UserServicesManager) SetJobSyncNATS(nats messaging.MessagingClient) {
|
||||||
|
m.jobNats = nats
|
||||||
|
}
|
||||||
|
|
||||||
// ListAllUserIDs returns all user IDs that have scoped data directories.
|
// ListAllUserIDs returns all user IDs that have scoped data directories.
|
||||||
func (m *UserServicesManager) ListAllUserIDs() ([]string, error) {
|
func (m *UserServicesManager) ListAllUserIDs() ([]string, error) {
|
||||||
return m.storage.ListUserDirs()
|
return m.storage.ListUserDirs()
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/services/advisorylock"
|
"github.com/mudler/LocalAI/core/services/advisorylock"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
"gorm.io/gorm/clause"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FineTuneJobRecord tracks fine-tune jobs in PostgreSQL.
|
// FineTuneJobRecord tracks fine-tune jobs in PostgreSQL.
|
||||||
@@ -80,6 +81,34 @@ func (s *FineTuneStore) List(userID string) ([]FineTuneJobRecord, error) {
|
|||||||
return jobs, q.Find(&jobs).Error
|
return jobs, q.Find(&jobs).Error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListAll returns every fine-tune job across all users. The SyncedMap that backs
|
||||||
|
// FineTuneService is a single global map (the REST API filters by user at read
|
||||||
|
// time), so hydrate needs the full set rather than the per-user List above.
|
||||||
|
func (s *FineTuneStore) ListAll() ([]FineTuneJobRecord, error) {
|
||||||
|
var jobs []FineTuneJobRecord
|
||||||
|
return jobs, s.db.Order("created_at DESC").Find(&jobs).Error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert idempotently inserts or fully replaces a job row by primary key. The
|
||||||
|
// SyncedMap write-through path issues a single Set per mutation regardless of
|
||||||
|
// whether the job already exists, so it needs one create-or-update primitive
|
||||||
|
// (Create alone fails on a duplicate key, UpdateStatus alone misses new rows and
|
||||||
|
// only touches a few columns).
|
||||||
|
func (s *FineTuneStore) Upsert(job *FineTuneJobRecord) error {
|
||||||
|
if job.ID == "" {
|
||||||
|
job.ID = uuid.New().String()
|
||||||
|
}
|
||||||
|
now := time.Now()
|
||||||
|
if job.CreatedAt.IsZero() {
|
||||||
|
job.CreatedAt = now
|
||||||
|
}
|
||||||
|
job.UpdatedAt = now
|
||||||
|
return s.db.Clauses(clause.OnConflict{
|
||||||
|
Columns: []clause.Column{{Name: "id"}},
|
||||||
|
UpdateAll: true,
|
||||||
|
}).Create(job).Error
|
||||||
|
}
|
||||||
|
|
||||||
// UpdateStatus updates the status and message of a fine-tune job.
|
// UpdateStatus updates the status and message of a fine-tune job.
|
||||||
func (s *FineTuneStore) UpdateStatus(id, status, message string) error {
|
func (s *FineTuneStore) UpdateStatus(id, status, message string) error {
|
||||||
return s.db.Model(&FineTuneJobRecord{}).Where("id = ?", id).Updates(map[string]any{
|
return s.db.Model(&FineTuneJobRecord{}).Where("id = ?", id).Updates(map[string]any{
|
||||||
|
|||||||
13
core/services/distributed/finetune_suite_test.go
Normal file
13
core/services/distributed/finetune_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package distributed_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDistributed(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "Distributed Suite")
|
||||||
|
}
|
||||||
61
core/services/distributed/finetune_test.go
Normal file
61
core/services/distributed/finetune_test.go
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
package distributed_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("FineTuneStore", func() {
|
||||||
|
var store *distributed.FineTuneStore
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
db := testutil.SetupTestDB()
|
||||||
|
var err error
|
||||||
|
store, err = distributed.NewFineTuneStore(db)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("ListAll", func() {
|
||||||
|
It("returns jobs across all users (unlike per-user List)", func() {
|
||||||
|
Expect(store.Create(&distributed.FineTuneJobRecord{ID: "j1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||||
|
Expect(store.Create(&distributed.FineTuneJobRecord{ID: "j2", UserID: "u2", Status: "queued"})).To(Succeed())
|
||||||
|
|
||||||
|
all, err := store.ListAll()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(all).To(HaveLen(2))
|
||||||
|
|
||||||
|
perUser, err := store.List("u1")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(perUser).To(HaveLen(1), "List stays per-user")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Upsert", func() {
|
||||||
|
It("inserts a new row", func() {
|
||||||
|
Expect(store.Upsert(&distributed.FineTuneJobRecord{ID: "up-1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||||
|
|
||||||
|
got, err := store.Get("up-1")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got.Status).To(Equal("queued"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("idempotently updates an existing row on a repeated key", func() {
|
||||||
|
Expect(store.Upsert(&distributed.FineTuneJobRecord{ID: "up-2", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||||
|
// Second Upsert with the same primary key must update, not error on a
|
||||||
|
// duplicate-key violation (this is the SyncedMap write-through contract).
|
||||||
|
Expect(store.Upsert(&distributed.FineTuneJobRecord{ID: "up-2", UserID: "u1", Status: "completed", Message: "done"})).To(Succeed())
|
||||||
|
|
||||||
|
got, err := store.Get("up-2")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got.Status).To(Equal("completed"))
|
||||||
|
Expect(got.Message).To(Equal("done"))
|
||||||
|
|
||||||
|
all, err := store.ListAll()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(all).To(HaveLen(1), "upsert must not create a duplicate")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
type Stores struct {
|
type Stores struct {
|
||||||
Gallery *GalleryStore
|
Gallery *GalleryStore
|
||||||
FineTune *FineTuneStore
|
FineTune *FineTuneStore
|
||||||
|
Quant *QuantStore
|
||||||
Skills *SkillStore
|
Skills *SkillStore
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,15 +27,21 @@ func InitStores(db *gorm.DB) (*Stores, error) {
|
|||||||
return nil, fmt.Errorf("fine-tune store: %w", err)
|
return nil, fmt.Errorf("fine-tune store: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
quant, err := NewQuantStore(db)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("quantization store: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
skills, err := NewSkillStore(db)
|
skills, err := NewSkillStore(db)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("skills store: %w", err)
|
return nil, fmt.Errorf("skills store: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
xlog.Info("Distributed stores initialized (Gallery, FineTune, Skills)")
|
xlog.Info("Distributed stores initialized (Gallery, FineTune, Quant, Skills)")
|
||||||
return &Stores{
|
return &Stores{
|
||||||
Gallery: gallery,
|
Gallery: gallery,
|
||||||
FineTune: ft,
|
FineTune: ft,
|
||||||
|
Quant: quant,
|
||||||
Skills: skills,
|
Skills: skills,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
105
core/services/distributed/quant.go
Normal file
105
core/services/distributed/quant.go
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
package distributed
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/mudler/LocalAI/core/services/advisorylock"
|
||||||
|
"gorm.io/gorm"
|
||||||
|
"gorm.io/gorm/clause"
|
||||||
|
)
|
||||||
|
|
||||||
|
// QuantJobRecord is the database row for one quantization job. Its columns
// mirror the API shape (schema.QuantizationJob); the structured config and
// extra options are kept as JSON text so that a row carries everything needed
// to rebuild the job.
type QuantJobRecord struct {
	// Identity and ownership.
	ID     string `gorm:"primaryKey;size:36" json:"id"`
	UserID string `gorm:"index;size:36" json:"user_id,omitempty"`

	// What is being quantized and how.
	Model            string `gorm:"size:255" json:"model"`
	Backend          string `gorm:"size:64" json:"backend"`
	ModelID          string `gorm:"size:255" json:"model_id,omitempty"`
	QuantizationType string `gorm:"size:32" json:"quantization_type"`

	// Progress. Status is one of: queued, downloading, converting, quantizing,
	// completed, failed, stopped.
	Status  string `gorm:"index;size:32;default:queued" json:"status"`
	Message string `gorm:"type:text" json:"message,omitempty"`

	// Output location.
	OutputDir  string `gorm:"size:512" json:"output_dir,omitempty"`
	OutputFile string `gorm:"size:512" json:"output_file,omitempty"`

	// JSON-serialized request details, stored in the "config" and
	// "extra_options" columns and excluded from this struct's JSON encoding.
	ConfigJSON    string `gorm:"column:config;type:text" json:"-"`
	ExtraOptsJSON string `gorm:"column:extra_options;type:text" json:"-"`

	// Import state fields.
	ImportStatus    string `gorm:"size:32" json:"import_status,omitempty"`
	ImportMessage   string `gorm:"type:text" json:"import_message,omitempty"`
	ImportModelName string `gorm:"size:255" json:"import_model_name,omitempty"`

	// Row timestamps.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}
|
||||||
|
|
||||||
|
// TableName fixes the table that stores QuantJobRecord rows.
func (QuantJobRecord) TableName() string {
	return "quantization_jobs"
}
|
||||||
|
|
||||||
|
// QuantStore manages quantization job state in PostgreSQL.
|
||||||
|
type QuantStore struct {
|
||||||
|
db *gorm.DB
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewQuantStore creates a new QuantStore and auto-migrates.
|
||||||
|
// Uses a PostgreSQL advisory lock to prevent concurrent migration races
|
||||||
|
// when multiple instances (frontend + workers) start at the same time.
|
||||||
|
func NewQuantStore(db *gorm.DB) (*QuantStore, error) {
|
||||||
|
if err := advisorylock.WithLockCtx(context.Background(), db, advisorylock.KeySchemaMigrate, func() error {
|
||||||
|
return db.AutoMigrate(&QuantJobRecord{})
|
||||||
|
}); err != nil {
|
||||||
|
return nil, fmt.Errorf("migrating quantization_jobs: %w", err)
|
||||||
|
}
|
||||||
|
return &QuantStore{db: db}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create stores a new quantization job.
|
||||||
|
func (s *QuantStore) Create(job *QuantJobRecord) error {
|
||||||
|
if job.ID == "" {
|
||||||
|
job.ID = uuid.New().String()
|
||||||
|
}
|
||||||
|
job.CreatedAt = time.Now()
|
||||||
|
job.UpdatedAt = job.CreatedAt
|
||||||
|
return s.db.Create(job).Error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get retrieves a quantization job by ID.
|
||||||
|
func (s *QuantStore) Get(id string) (*QuantJobRecord, error) {
|
||||||
|
var job QuantJobRecord
|
||||||
|
if err := s.db.First(&job, "id = ?", id).Error; err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &job, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListAll returns every quantization job across all users. The SyncedMap that
|
||||||
|
// backs QuantizationService is a single global map (the REST API filters by user
|
||||||
|
// at read time), so hydrate needs the full set.
|
||||||
|
func (s *QuantStore) ListAll() ([]QuantJobRecord, error) {
|
||||||
|
var jobs []QuantJobRecord
|
||||||
|
return jobs, s.db.Order("created_at DESC").Find(&jobs).Error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert idempotently inserts or fully replaces a job row by primary key. The
|
||||||
|
// SyncedMap write-through path issues a single Set per mutation regardless of
|
||||||
|
// whether the job already exists, so it needs one create-or-update primitive
|
||||||
|
// (Create alone fails on a duplicate key).
|
||||||
|
func (s *QuantStore) Upsert(job *QuantJobRecord) error {
|
||||||
|
if job.ID == "" {
|
||||||
|
job.ID = uuid.New().String()
|
||||||
|
}
|
||||||
|
now := time.Now()
|
||||||
|
if job.CreatedAt.IsZero() {
|
||||||
|
job.CreatedAt = now
|
||||||
|
}
|
||||||
|
job.UpdatedAt = now
|
||||||
|
return s.db.Clauses(clause.OnConflict{
|
||||||
|
Columns: []clause.Column{{Name: "id"}},
|
||||||
|
UpdateAll: true,
|
||||||
|
}).Create(job).Error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes a quantization job.
|
||||||
|
func (s *QuantStore) Delete(id string) error {
|
||||||
|
return s.db.Where("id = ?", id).Delete(&QuantJobRecord{}).Error
|
||||||
|
}
|
||||||
57
core/services/distributed/quant_test.go
Normal file
57
core/services/distributed/quant_test.go
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
package distributed_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("QuantStore", func() {
|
||||||
|
var store *distributed.QuantStore
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
db := testutil.SetupTestDB()
|
||||||
|
var err error
|
||||||
|
store, err = distributed.NewQuantStore(db)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("ListAll", func() {
|
||||||
|
It("returns jobs across all users", func() {
|
||||||
|
Expect(store.Create(&distributed.QuantJobRecord{ID: "j1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||||
|
Expect(store.Create(&distributed.QuantJobRecord{ID: "j2", UserID: "u2", Status: "queued"})).To(Succeed())
|
||||||
|
|
||||||
|
all, err := store.ListAll()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(all).To(HaveLen(2))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Upsert", func() {
|
||||||
|
It("inserts a new row", func() {
|
||||||
|
Expect(store.Upsert(&distributed.QuantJobRecord{ID: "up-1", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||||
|
|
||||||
|
got, err := store.Get("up-1")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got.Status).To(Equal("queued"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("idempotently updates an existing row on a repeated key", func() {
|
||||||
|
Expect(store.Upsert(&distributed.QuantJobRecord{ID: "up-2", UserID: "u1", Status: "queued"})).To(Succeed())
|
||||||
|
// Second Upsert with the same primary key must update, not error on a
|
||||||
|
// duplicate-key violation (this is the SyncedMap write-through contract).
|
||||||
|
Expect(store.Upsert(&distributed.QuantJobRecord{ID: "up-2", UserID: "u1", Status: "completed", Message: "done"})).To(Succeed())
|
||||||
|
|
||||||
|
got, err := store.Get("up-2")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got.Status).To(Equal("completed"))
|
||||||
|
Expect(got.Message).To(Equal("done"))
|
||||||
|
|
||||||
|
all, err := store.ListAll()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(all).To(HaveLen(1), "upsert must not create a duplicate")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
13
core/services/finetune/finetune_suite_test.go
Normal file
13
core/services/finetune/finetune_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package finetune
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFinetune(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "Finetune Suite")
|
||||||
|
}
|
||||||
@@ -19,6 +19,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/core/services/distributed"
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
"github.com/mudler/LocalAI/core/services/messaging"
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
@@ -32,44 +33,63 @@ type FineTuneService struct {
|
|||||||
modelLoader *model.ModelLoader
|
modelLoader *model.ModelLoader
|
||||||
configLoader *config.ModelConfigLoader
|
configLoader *config.ModelConfigLoader
|
||||||
|
|
||||||
|
// mu serializes the read-modify-write of job values. The SyncedMap guards its
|
||||||
|
// own map structure, but a job is a pointer mutated in place (e.g. the export
|
||||||
|
// goroutine), so the service still needs a lock to keep those field updates
|
||||||
|
// and the subsequent Set atomic with respect to readers.
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
jobs map[string]*schema.FineTuneJob
|
|
||||||
|
|
||||||
// Distributed mode (nil when not in distributed mode)
|
// jobs is the cross-replica job store: an in-memory map kept consistent across
|
||||||
natsClient messaging.Publisher
|
// replicas via NATS, optionally read-through to PostgreSQL in distributed mode.
|
||||||
fineTuneStore *distributed.FineTuneStore
|
jobs *syncstate.SyncedMap[string, *schema.FineTuneJob]
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetNATSClient sets the NATS client for distributed progress publishing.
|
// NewFineTuneService creates a new FineTuneService. In distributed mode pass the
|
||||||
func (s *FineTuneService) SetNATSClient(nc messaging.Publisher) {
|
// shared NATS client and PostgreSQL store so jobs stay consistent across
|
||||||
s.mu.Lock()
|
// replicas; pass nil for both in standalone mode, where the disk Loader hydrates
|
||||||
defer s.mu.Unlock()
|
// the map and there is nothing to broadcast.
|
||||||
s.natsClient = nc
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetFineTuneStore sets the PostgreSQL fine-tune store for distributed persistence.
|
|
||||||
func (s *FineTuneService) SetFineTuneStore(store *distributed.FineTuneStore) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.fineTuneStore = store
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewFineTuneService creates a new FineTuneService.
|
|
||||||
func NewFineTuneService(
|
func NewFineTuneService(
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
modelLoader *model.ModelLoader,
|
modelLoader *model.ModelLoader,
|
||||||
configLoader *config.ModelConfigLoader,
|
configLoader *config.ModelConfigLoader,
|
||||||
|
nats messaging.MessagingClient,
|
||||||
|
store *distributed.FineTuneStore,
|
||||||
) *FineTuneService {
|
) *FineTuneService {
|
||||||
s := &FineTuneService{
|
s := &FineTuneService{
|
||||||
appConfig: appConfig,
|
appConfig: appConfig,
|
||||||
modelLoader: modelLoader,
|
modelLoader: modelLoader,
|
||||||
configLoader: configLoader,
|
configLoader: configLoader,
|
||||||
jobs: make(map[string]*schema.FineTuneJob),
|
|
||||||
}
|
}
|
||||||
s.loadAllJobs()
|
|
||||||
|
// Only attach a Store interface when a concrete store exists, otherwise the
|
||||||
|
// SyncedMap would see a non-nil interface wrapping a nil pointer and try to
|
||||||
|
// hydrate/write through a nil DB.
|
||||||
|
var syncStore syncstate.Store[string, *schema.FineTuneJob]
|
||||||
|
if store != nil {
|
||||||
|
syncStore = &fineTuneStoreAdapter{store: store}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.jobs = syncstate.New(syncstate.Config[string, *schema.FineTuneJob]{
|
||||||
|
Name: "finetune.jobs",
|
||||||
|
Key: func(j *schema.FineTuneJob) string { return j.ID },
|
||||||
|
Nats: nats,
|
||||||
|
Store: syncStore,
|
||||||
|
Loader: s.loadJobsFromDisk, // ignored when Store is set (distributed mode)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Hydrate + subscribe. A hydrate failure must not take the server down: log
|
||||||
|
// and continue degraded (standalone), mirroring the OpCache wiring.
|
||||||
|
if err := s.jobs.Start(appConfig.Context); err != nil {
|
||||||
|
xlog.Warn("FineTune SyncedMap start failed; running degraded", "error", err)
|
||||||
|
}
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Close releases the SyncedMap subscription and background workers.
|
||||||
|
func (s *FineTuneService) Close() error {
|
||||||
|
return s.jobs.Close()
|
||||||
|
}
|
||||||
|
|
||||||
// fineTuneBaseDir returns the base directory for fine-tune job data.
|
// fineTuneBaseDir returns the base directory for fine-tune job data.
|
||||||
func (s *FineTuneService) fineTuneBaseDir() string {
|
func (s *FineTuneService) fineTuneBaseDir() string {
|
||||||
return filepath.Join(s.appConfig.DataPath, "fine-tune")
|
return filepath.Join(s.appConfig.DataPath, "fine-tune")
|
||||||
@@ -100,15 +120,18 @@ func (s *FineTuneService) saveJobState(job *schema.FineTuneJob) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadAllJobs scans the fine-tune directory for persisted jobs and loads them.
|
// loadJobsFromDisk scans the fine-tune directory for persisted jobs and returns
|
||||||
func (s *FineTuneService) loadAllJobs() {
|
// them. It is the SyncedMap Loader used in standalone mode (no DB); the returned
|
||||||
|
// slice hydrates the map on Start.
|
||||||
|
func (s *FineTuneService) loadJobsFromDisk(_ context.Context) ([]*schema.FineTuneJob, error) {
|
||||||
baseDir := s.fineTuneBaseDir()
|
baseDir := s.fineTuneBaseDir()
|
||||||
entries, err := os.ReadDir(baseDir)
|
entries, err := os.ReadDir(baseDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Directory doesn't exist yet — that's fine
|
// Directory doesn't exist yet — that's fine, start empty.
|
||||||
return
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var jobs []*schema.FineTuneJob
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
if !entry.IsDir() {
|
if !entry.IsDir() {
|
||||||
continue
|
continue
|
||||||
@@ -137,12 +160,13 @@ func (s *FineTuneService) loadAllJobs() {
|
|||||||
job.ExportMessage = "Server restarted while export was running"
|
job.ExportMessage = "Server restarted while export was running"
|
||||||
}
|
}
|
||||||
|
|
||||||
s.jobs[job.ID] = &job
|
jobs = append(jobs, &job)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(s.jobs) > 0 {
|
if len(jobs) > 0 {
|
||||||
xlog.Info("Loaded persisted fine-tune jobs", "count", len(s.jobs))
|
xlog.Info("Loaded persisted fine-tune jobs", "count", len(jobs))
|
||||||
}
|
}
|
||||||
|
return jobs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// StartJob starts a new fine-tuning job.
|
// StartJob starts a new fine-tuning job.
|
||||||
@@ -236,27 +260,13 @@ func (s *FineTuneService) StartJob(ctx context.Context, userID string, req schem
|
|||||||
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
||||||
Config: &req,
|
Config: &req,
|
||||||
}
|
}
|
||||||
s.jobs[jobID] = job
|
// Set write-through persists to PostgreSQL (distributed) and broadcasts to
|
||||||
s.saveJobState(job)
|
// peer replicas; the disk state.json is written separately for restart
|
||||||
|
// recovery / standalone hydrate.
|
||||||
// Persist to PostgreSQL in distributed mode
|
if err := s.jobs.Set(ctx, job); err != nil {
|
||||||
if s.fineTuneStore != nil {
|
return nil, fmt.Errorf("failed to persist job: %w", err)
|
||||||
configJSON, _ := json.Marshal(req)
|
|
||||||
extraJSON, _ := json.Marshal(req.ExtraOptions)
|
|
||||||
s.fineTuneStore.Create(&distributed.FineTuneJobRecord{
|
|
||||||
ID: jobID,
|
|
||||||
UserID: userID,
|
|
||||||
Model: req.Model,
|
|
||||||
Backend: backendName,
|
|
||||||
ModelID: modelID,
|
|
||||||
TrainingType: req.TrainingType,
|
|
||||||
TrainingMethod: req.TrainingMethod,
|
|
||||||
Status: "queued",
|
|
||||||
OutputDir: outputDir,
|
|
||||||
ConfigJSON: string(configJSON),
|
|
||||||
ExtraOptsJSON: string(extraJSON),
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
s.saveJobState(job)
|
||||||
|
|
||||||
return &schema.FineTuneJobResponse{
|
return &schema.FineTuneJobResponse{
|
||||||
ID: jobID,
|
ID: jobID,
|
||||||
@@ -270,7 +280,7 @@ func (s *FineTuneService) GetJob(userID, jobID string) (*schema.FineTuneJob, err
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("job not found: %s", jobID)
|
return nil, fmt.Errorf("job not found: %s", jobID)
|
||||||
}
|
}
|
||||||
@@ -286,7 +296,7 @@ func (s *FineTuneService) ListJobs(userID string) []*schema.FineTuneJob {
|
|||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
var result []*schema.FineTuneJob
|
var result []*schema.FineTuneJob
|
||||||
for _, job := range s.jobs {
|
for _, job := range s.jobs.List() {
|
||||||
if userID == "" || job.UserID == userID {
|
if userID == "" || job.UserID == userID {
|
||||||
result = append(result, job)
|
result = append(result, job)
|
||||||
}
|
}
|
||||||
@@ -302,7 +312,7 @@ func (s *FineTuneService) ListJobs(userID string) []*schema.FineTuneJob {
|
|||||||
// StopJob stops a running fine-tuning job.
|
// StopJob stops a running fine-tuning job.
|
||||||
func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, saveCheckpoint bool) error {
|
func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, saveCheckpoint bool) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return fmt.Errorf("job not found: %s", jobID)
|
return fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -323,10 +333,10 @@ func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, sav
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.Status = "stopped"
|
job.Status = "stopped"
|
||||||
job.Message = "Training stopped by user"
|
job.Message = "Training stopped by user"
|
||||||
s.saveJobState(job)
|
if err := s.jobs.Set(ctx, job); err != nil {
|
||||||
if s.fineTuneStore != nil {
|
xlog.Warn("Failed to persist stopped job", "job_id", jobID, "error", err)
|
||||||
s.fineTuneStore.UpdateStatus(jobID, "stopped", "Training stopped by user")
|
|
||||||
}
|
}
|
||||||
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -335,7 +345,7 @@ func (s *FineTuneService) StopJob(ctx context.Context, userID, jobID string, sav
|
|||||||
// DeleteJob removes a fine-tuning job and its associated data from disk.
|
// DeleteJob removes a fine-tuning job and its associated data from disk.
|
||||||
func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return fmt.Errorf("job not found: %s", jobID)
|
return fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -360,9 +370,10 @@ func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
exportModelName := job.ExportModelName
|
exportModelName := job.ExportModelName
|
||||||
delete(s.jobs, jobID)
|
// Delete write-through removes the DB row (distributed) and broadcasts the
|
||||||
if s.fineTuneStore != nil {
|
// removal to peer replicas. DeleteJob has no ctx, so use Background.
|
||||||
s.fineTuneStore.Delete(jobID)
|
if err := s.jobs.Delete(context.Background(), jobID); err != nil {
|
||||||
|
xlog.Warn("Failed to delete job from store", "job_id", jobID, "error", err)
|
||||||
}
|
}
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
@@ -398,7 +409,7 @@ func (s *FineTuneService) DeleteJob(userID, jobID string) error {
|
|||||||
// StreamProgress opens a gRPC progress stream and calls the callback for each update.
|
// StreamProgress opens a gRPC progress stream and calls the callback for each update.
|
||||||
func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID string, callback func(event *schema.FineTuneProgressEvent)) error {
|
func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID string, callback func(event *schema.FineTuneProgressEvent)) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return fmt.Errorf("job not found: %s", jobID)
|
return fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -427,7 +438,7 @@ func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID stri
|
|||||||
}, func(update *pb.FineTuneProgressUpdate) {
|
}, func(update *pb.FineTuneProgressUpdate) {
|
||||||
// Update job status and persist
|
// Update job status and persist
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
if j, ok := s.jobs[jobID]; ok {
|
if j, ok := s.jobs.Get(jobID); ok {
|
||||||
// Don't let progress updates overwrite terminal states
|
// Don't let progress updates overwrite terminal states
|
||||||
isTerminal := j.Status == "stopped" || j.Status == "completed" || j.Status == "failed"
|
isTerminal := j.Status == "stopped" || j.Status == "completed" || j.Status == "failed"
|
||||||
if !isTerminal {
|
if !isTerminal {
|
||||||
@@ -436,10 +447,10 @@ func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID stri
|
|||||||
if update.Message != "" {
|
if update.Message != "" {
|
||||||
j.Message = update.Message
|
j.Message = update.Message
|
||||||
}
|
}
|
||||||
s.saveJobState(j)
|
if err := s.jobs.Set(ctx, j); err != nil {
|
||||||
if s.fineTuneStore != nil {
|
xlog.Warn("Failed to persist progress update", "job_id", jobID, "error", err)
|
||||||
s.fineTuneStore.UpdateStatus(jobID, j.Status, j.Message)
|
|
||||||
}
|
}
|
||||||
|
s.saveJobState(j)
|
||||||
}
|
}
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
@@ -474,7 +485,7 @@ func (s *FineTuneService) StreamProgress(ctx context.Context, userID, jobID stri
|
|||||||
// ListCheckpoints lists checkpoints for a job.
|
// ListCheckpoints lists checkpoints for a job.
|
||||||
func (s *FineTuneService) ListCheckpoints(ctx context.Context, userID, jobID string) ([]*pb.CheckpointInfo, error) {
|
func (s *FineTuneService) ListCheckpoints(ctx context.Context, userID, jobID string) ([]*pb.CheckpointInfo, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return nil, fmt.Errorf("job not found: %s", jobID)
|
return nil, fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -520,7 +531,7 @@ func sanitizeModelName(s string) string {
|
|||||||
// ExportModel starts an async model export from a checkpoint and returns the intended model name immediately.
|
// ExportModel starts an async model export from a checkpoint and returns the intended model name immediately.
|
||||||
func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string, req schema.ExportRequest) (string, error) {
|
func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string, req schema.ExportRequest) (string, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return "", fmt.Errorf("job not found: %s", jobID)
|
return "", fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -572,6 +583,9 @@ func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string,
|
|||||||
job.ExportStatus = "exporting"
|
job.ExportStatus = "exporting"
|
||||||
job.ExportMessage = ""
|
job.ExportMessage = ""
|
||||||
job.ExportModelName = ""
|
job.ExportModelName = ""
|
||||||
|
if err := s.jobs.Set(ctx, job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist export start", "job_id", jobID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
@@ -662,24 +676,30 @@ func (s *FineTuneService) ExportModel(ctx context.Context, userID, jobID string,
|
|||||||
|
|
||||||
xlog.Info("Model exported and registered", "job_id", jobID, "model_name", modelName, "format", req.ExportFormat)
|
xlog.Info("Model exported and registered", "job_id", jobID, "model_name", modelName, "format", req.ExportFormat)
|
||||||
|
|
||||||
|
// Runs after the HTTP request returns, so use Background rather than the
|
||||||
|
// (now likely cancelled) request ctx for the write-through.
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.ExportStatus = "completed"
|
job.ExportStatus = "completed"
|
||||||
job.ExportModelName = modelName
|
job.ExportModelName = modelName
|
||||||
job.ExportMessage = ""
|
job.ExportMessage = ""
|
||||||
s.saveJobState(job)
|
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||||
if s.fineTuneStore != nil {
|
xlog.Warn("Failed to persist export completion", "job_id", jobID, "error", err)
|
||||||
s.fineTuneStore.UpdateExportStatus(jobID, "completed", "", modelName)
|
|
||||||
}
|
}
|
||||||
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return modelName, nil
|
return modelName, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// setExportMessage updates the export message and persists the job state.
|
// setExportMessage updates the export message and persists the job state. Called
|
||||||
|
// from the background export goroutine, so it uses Background for write-through.
|
||||||
func (s *FineTuneService) setExportMessage(job *schema.FineTuneJob, msg string) {
|
func (s *FineTuneService) setExportMessage(job *schema.FineTuneJob, msg string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.ExportMessage = msg
|
job.ExportMessage = msg
|
||||||
|
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist export message", "job_id", job.ID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
@@ -687,7 +707,7 @@ func (s *FineTuneService) setExportMessage(job *schema.FineTuneJob, msg string)
|
|||||||
// GetExportedModelPath returns the path to the exported model directory and its name.
|
// GetExportedModelPath returns the path to the exported model directory and its name.
|
||||||
func (s *FineTuneService) GetExportedModelPath(userID, jobID string) (string, string, error) {
|
func (s *FineTuneService) GetExportedModelPath(userID, jobID string) (string, string, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return "", "", fmt.Errorf("job not found: %s", jobID)
|
return "", "", fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -723,10 +743,10 @@ func (s *FineTuneService) setExportFailed(job *schema.FineTuneJob, message strin
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.ExportStatus = "failed"
|
job.ExportStatus = "failed"
|
||||||
job.ExportMessage = message
|
job.ExportMessage = message
|
||||||
s.saveJobState(job)
|
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||||
if s.fineTuneStore != nil {
|
xlog.Warn("Failed to persist export failure", "job_id", job.ID, "error", err)
|
||||||
s.fineTuneStore.UpdateExportStatus(job.ID, "failed", message, "")
|
|
||||||
}
|
}
|
||||||
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
185
core/services/finetune/service_test.go
Normal file
185
core/services/finetune/service_test.go
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
package finetune
|
||||||
|
|
||||||
|
// White-box tests (package finetune) so a spec can drive the service's internal
|
||||||
|
// SyncedMap the same way StartJob does (via jobs.Set) without standing up a
|
||||||
|
// training backend, then assert the cross-replica reads (GetJob/ListJobs) and
|
||||||
|
// the adapter conversions that keep REST responses byte-for-byte unchanged.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTestService builds a standalone FineTuneService wired to the given bus. The
|
||||||
|
// model/config loaders are nil because the read/sync paths under test never touch
|
||||||
|
// them; the data dir is a throwaway temp dir so the disk Loader finds nothing.
|
||||||
|
func newTestService(bus *testutil.FakeBus) *FineTuneService {
|
||||||
|
appConfig := &config.ApplicationConfig{
|
||||||
|
Context: context.Background(),
|
||||||
|
DataPath: GinkgoT().TempDir(),
|
||||||
|
}
|
||||||
|
return NewFineTuneService(appConfig, nil, nil, bus, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("FineTuneService", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
Describe("cross-replica job visibility", func() {
|
||||||
|
var (
|
||||||
|
bus *testutil.FakeBus
|
||||||
|
a, b *FineTuneService
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
// One shared bus, two replicas: exactly the distributed topology where
|
||||||
|
// a round-robin request may land on a replica that did not originate
|
||||||
|
// the change.
|
||||||
|
bus = testutil.NewFakeBus()
|
||||||
|
a = newTestService(bus)
|
||||||
|
b = newTestService(bus)
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
Expect(a.Close()).To(Succeed())
|
||||||
|
Expect(b.Close()).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("makes a job created on A visible via B's GetJob and ListJobs", func() {
|
||||||
|
job := &schema.FineTuneJob{ID: "job-1", UserID: "user-1", Status: "queued", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
// StartJob persists via jobs.Set; drive that directly to avoid a backend.
|
||||||
|
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||||
|
|
||||||
|
got, err := b.GetJob("user-1", "job-1")
|
||||||
|
Expect(err).ToNot(HaveOccurred(), "B must see a job A just created")
|
||||||
|
Expect(got.Status).To(Equal("queued"))
|
||||||
|
|
||||||
|
listed := b.ListJobs("user-1")
|
||||||
|
Expect(listed).To(HaveLen(1))
|
||||||
|
Expect(listed[0].ID).To(Equal("job-1"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("removes a job from B when it is deleted on A", func() {
|
||||||
|
job := &schema.FineTuneJob{ID: "job-2", UserID: "user-1", Status: "completed", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||||
|
_, err := b.GetJob("user-1", "job-2")
|
||||||
|
Expect(err).ToNot(HaveOccurred(), "precondition: B must have the job before the delete")
|
||||||
|
|
||||||
|
Expect(a.jobs.Delete(ctx, "job-2")).To(Succeed())
|
||||||
|
|
||||||
|
_, err = b.GetJob("user-1", "job-2")
|
||||||
|
Expect(err).To(HaveOccurred(), "a delete on A must remove the job from B")
|
||||||
|
})
|
||||||
|
|
||||||
|
It("propagates a status update from A to B", func() {
|
||||||
|
job := &schema.FineTuneJob{ID: "job-3", UserID: "user-1", Status: "training", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||||
|
|
||||||
|
updated := &schema.FineTuneJob{ID: "job-3", UserID: "user-1", Status: "completed", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
Expect(a.jobs.Set(ctx, updated)).To(Succeed())
|
||||||
|
|
||||||
|
got, err := b.GetJob("user-1", "job-3")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got.Status).To(Equal("completed"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("ListJobs", func() {
|
||||||
|
var svc *FineTuneService
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
svc = newTestService(testutil.NewFakeBus())
|
||||||
|
})
|
||||||
|
AfterEach(func() { Expect(svc.Close()).To(Succeed()) })
|
||||||
|
|
||||||
|
It("filters by user and sorts newest-first", func() {
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "old", UserID: "u1", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "new", UserID: "u1", CreatedAt: "2026-06-27T10:00:00Z"})).To(Succeed())
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "other", UserID: "u2", CreatedAt: "2026-06-26T10:00:00Z"})).To(Succeed())
|
||||||
|
|
||||||
|
jobs := svc.ListJobs("u1")
|
||||||
|
Expect(jobs).To(HaveLen(2), "only u1's jobs")
|
||||||
|
Expect(jobs[0].ID).To(Equal("new"), "newest first")
|
||||||
|
Expect(jobs[1].ID).To(Equal("old"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns every user's jobs when the userID filter is empty", func() {
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "a", UserID: "u1", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "b", UserID: "u2", CreatedAt: "2026-06-26T10:00:00Z"})).To(Succeed())
|
||||||
|
|
||||||
|
Expect(svc.ListJobs("")).To(HaveLen(2))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("rejects GetJob for a job owned by another user", func() {
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.FineTuneJob{ID: "x", UserID: "owner", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||||
|
|
||||||
|
_, err := svc.GetJob("intruder", "x")
|
||||||
|
Expect(err).To(HaveOccurred(), "a different user must not read someone else's job")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("store adapter conversion", func() {
|
||||||
|
// The SyncedMap value type is *schema.FineTuneJob (the exact REST shape).
|
||||||
|
// These specs prove the DB adapter round-trips it losslessly, so hydrate
|
||||||
|
// and write-through in distributed mode keep responses unchanged.
|
||||||
|
It("round-trips a job through jobToRecord/recordToJob preserving the API shape", func() {
|
||||||
|
original := &schema.FineTuneJob{
|
||||||
|
ID: "rt-1",
|
||||||
|
UserID: "user-1",
|
||||||
|
Model: "base-model",
|
||||||
|
Backend: "trl",
|
||||||
|
ModelID: "trl-finetune-rt-1",
|
||||||
|
TrainingType: "lora",
|
||||||
|
TrainingMethod: "sft",
|
||||||
|
Status: "completed",
|
||||||
|
Message: "done",
|
||||||
|
OutputDir: "/data/fine-tune/rt-1",
|
||||||
|
ExtraOptions: map[string]string{"hf_token": "secret"},
|
||||||
|
CreatedAt: "2026-06-27T10:00:00Z",
|
||||||
|
ExportStatus: "completed",
|
||||||
|
ExportMessage: "",
|
||||||
|
ExportModelName: "base-model-ft-rt-1",
|
||||||
|
Config: &schema.FineTuneJobRequest{Model: "base-model", Backend: "trl", DatasetSource: "data.jsonl"},
|
||||||
|
}
|
||||||
|
|
||||||
|
rec := jobToRecord(original)
|
||||||
|
Expect(rec.ID).To(Equal("rt-1"))
|
||||||
|
Expect(rec.ConfigJSON).ToNot(BeEmpty(), "structured config must serialize into the JSON column")
|
||||||
|
Expect(rec.ExtraOptsJSON).ToNot(BeEmpty())
|
||||||
|
|
||||||
|
back := recordToJob(rec)
|
||||||
|
Expect(back.ID).To(Equal(original.ID))
|
||||||
|
Expect(back.UserID).To(Equal(original.UserID))
|
||||||
|
Expect(back.Model).To(Equal(original.Model))
|
||||||
|
Expect(back.Backend).To(Equal(original.Backend))
|
||||||
|
Expect(back.ModelID).To(Equal(original.ModelID))
|
||||||
|
Expect(back.TrainingType).To(Equal(original.TrainingType))
|
||||||
|
Expect(back.TrainingMethod).To(Equal(original.TrainingMethod))
|
||||||
|
Expect(back.Status).To(Equal(original.Status))
|
||||||
|
Expect(back.Message).To(Equal(original.Message))
|
||||||
|
Expect(back.OutputDir).To(Equal(original.OutputDir))
|
||||||
|
Expect(back.ExportStatus).To(Equal(original.ExportStatus))
|
||||||
|
Expect(back.ExportModelName).To(Equal(original.ExportModelName))
|
||||||
|
Expect(back.CreatedAt).To(Equal(original.CreatedAt))
|
||||||
|
Expect(back.ExtraOptions).To(Equal(original.ExtraOptions))
|
||||||
|
Expect(back.Config).ToNot(BeNil())
|
||||||
|
Expect(back.Config.DatasetSource).To(Equal("data.jsonl"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("compile-time adapter contract", func() {
|
||||||
|
It("satisfies syncstate.Store for *distributed.FineTuneStore", func() {
|
||||||
|
// Guards against drift between the adapter and the component interface;
|
||||||
|
// the var assertion in syncstore.go covers it at build time, this keeps
|
||||||
|
// the type referenced from a spec too.
|
||||||
|
var _ *distributed.FineTuneStore
|
||||||
|
Expect(&fineTuneStoreAdapter{}).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
114
core/services/finetune/syncstore.go
Normal file
114
core/services/finetune/syncstore.go
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
package finetune
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fineTuneStoreAdapter bridges the distributed PostgreSQL FineTuneStore to the
|
||||||
|
// generic syncstate.Store the SyncedMap consumes. It is only wired in distributed
|
||||||
|
// mode; standalone leaves Store nil and hydrates from disk via a Loader instead.
|
||||||
|
//
|
||||||
|
// The SyncedMap value type is *schema.FineTuneJob (the exact shape the REST API
|
||||||
|
// returns) so reads need no conversion and the response JSON is provably
|
||||||
|
// unchanged. The adapter is the single place that translates between that API
|
||||||
|
// shape and the DB FineTuneJobRecord.
|
||||||
|
type fineTuneStoreAdapter struct {
|
||||||
|
store *distributed.FineTuneStore
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile-time assertion that the adapter satisfies the component's Store.
|
||||||
|
var _ syncstate.Store[string, *schema.FineTuneJob] = (*fineTuneStoreAdapter)(nil)
|
||||||
|
|
||||||
|
func (a *fineTuneStoreAdapter) List(_ context.Context) ([]*schema.FineTuneJob, error) {
|
||||||
|
records, err := a.store.ListAll()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jobs := make([]*schema.FineTuneJob, 0, len(records))
|
||||||
|
for i := range records {
|
||||||
|
jobs = append(jobs, recordToJob(&records[i]))
|
||||||
|
}
|
||||||
|
return jobs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *fineTuneStoreAdapter) Upsert(_ context.Context, job *schema.FineTuneJob) error {
|
||||||
|
return a.store.Upsert(jobToRecord(job))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *fineTuneStoreAdapter) Delete(_ context.Context, id string) error {
|
||||||
|
return a.store.Delete(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// recordToJob maps a persisted DB record back to the API shape, reconstructing
|
||||||
|
// the structured Config / ExtraOptions from their JSON columns.
|
||||||
|
func recordToJob(r *distributed.FineTuneJobRecord) *schema.FineTuneJob {
|
||||||
|
job := &schema.FineTuneJob{
|
||||||
|
ID: r.ID,
|
||||||
|
UserID: r.UserID,
|
||||||
|
Model: r.Model,
|
||||||
|
Backend: r.Backend,
|
||||||
|
ModelID: r.ModelID,
|
||||||
|
TrainingType: r.TrainingType,
|
||||||
|
TrainingMethod: r.TrainingMethod,
|
||||||
|
Status: r.Status,
|
||||||
|
Message: r.Message,
|
||||||
|
OutputDir: r.OutputDir,
|
||||||
|
ExportStatus: r.ExportStatus,
|
||||||
|
ExportMessage: r.ExportMessage,
|
||||||
|
ExportModelName: r.ExportModelName,
|
||||||
|
CreatedAt: r.CreatedAt.UTC().Format(time.RFC3339),
|
||||||
|
}
|
||||||
|
if r.ExtraOptsJSON != "" {
|
||||||
|
// Best-effort: a malformed column must not drop the whole job from the API.
|
||||||
|
_ = json.Unmarshal([]byte(r.ExtraOptsJSON), &job.ExtraOptions)
|
||||||
|
}
|
||||||
|
if r.ConfigJSON != "" {
|
||||||
|
var cfg schema.FineTuneJobRequest
|
||||||
|
if err := json.Unmarshal([]byte(r.ConfigJSON), &cfg); err == nil {
|
||||||
|
job.Config = &cfg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return job
|
||||||
|
}
|
||||||
|
|
||||||
|
// jobToRecord maps the API shape to a DB record for write-through, serializing
|
||||||
|
// the structured Config / ExtraOptions into their JSON columns. CreatedAt is
|
||||||
|
// parsed back from the RFC3339 string the service stamps; an unparseable value
|
||||||
|
// is left zero so FineTuneStore.Upsert stamps "now".
|
||||||
|
func jobToRecord(job *schema.FineTuneJob) *distributed.FineTuneJobRecord {
|
||||||
|
rec := &distributed.FineTuneJobRecord{
|
||||||
|
ID: job.ID,
|
||||||
|
UserID: job.UserID,
|
||||||
|
Model: job.Model,
|
||||||
|
Backend: job.Backend,
|
||||||
|
ModelID: job.ModelID,
|
||||||
|
TrainingType: job.TrainingType,
|
||||||
|
TrainingMethod: job.TrainingMethod,
|
||||||
|
Status: job.Status,
|
||||||
|
Message: job.Message,
|
||||||
|
OutputDir: job.OutputDir,
|
||||||
|
ExportStatus: job.ExportStatus,
|
||||||
|
ExportMessage: job.ExportMessage,
|
||||||
|
ExportModelName: job.ExportModelName,
|
||||||
|
}
|
||||||
|
if job.Config != nil {
|
||||||
|
if data, err := json.Marshal(job.Config); err == nil {
|
||||||
|
rec.ConfigJSON = string(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if job.ExtraOptions != nil {
|
||||||
|
if data, err := json.Marshal(job.ExtraOptions); err == nil {
|
||||||
|
rec.ExtraOptsJSON = string(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if t, err := time.Parse(time.RFC3339, job.CreatedAt); err == nil {
|
||||||
|
rec.CreatedAt = t
|
||||||
|
}
|
||||||
|
return rec
|
||||||
|
}
|
||||||
@@ -22,6 +22,14 @@ const subscribeConfirmTimeout = 5 * time.Second
|
|||||||
type Client struct {
|
type Client struct {
|
||||||
conn *nats.Conn
|
conn *nats.Conn
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
|
|
||||||
|
// reconnectCbs are invoked after the underlying connection is
|
||||||
|
// re-established. nats.go transparently resubscribes existing
|
||||||
|
// subscriptions on reconnect, but it cannot know that a consumer kept
|
||||||
|
// derived in-memory state (e.g. syncstate.SyncedMap) that may have drifted
|
||||||
|
// while the link was down — these callbacks let such consumers re-hydrate.
|
||||||
|
cbMu sync.Mutex
|
||||||
|
reconnectCbs []func()
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new NATS client with auto-reconnect.
|
// New creates a new NATS client with auto-reconnect.
|
||||||
@@ -31,6 +39,10 @@ func New(url string, opts ...Option) (*Client, error) {
|
|||||||
o(&cfg)
|
o(&cfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocate the client up front so the reconnect handler closure can reach
|
||||||
|
// it; conn is populated after nats.Connect succeeds below.
|
||||||
|
c := &Client{}
|
||||||
|
|
||||||
natsOpts := []nats.Option{
|
natsOpts := []nats.Option{
|
||||||
nats.RetryOnFailedConnect(true),
|
nats.RetryOnFailedConnect(true),
|
||||||
nats.MaxReconnects(-1),
|
nats.MaxReconnects(-1),
|
||||||
@@ -41,6 +53,7 @@ func New(url string, opts ...Option) (*Client, error) {
|
|||||||
}),
|
}),
|
||||||
nats.ReconnectHandler(func(_ *nats.Conn) {
|
nats.ReconnectHandler(func(_ *nats.Conn) {
|
||||||
xlog.Info("NATS reconnected")
|
xlog.Info("NATS reconnected")
|
||||||
|
c.runReconnectCallbacks()
|
||||||
}),
|
}),
|
||||||
nats.ClosedHandler(func(_ *nats.Conn) {
|
nats.ClosedHandler(func(_ *nats.Conn) {
|
||||||
xlog.Info("NATS connection closed")
|
xlog.Info("NATS connection closed")
|
||||||
@@ -103,7 +116,33 @@ func New(url string, opts ...Option) (*Client, error) {
|
|||||||
return nil, fmt.Errorf("connecting to NATS at %s: %w", sanitize.URL(url), err)
|
return nil, fmt.Errorf("connecting to NATS at %s: %w", sanitize.URL(url), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Client{conn: nc}, nil
|
c.conn = nc
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// OnReconnect registers a callback invoked after the NATS connection is
|
||||||
|
// re-established. It is consumed via an optional interface type-assertion
|
||||||
|
// (interface{ OnReconnect(func()) }) rather than being added to MessagingClient,
|
||||||
|
// so the messaging abstraction stays minimal and standalone/test clients are not
|
||||||
|
// forced to implement reconnect semantics. A nil callback is ignored.
|
||||||
|
func (c *Client) OnReconnect(cb func()) {
|
||||||
|
if cb == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.cbMu.Lock()
|
||||||
|
c.reconnectCbs = append(c.reconnectCbs, cb)
|
||||||
|
c.cbMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// runReconnectCallbacks invokes registered reconnect callbacks. It copies the
|
||||||
|
// slice under the lock so a callback that (re)registers cannot deadlock.
|
||||||
|
func (c *Client) runReconnectCallbacks() {
|
||||||
|
c.cbMu.Lock()
|
||||||
|
cbs := append([]func(){}, c.reconnectCbs...)
|
||||||
|
c.cbMu.Unlock()
|
||||||
|
for _, cb := range cbs {
|
||||||
|
cb()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Publish marshals data as JSON and publishes it to the given subject.
|
// Publish marshals data as JSON and publishes it to the given subject.
|
||||||
|
|||||||
@@ -380,6 +380,20 @@ func SubjectCacheInvalidateCollection(name string) string {
|
|||||||
return "cache.invalidate.collections." + sanitizeSubjectToken(name)
|
return "cache.invalidate.collections." + sanitizeSubjectToken(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SyncedMap State Sync (Pub/Sub — broadcast to all frontends)
|
||||||
|
//
|
||||||
|
// The reusable syncstate.SyncedMap component publishes a {op,key,value} delta on
|
||||||
|
// this subject whenever a replica mutates a piece of cross-replica in-memory
|
||||||
|
// state. Peers subscribe and apply the delta to their own map, so a round-robin
|
||||||
|
// API request that lands on a replica which did not originate the change still
|
||||||
|
// sees it. Convergence on (re)connect is done by re-hydrating from the durable
|
||||||
|
// source, so no request/reply snapshot subject is needed here.
|
||||||
|
func SubjectSyncStateDelta(name string) string {
|
||||||
|
return subjectSyncStatePrefix + sanitizeSubjectToken(name) + ".delta"
|
||||||
|
}
|
||||||
|
|
||||||
|
const subjectSyncStatePrefix = "state."
|
||||||
|
|
||||||
// Prefix-Cache Routing Sync (Pub/Sub - broadcast to all frontends)
|
// Prefix-Cache Routing Sync (Pub/Sub - broadcast to all frontends)
|
||||||
//
|
//
|
||||||
// Frontends share prefix-cache observations so a request routed to any replica
|
// Frontends share prefix-cache observations so a request routed to any replica
|
||||||
|
|||||||
@@ -63,6 +63,11 @@ type SmartRouterOptions struct {
|
|||||||
// The reconciler reads the same instance to autoscale a saturated cache-warm
|
// The reconciler reads the same instance to autoscale a saturated cache-warm
|
||||||
// replica. nil disables recording (the disabled path stays a no-op).
|
// replica. nil disables recording (the disabled path stays a no-op).
|
||||||
Pressure *prefixcache.Pressure
|
Pressure *prefixcache.Pressure
|
||||||
|
// SharedModels asserts that every node mounts the same models directory at
|
||||||
|
// the same path. When true, stageModelFiles skips all uploading and leaves
|
||||||
|
// the absolute model paths untouched so the worker loads them directly from
|
||||||
|
// the shared volume (#10556). See config.DistributedConfig.SharedModels.
|
||||||
|
SharedModels bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// SmartRouter routes inference requests to the best available backend node.
|
// SmartRouter routes inference requests to the best available backend node.
|
||||||
@@ -93,6 +98,9 @@ type SmartRouter struct {
|
|||||||
// per-request routing doesn't stall behind a busy backend's serialized
|
// per-request routing doesn't stall behind a busy backend's serialized
|
||||||
// HealthCheck/Predict. See probe_cache.go for the rationale.
|
// HealthCheck/Predict. See probe_cache.go for the rationale.
|
||||||
probeCache *probeCache
|
probeCache *probeCache
|
||||||
|
// sharedModels skips file staging when all nodes mount the same models
|
||||||
|
// directory at the same path (see SmartRouterOptions.SharedModels).
|
||||||
|
sharedModels bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// probeCacheTTL is how long a successful gRPC HealthCheck on a backend is
|
// probeCacheTTL is how long a successful gRPC HealthCheck on a backend is
|
||||||
@@ -122,6 +130,7 @@ func NewSmartRouter(registry ModelRouter, opts SmartRouterOptions) *SmartRouter
|
|||||||
prefixProvider: opts.PrefixProvider,
|
prefixProvider: opts.PrefixProvider,
|
||||||
prefixConfig: opts.PrefixConfig,
|
prefixConfig: opts.PrefixConfig,
|
||||||
pressure: opts.Pressure,
|
pressure: opts.Pressure,
|
||||||
|
sharedModels: opts.SharedModels,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -947,6 +956,19 @@ func (r *SmartRouter) buildClientForAddr(node *BackendNode, addr string, paralle
|
|||||||
// simply remove the {ModelsPath}/{trackingKey}/ directory.
|
// simply remove the {ModelsPath}/{trackingKey}/ directory.
|
||||||
func (r *SmartRouter) stageModelFiles(ctx context.Context, node *BackendNode, opts *pb.ModelOptions, trackingKey string) (*pb.ModelOptions, error) {
|
func (r *SmartRouter) stageModelFiles(ctx context.Context, node *BackendNode, opts *pb.ModelOptions, trackingKey string) (*pb.ModelOptions, error) {
|
||||||
opts = proto.Clone(opts).(*pb.ModelOptions)
|
opts = proto.Clone(opts).(*pb.ModelOptions)
|
||||||
|
|
||||||
|
// Shared-models mode: every node mounts the same models directory at the
|
||||||
|
// same path, so the frontend's absolute model paths are already valid on the
|
||||||
|
// worker. Staging would only re-upload files that already exist on the shared
|
||||||
|
// volume (under a tracking-key subdir the probe never reuses), re-downloading
|
||||||
|
// the model on every load (#10556). Return the clone untouched: no upload, no
|
||||||
|
// path rewrite, no staging tracker.
|
||||||
|
if r.sharedModels {
|
||||||
|
xlog.Info("Skipping model file staging: shared-models mode is on (LOCALAI_DISTRIBUTED_SHARED_MODELS); worker loads directly from the shared volume",
|
||||||
|
"node", node.Name, "modelFile", opts.ModelFile, "trackingKey", trackingKey)
|
||||||
|
return opts, nil
|
||||||
|
}
|
||||||
|
|
||||||
xlog.Info("Staging model files for remote node", "node", node.Name, "modelFile", opts.ModelFile, "trackingKey", trackingKey)
|
xlog.Info("Staging model files for remote node", "node", node.Name, "modelFile", opts.ModelFile, "trackingKey", trackingKey)
|
||||||
|
|
||||||
// Derive the frontend models directory from ModelFile and Model.
|
// Derive the frontend models directory from ModelFile and Model.
|
||||||
|
|||||||
85
core/services/nodes/router_sharedmodels_test.go
Normal file
85
core/services/nodes/router_sharedmodels_test.go
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
package nodes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
// These tests cover shared-models mode (LOCALAI_DISTRIBUTED_SHARED_MODELS): when
|
||||||
|
// every node mounts the same models directory at the same path, the router must
|
||||||
|
// NOT stage model files to workers. The canonical absolute path is already valid
|
||||||
|
// on the worker, so staging would only re-download what is already present
|
||||||
|
// (#10556).
|
||||||
|
var _ = Describe("stageModelFiles shared-models mode", func() {
|
||||||
|
var (
|
||||||
|
stager *fakeFileStager
|
||||||
|
node *BackendNode
|
||||||
|
tmp string
|
||||||
|
gguf string
|
||||||
|
modelID = "ornith-1.0-35b"
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
stager = &fakeFileStager{}
|
||||||
|
node = &BackendNode{ID: "node-1", Name: "node-1", Address: "10.0.0.1:50051"}
|
||||||
|
tmp = GinkgoT().TempDir()
|
||||||
|
|
||||||
|
modelDir := filepath.Join(tmp, "models", "llama-cpp", "models")
|
||||||
|
Expect(os.MkdirAll(modelDir, 0o755)).To(Succeed())
|
||||||
|
gguf = filepath.Join(modelDir, "ornith.gguf")
|
||||||
|
Expect(os.WriteFile(gguf, []byte("weights"), 0o644)).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("does not stage and keeps the canonical absolute ModelFile when shared-models is enabled", func() {
|
||||||
|
router := &SmartRouter{
|
||||||
|
fileStager: stager,
|
||||||
|
stagingTracker: NewStagingTracker(),
|
||||||
|
sharedModels: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := &pb.ModelOptions{
|
||||||
|
Model: "llama-cpp/models/ornith.gguf",
|
||||||
|
ModelFile: gguf,
|
||||||
|
}
|
||||||
|
|
||||||
|
staged, err := router.stageModelFiles(context.Background(), node, opts, modelID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// The file stager must never be touched: no upload, no re-download.
|
||||||
|
Expect(stager.ensureCalls).To(BeEmpty())
|
||||||
|
// The worker loads directly from the shared volume, so the path is unchanged.
|
||||||
|
Expect(staged.ModelFile).To(Equal(gguf))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("stages files (existing behavior) when shared-models is disabled", func() {
|
||||||
|
router := &SmartRouter{
|
||||||
|
fileStager: stager,
|
||||||
|
stagingTracker: NewStagingTracker(),
|
||||||
|
sharedModels: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := &pb.ModelOptions{
|
||||||
|
Model: "llama-cpp/models/ornith.gguf",
|
||||||
|
ModelFile: gguf,
|
||||||
|
}
|
||||||
|
|
||||||
|
staged, err := router.stageModelFiles(context.Background(), node, opts, modelID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// Default mode uploads the model file to the worker.
|
||||||
|
Expect(stager.ensureCalls).ToNot(BeEmpty())
|
||||||
|
stagedLocals := make([]string, 0, len(stager.ensureCalls))
|
||||||
|
for _, c := range stager.ensureCalls {
|
||||||
|
stagedLocals = append(stagedLocals, c.localPath)
|
||||||
|
}
|
||||||
|
Expect(stagedLocals).To(ContainElement(gguf))
|
||||||
|
// ModelFile is rewritten to the remote (tracking-key namespaced) path.
|
||||||
|
Expect(staged.ModelFile).ToNot(Equal(gguf))
|
||||||
|
})
|
||||||
|
})
|
||||||
13
core/services/quantization/quantization_suite_test.go
Normal file
13
core/services/quantization/quantization_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package quantization
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestQuantization(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "Quantization Suite")
|
||||||
|
}
|
||||||
@@ -17,6 +17,9 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/gallery/importers"
|
"github.com/mudler/LocalAI/core/gallery/importers"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
@@ -30,26 +33,63 @@ type QuantizationService struct {
|
|||||||
modelLoader *model.ModelLoader
|
modelLoader *model.ModelLoader
|
||||||
configLoader *config.ModelConfigLoader
|
configLoader *config.ModelConfigLoader
|
||||||
|
|
||||||
|
// mu serializes the read-modify-write of job values. The SyncedMap guards its
|
||||||
|
// own map structure, but a job is a pointer mutated in place (e.g. the import
|
||||||
|
// goroutine), so the service still needs a lock to keep those field updates and
|
||||||
|
// the subsequent Set atomic with respect to readers.
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
jobs map[string]*schema.QuantizationJob
|
|
||||||
|
// jobs is the cross-replica job store: an in-memory map kept consistent across
|
||||||
|
// replicas via NATS, optionally read-through to PostgreSQL in distributed mode.
|
||||||
|
jobs *syncstate.SyncedMap[string, *schema.QuantizationJob]
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewQuantizationService creates a new QuantizationService.
|
// NewQuantizationService creates a new QuantizationService. In distributed mode
|
||||||
|
// pass the shared NATS client and PostgreSQL store so jobs stay consistent across
|
||||||
|
// replicas; pass nil for both in standalone mode, where the disk Loader hydrates
|
||||||
|
// the map and there is nothing to broadcast.
|
||||||
func NewQuantizationService(
|
func NewQuantizationService(
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
modelLoader *model.ModelLoader,
|
modelLoader *model.ModelLoader,
|
||||||
configLoader *config.ModelConfigLoader,
|
configLoader *config.ModelConfigLoader,
|
||||||
|
nats messaging.MessagingClient,
|
||||||
|
store *distributed.QuantStore,
|
||||||
) *QuantizationService {
|
) *QuantizationService {
|
||||||
s := &QuantizationService{
|
s := &QuantizationService{
|
||||||
appConfig: appConfig,
|
appConfig: appConfig,
|
||||||
modelLoader: modelLoader,
|
modelLoader: modelLoader,
|
||||||
configLoader: configLoader,
|
configLoader: configLoader,
|
||||||
jobs: make(map[string]*schema.QuantizationJob),
|
|
||||||
}
|
}
|
||||||
s.loadAllJobs()
|
|
||||||
|
// Only attach a Store interface when a concrete store exists, otherwise the
|
||||||
|
// SyncedMap would see a non-nil interface wrapping a nil pointer and try to
|
||||||
|
// hydrate/write through a nil DB.
|
||||||
|
var syncStore syncstate.Store[string, *schema.QuantizationJob]
|
||||||
|
if store != nil {
|
||||||
|
syncStore = &quantStoreAdapter{store: store}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.jobs = syncstate.New(syncstate.Config[string, *schema.QuantizationJob]{
|
||||||
|
Name: "quant.jobs",
|
||||||
|
Key: func(j *schema.QuantizationJob) string { return j.ID },
|
||||||
|
Nats: nats,
|
||||||
|
Store: syncStore,
|
||||||
|
Loader: s.loadJobsFromDisk, // ignored when Store is set (distributed mode)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Hydrate + subscribe. A hydrate failure must not take the server down: log and
|
||||||
|
// continue degraded (standalone), mirroring the FineTune/OpCache wiring.
|
||||||
|
if err := s.jobs.Start(appConfig.Context); err != nil {
|
||||||
|
xlog.Warn("Quantization SyncedMap start failed; running degraded", "error", err)
|
||||||
|
}
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Close releases the SyncedMap subscription and background workers.
|
||||||
|
func (s *QuantizationService) Close() error {
|
||||||
|
return s.jobs.Close()
|
||||||
|
}
|
||||||
|
|
||||||
// quantizationBaseDir returns the base directory for quantization job data.
|
// quantizationBaseDir returns the base directory for quantization job data.
|
||||||
func (s *QuantizationService) quantizationBaseDir() string {
|
func (s *QuantizationService) quantizationBaseDir() string {
|
||||||
return filepath.Join(s.appConfig.DataPath, "quantization")
|
return filepath.Join(s.appConfig.DataPath, "quantization")
|
||||||
@@ -80,15 +120,18 @@ func (s *QuantizationService) saveJobState(job *schema.QuantizationJob) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadAllJobs scans the quantization directory for persisted jobs and loads them.
|
// loadJobsFromDisk scans the quantization directory for persisted jobs and
|
||||||
func (s *QuantizationService) loadAllJobs() {
|
// returns them. It is the SyncedMap Loader used in standalone mode (no DB); the
|
||||||
|
// returned slice hydrates the map on Start.
|
||||||
|
func (s *QuantizationService) loadJobsFromDisk(_ context.Context) ([]*schema.QuantizationJob, error) {
|
||||||
baseDir := s.quantizationBaseDir()
|
baseDir := s.quantizationBaseDir()
|
||||||
entries, err := os.ReadDir(baseDir)
|
entries, err := os.ReadDir(baseDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Directory doesn't exist yet — that's fine
|
// Directory doesn't exist yet — that's fine, start empty.
|
||||||
return
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var jobs []*schema.QuantizationJob
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
if !entry.IsDir() {
|
if !entry.IsDir() {
|
||||||
continue
|
continue
|
||||||
@@ -117,12 +160,13 @@ func (s *QuantizationService) loadAllJobs() {
|
|||||||
job.ImportMessage = "Server restarted while import was running"
|
job.ImportMessage = "Server restarted while import was running"
|
||||||
}
|
}
|
||||||
|
|
||||||
s.jobs[job.ID] = &job
|
jobs = append(jobs, &job)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(s.jobs) > 0 {
|
if len(jobs) > 0 {
|
||||||
xlog.Info("Loaded persisted quantization jobs", "count", len(s.jobs))
|
xlog.Info("Loaded persisted quantization jobs", "count", len(jobs))
|
||||||
}
|
}
|
||||||
|
return jobs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// StartJob starts a new quantization job.
|
// StartJob starts a new quantization job.
|
||||||
@@ -188,7 +232,12 @@ func (s *QuantizationService) StartJob(ctx context.Context, userID string, req s
|
|||||||
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
||||||
Config: &req,
|
Config: &req,
|
||||||
}
|
}
|
||||||
s.jobs[jobID] = job
|
// Set write-through persists to PostgreSQL (distributed) and broadcasts to
|
||||||
|
// peer replicas; the disk state.json is written separately for restart
|
||||||
|
// recovery / standalone hydrate.
|
||||||
|
if err := s.jobs.Set(ctx, job); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to persist job: %w", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
|
|
||||||
return &schema.QuantizationJobResponse{
|
return &schema.QuantizationJobResponse{
|
||||||
@@ -203,7 +252,7 @@ func (s *QuantizationService) GetJob(userID, jobID string) (*schema.Quantization
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("job not found: %s", jobID)
|
return nil, fmt.Errorf("job not found: %s", jobID)
|
||||||
}
|
}
|
||||||
@@ -219,7 +268,7 @@ func (s *QuantizationService) ListJobs(userID string) []*schema.QuantizationJob
|
|||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
var result []*schema.QuantizationJob
|
var result []*schema.QuantizationJob
|
||||||
for _, job := range s.jobs {
|
for _, job := range s.jobs.List() {
|
||||||
if userID == "" || job.UserID == userID {
|
if userID == "" || job.UserID == userID {
|
||||||
result = append(result, job)
|
result = append(result, job)
|
||||||
}
|
}
|
||||||
@@ -235,7 +284,7 @@ func (s *QuantizationService) ListJobs(userID string) []*schema.QuantizationJob
|
|||||||
// StopJob stops a running quantization job.
|
// StopJob stops a running quantization job.
|
||||||
func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string) error {
|
func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return fmt.Errorf("job not found: %s", jobID)
|
return fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -256,6 +305,9 @@ func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string)
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.Status = "stopped"
|
job.Status = "stopped"
|
||||||
job.Message = "Quantization stopped by user"
|
job.Message = "Quantization stopped by user"
|
||||||
|
if err := s.jobs.Set(ctx, job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist stopped job", "job_id", jobID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
@@ -265,7 +317,7 @@ func (s *QuantizationService) StopJob(ctx context.Context, userID, jobID string)
|
|||||||
// DeleteJob removes a quantization job and its associated data from disk.
|
// DeleteJob removes a quantization job and its associated data from disk.
|
||||||
func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return fmt.Errorf("job not found: %s", jobID)
|
return fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -289,7 +341,11 @@ func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
importModelName := job.ImportModelName
|
importModelName := job.ImportModelName
|
||||||
delete(s.jobs, jobID)
|
// Delete write-through removes the DB row (distributed) and broadcasts the
|
||||||
|
// removal to peer replicas. DeleteJob has no ctx, so use Background.
|
||||||
|
if err := s.jobs.Delete(context.Background(), jobID); err != nil {
|
||||||
|
xlog.Warn("Failed to delete job from store", "job_id", jobID, "error", err)
|
||||||
|
}
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
// Remove job directory (state.json, output files)
|
// Remove job directory (state.json, output files)
|
||||||
@@ -324,7 +380,7 @@ func (s *QuantizationService) DeleteJob(userID, jobID string) error {
|
|||||||
// StreamProgress opens a gRPC progress stream and calls the callback for each update.
|
// StreamProgress opens a gRPC progress stream and calls the callback for each update.
|
||||||
func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID string, callback func(event *schema.QuantizationProgressEvent)) error {
|
func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID string, callback func(event *schema.QuantizationProgressEvent)) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return fmt.Errorf("job not found: %s", jobID)
|
return fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -353,7 +409,7 @@ func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID
|
|||||||
}, func(update *pb.QuantizationProgressUpdate) {
|
}, func(update *pb.QuantizationProgressUpdate) {
|
||||||
// Update job status and persist
|
// Update job status and persist
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
if j, ok := s.jobs[jobID]; ok {
|
if j, ok := s.jobs.Get(jobID); ok {
|
||||||
// Don't let progress updates overwrite terminal states
|
// Don't let progress updates overwrite terminal states
|
||||||
isTerminal := j.Status == "stopped" || j.Status == "completed" || j.Status == "failed"
|
isTerminal := j.Status == "stopped" || j.Status == "completed" || j.Status == "failed"
|
||||||
if !isTerminal {
|
if !isTerminal {
|
||||||
@@ -365,6 +421,9 @@ func (s *QuantizationService) StreamProgress(ctx context.Context, userID, jobID
|
|||||||
if update.OutputFile != "" {
|
if update.OutputFile != "" {
|
||||||
j.OutputFile = update.OutputFile
|
j.OutputFile = update.OutputFile
|
||||||
}
|
}
|
||||||
|
if err := s.jobs.Set(ctx, j); err != nil {
|
||||||
|
xlog.Warn("Failed to persist progress update", "job_id", jobID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(j)
|
s.saveJobState(j)
|
||||||
}
|
}
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
@@ -399,7 +458,7 @@ func sanitizeQuantModelName(s string) string {
|
|||||||
// ImportModel imports a quantized model into LocalAI asynchronously.
|
// ImportModel imports a quantized model into LocalAI asynchronously.
|
||||||
func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID string, req schema.QuantizationImportRequest) (string, error) {
|
func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID string, req schema.QuantizationImportRequest) (string, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return "", fmt.Errorf("job not found: %s", jobID)
|
return "", fmt.Errorf("job not found: %s", jobID)
|
||||||
@@ -459,6 +518,9 @@ func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID str
|
|||||||
job.ImportStatus = "importing"
|
job.ImportStatus = "importing"
|
||||||
job.ImportMessage = ""
|
job.ImportMessage = ""
|
||||||
job.ImportModelName = ""
|
job.ImportModelName = ""
|
||||||
|
if err := s.jobs.Set(ctx, job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist import start", "job_id", jobID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
@@ -514,10 +576,15 @@ func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID str
|
|||||||
|
|
||||||
xlog.Info("Quantized model imported and registered", "job_id", jobID, "model_name", modelName)
|
xlog.Info("Quantized model imported and registered", "job_id", jobID, "model_name", modelName)
|
||||||
|
|
||||||
|
// Runs after the HTTP request returns, so use Background rather than the
|
||||||
|
// (now likely cancelled) request ctx for the write-through.
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.ImportStatus = "completed"
|
job.ImportStatus = "completed"
|
||||||
job.ImportModelName = modelName
|
job.ImportModelName = modelName
|
||||||
job.ImportMessage = ""
|
job.ImportMessage = ""
|
||||||
|
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist import completion", "job_id", jobID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}()
|
}()
|
||||||
@@ -525,10 +592,14 @@ func (s *QuantizationService) ImportModel(ctx context.Context, userID, jobID str
|
|||||||
return modelName, nil
|
return modelName, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// setImportMessage updates the import message and persists the job state.
|
// setImportMessage updates the import message and persists the job state. Called
|
||||||
|
// from the background import goroutine, so it uses Background for write-through.
|
||||||
func (s *QuantizationService) setImportMessage(job *schema.QuantizationJob, msg string) {
|
func (s *QuantizationService) setImportMessage(job *schema.QuantizationJob, msg string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.ImportMessage = msg
|
job.ImportMessage = msg
|
||||||
|
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist import message", "job_id", job.ID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
@@ -539,6 +610,9 @@ func (s *QuantizationService) setImportFailed(job *schema.QuantizationJob, messa
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job.ImportStatus = "failed"
|
job.ImportStatus = "failed"
|
||||||
job.ImportMessage = message
|
job.ImportMessage = message
|
||||||
|
if err := s.jobs.Set(context.Background(), job); err != nil {
|
||||||
|
xlog.Warn("Failed to persist import failure", "job_id", job.ID, "error", err)
|
||||||
|
}
|
||||||
s.saveJobState(job)
|
s.saveJobState(job)
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
@@ -546,7 +620,7 @@ func (s *QuantizationService) setImportFailed(job *schema.QuantizationJob, messa
|
|||||||
// GetOutputPath returns the path to the quantized model file and a download name.
|
// GetOutputPath returns the path to the quantized model file and a download name.
|
||||||
func (s *QuantizationService) GetOutputPath(userID, jobID string) (string, string, error) {
|
func (s *QuantizationService) GetOutputPath(userID, jobID string) (string, string, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
job, ok := s.jobs[jobID]
|
job, ok := s.jobs.Get(jobID)
|
||||||
if !ok {
|
if !ok {
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return "", "", fmt.Errorf("job not found: %s", jobID)
|
return "", "", fmt.Errorf("job not found: %s", jobID)
|
||||||
|
|||||||
187
core/services/quantization/service_test.go
Normal file
187
core/services/quantization/service_test.go
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
package quantization
|
||||||
|
|
||||||
|
// White-box tests (package quantization) so a spec can drive the service's
|
||||||
|
// internal SyncedMap the same way StartJob does (via jobs.Set) without standing
|
||||||
|
// up a quantization backend, then assert the cross-replica reads
|
||||||
|
// (GetJob/ListJobs) and the adapter conversions that keep REST responses
|
||||||
|
// byte-for-byte unchanged.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTestService builds a standalone QuantizationService wired to the given bus.
|
||||||
|
// The model/config loaders are nil because the read/sync paths under test never
|
||||||
|
// touch them; the data dir is a throwaway temp dir so the disk Loader finds
|
||||||
|
// nothing.
|
||||||
|
func newTestService(bus *testutil.FakeBus) *QuantizationService {
|
||||||
|
appConfig := &config.ApplicationConfig{
|
||||||
|
Context: context.Background(),
|
||||||
|
DataPath: GinkgoT().TempDir(),
|
||||||
|
}
|
||||||
|
return NewQuantizationService(appConfig, nil, nil, bus, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("QuantizationService", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
Describe("cross-replica job visibility", func() {
|
||||||
|
var (
|
||||||
|
bus *testutil.FakeBus
|
||||||
|
a, b *QuantizationService
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
// One shared bus, two replicas: exactly the distributed topology where a
|
||||||
|
// round-robin request may land on a replica that did not originate the
|
||||||
|
// change.
|
||||||
|
bus = testutil.NewFakeBus()
|
||||||
|
a = newTestService(bus)
|
||||||
|
b = newTestService(bus)
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
Expect(a.Close()).To(Succeed())
|
||||||
|
Expect(b.Close()).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("makes a job created on A visible via B's GetJob and ListJobs", func() {
|
||||||
|
job := &schema.QuantizationJob{ID: "job-1", UserID: "user-1", Status: "queued", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
// StartJob persists via jobs.Set; drive that directly to avoid a backend.
|
||||||
|
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||||
|
|
||||||
|
got, err := b.GetJob("user-1", "job-1")
|
||||||
|
Expect(err).ToNot(HaveOccurred(), "B must see a job A just created")
|
||||||
|
Expect(got.Status).To(Equal("queued"))
|
||||||
|
|
||||||
|
listed := b.ListJobs("user-1")
|
||||||
|
Expect(listed).To(HaveLen(1))
|
||||||
|
Expect(listed[0].ID).To(Equal("job-1"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("removes a job from B when it is deleted on A", func() {
|
||||||
|
job := &schema.QuantizationJob{ID: "job-2", UserID: "user-1", Status: "completed", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||||
|
_, err := b.GetJob("user-1", "job-2")
|
||||||
|
Expect(err).ToNot(HaveOccurred(), "precondition: B must have the job before the delete")
|
||||||
|
|
||||||
|
Expect(a.jobs.Delete(ctx, "job-2")).To(Succeed())
|
||||||
|
|
||||||
|
_, err = b.GetJob("user-1", "job-2")
|
||||||
|
Expect(err).To(HaveOccurred(), "a delete on A must remove the job from B")
|
||||||
|
})
|
||||||
|
|
||||||
|
It("propagates a status update from A to B", func() {
|
||||||
|
job := &schema.QuantizationJob{ID: "job-3", UserID: "user-1", Status: "quantizing", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
Expect(a.jobs.Set(ctx, job)).To(Succeed())
|
||||||
|
|
||||||
|
updated := &schema.QuantizationJob{ID: "job-3", UserID: "user-1", Status: "completed", CreatedAt: "2026-06-27T10:00:00Z"}
|
||||||
|
Expect(a.jobs.Set(ctx, updated)).To(Succeed())
|
||||||
|
|
||||||
|
got, err := b.GetJob("user-1", "job-3")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got.Status).To(Equal("completed"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("ListJobs", func() {
|
||||||
|
var svc *QuantizationService
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
svc = newTestService(testutil.NewFakeBus())
|
||||||
|
})
|
||||||
|
AfterEach(func() { Expect(svc.Close()).To(Succeed()) })
|
||||||
|
|
||||||
|
It("filters by user and sorts newest-first", func() {
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.QuantizationJob{ID: "old", UserID: "u1", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.QuantizationJob{ID: "new", UserID: "u1", CreatedAt: "2026-06-27T10:00:00Z"})).To(Succeed())
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.QuantizationJob{ID: "other", UserID: "u2", CreatedAt: "2026-06-26T10:00:00Z"})).To(Succeed())
|
||||||
|
|
||||||
|
jobs := svc.ListJobs("u1")
|
||||||
|
Expect(jobs).To(HaveLen(2), "only u1's jobs")
|
||||||
|
Expect(jobs[0].ID).To(Equal("new"), "newest first")
|
||||||
|
Expect(jobs[1].ID).To(Equal("old"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns every user's jobs when the userID filter is empty", func() {
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.QuantizationJob{ID: "a", UserID: "u1", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.QuantizationJob{ID: "b", UserID: "u2", CreatedAt: "2026-06-26T10:00:00Z"})).To(Succeed())
|
||||||
|
|
||||||
|
Expect(svc.ListJobs("")).To(HaveLen(2))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("rejects GetJob for a job owned by another user", func() {
|
||||||
|
Expect(svc.jobs.Set(ctx, &schema.QuantizationJob{ID: "x", UserID: "owner", CreatedAt: "2026-06-25T10:00:00Z"})).To(Succeed())
|
||||||
|
|
||||||
|
_, err := svc.GetJob("intruder", "x")
|
||||||
|
Expect(err).To(HaveOccurred(), "a different user must not read someone else's job")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("store adapter conversion", func() {
|
||||||
|
// The SyncedMap value type is *schema.QuantizationJob (the exact REST shape).
|
||||||
|
// These specs prove the DB adapter round-trips it losslessly, so hydrate and
|
||||||
|
// write-through in distributed mode keep responses unchanged.
|
||||||
|
It("round-trips a job through jobToRecord/recordToJob preserving the API shape", func() {
|
||||||
|
original := &schema.QuantizationJob{
|
||||||
|
ID: "rt-1",
|
||||||
|
UserID: "user-1",
|
||||||
|
Model: "base-model",
|
||||||
|
Backend: "llama-cpp-quantization",
|
||||||
|
ModelID: "llama-cpp-quantization-quantize-rt-1",
|
||||||
|
QuantizationType: "q4_k_m",
|
||||||
|
Status: "completed",
|
||||||
|
Message: "done",
|
||||||
|
OutputDir: "/data/quantization/rt-1",
|
||||||
|
OutputFile: "/data/quantization/rt-1/model.gguf",
|
||||||
|
ExtraOptions: map[string]string{"hf_token": "secret"},
|
||||||
|
CreatedAt: "2026-06-27T10:00:00Z",
|
||||||
|
ImportStatus: "completed",
|
||||||
|
ImportMessage: "",
|
||||||
|
ImportModelName: "base-model-q4_k_m-rt-1",
|
||||||
|
Config: &schema.QuantizationJobRequest{Model: "base-model", Backend: "llama-cpp-quantization", QuantizationType: "q4_k_m"},
|
||||||
|
}
|
||||||
|
|
||||||
|
rec := jobToRecord(original)
|
||||||
|
Expect(rec.ID).To(Equal("rt-1"))
|
||||||
|
Expect(rec.ConfigJSON).ToNot(BeEmpty(), "structured config must serialize into the JSON column")
|
||||||
|
Expect(rec.ExtraOptsJSON).ToNot(BeEmpty())
|
||||||
|
|
||||||
|
back := recordToJob(rec)
|
||||||
|
Expect(back.ID).To(Equal(original.ID))
|
||||||
|
Expect(back.UserID).To(Equal(original.UserID))
|
||||||
|
Expect(back.Model).To(Equal(original.Model))
|
||||||
|
Expect(back.Backend).To(Equal(original.Backend))
|
||||||
|
Expect(back.ModelID).To(Equal(original.ModelID))
|
||||||
|
Expect(back.QuantizationType).To(Equal(original.QuantizationType))
|
||||||
|
Expect(back.Status).To(Equal(original.Status))
|
||||||
|
Expect(back.Message).To(Equal(original.Message))
|
||||||
|
Expect(back.OutputDir).To(Equal(original.OutputDir))
|
||||||
|
Expect(back.OutputFile).To(Equal(original.OutputFile))
|
||||||
|
Expect(back.ImportStatus).To(Equal(original.ImportStatus))
|
||||||
|
Expect(back.ImportModelName).To(Equal(original.ImportModelName))
|
||||||
|
Expect(back.CreatedAt).To(Equal(original.CreatedAt))
|
||||||
|
Expect(back.ExtraOptions).To(Equal(original.ExtraOptions))
|
||||||
|
Expect(back.Config).ToNot(BeNil())
|
||||||
|
Expect(back.Config.QuantizationType).To(Equal("q4_k_m"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("compile-time adapter contract", func() {
|
||||||
|
It("satisfies syncstate.Store for *distributed.QuantStore", func() {
|
||||||
|
// Guards against drift between the adapter and the component interface;
|
||||||
|
// the var assertion in syncstore.go covers it at build time, this keeps
|
||||||
|
// the type referenced from a spec too.
|
||||||
|
var _ *distributed.QuantStore
|
||||||
|
Expect(&quantStoreAdapter{}).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
114
core/services/quantization/syncstore.go
Normal file
114
core/services/quantization/syncstore.go
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
package quantization
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/core/services/distributed"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
|
)
|
||||||
|
|
||||||
|
// quantStoreAdapter bridges the distributed PostgreSQL QuantStore to the generic
|
||||||
|
// syncstate.Store the SyncedMap consumes. It is only wired in distributed mode;
|
||||||
|
// standalone leaves Store nil and hydrates from disk via a Loader instead.
|
||||||
|
//
|
||||||
|
// The SyncedMap value type is *schema.QuantizationJob (the exact shape the REST
|
||||||
|
// API returns) so reads need no conversion and the response JSON is provably
|
||||||
|
// unchanged. The adapter is the single place that translates between that API
|
||||||
|
// shape and the DB QuantJobRecord.
|
||||||
|
type quantStoreAdapter struct {
|
||||||
|
store *distributed.QuantStore
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile-time assertion that the adapter satisfies the component's Store.
|
||||||
|
var _ syncstate.Store[string, *schema.QuantizationJob] = (*quantStoreAdapter)(nil)
|
||||||
|
|
||||||
|
func (a *quantStoreAdapter) List(_ context.Context) ([]*schema.QuantizationJob, error) {
|
||||||
|
records, err := a.store.ListAll()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jobs := make([]*schema.QuantizationJob, 0, len(records))
|
||||||
|
for i := range records {
|
||||||
|
jobs = append(jobs, recordToJob(&records[i]))
|
||||||
|
}
|
||||||
|
return jobs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *quantStoreAdapter) Upsert(_ context.Context, job *schema.QuantizationJob) error {
|
||||||
|
return a.store.Upsert(jobToRecord(job))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *quantStoreAdapter) Delete(_ context.Context, id string) error {
|
||||||
|
return a.store.Delete(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// recordToJob maps a persisted DB record back to the API shape, reconstructing
|
||||||
|
// the structured Config / ExtraOptions from their JSON columns.
|
||||||
|
func recordToJob(r *distributed.QuantJobRecord) *schema.QuantizationJob {
|
||||||
|
job := &schema.QuantizationJob{
|
||||||
|
ID: r.ID,
|
||||||
|
UserID: r.UserID,
|
||||||
|
Model: r.Model,
|
||||||
|
Backend: r.Backend,
|
||||||
|
ModelID: r.ModelID,
|
||||||
|
QuantizationType: r.QuantizationType,
|
||||||
|
Status: r.Status,
|
||||||
|
Message: r.Message,
|
||||||
|
OutputDir: r.OutputDir,
|
||||||
|
OutputFile: r.OutputFile,
|
||||||
|
ImportStatus: r.ImportStatus,
|
||||||
|
ImportMessage: r.ImportMessage,
|
||||||
|
ImportModelName: r.ImportModelName,
|
||||||
|
CreatedAt: r.CreatedAt.UTC().Format(time.RFC3339),
|
||||||
|
}
|
||||||
|
if r.ExtraOptsJSON != "" {
|
||||||
|
// Best-effort: a malformed column must not drop the whole job from the API.
|
||||||
|
_ = json.Unmarshal([]byte(r.ExtraOptsJSON), &job.ExtraOptions)
|
||||||
|
}
|
||||||
|
if r.ConfigJSON != "" {
|
||||||
|
var cfg schema.QuantizationJobRequest
|
||||||
|
if err := json.Unmarshal([]byte(r.ConfigJSON), &cfg); err == nil {
|
||||||
|
job.Config = &cfg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return job
|
||||||
|
}
|
||||||
|
|
||||||
|
// jobToRecord maps the API shape to a DB record for write-through, serializing
|
||||||
|
// the structured Config / ExtraOptions into their JSON columns. CreatedAt is
|
||||||
|
// parsed back from the RFC3339 string the service stamps; an unparseable value is
|
||||||
|
// left zero so QuantStore.Upsert stamps "now".
|
||||||
|
func jobToRecord(job *schema.QuantizationJob) *distributed.QuantJobRecord {
|
||||||
|
rec := &distributed.QuantJobRecord{
|
||||||
|
ID: job.ID,
|
||||||
|
UserID: job.UserID,
|
||||||
|
Model: job.Model,
|
||||||
|
Backend: job.Backend,
|
||||||
|
ModelID: job.ModelID,
|
||||||
|
QuantizationType: job.QuantizationType,
|
||||||
|
Status: job.Status,
|
||||||
|
Message: job.Message,
|
||||||
|
OutputDir: job.OutputDir,
|
||||||
|
OutputFile: job.OutputFile,
|
||||||
|
ImportStatus: job.ImportStatus,
|
||||||
|
ImportMessage: job.ImportMessage,
|
||||||
|
ImportModelName: job.ImportModelName,
|
||||||
|
}
|
||||||
|
if job.Config != nil {
|
||||||
|
if data, err := json.Marshal(job.Config); err == nil {
|
||||||
|
rec.ConfigJSON = string(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if job.ExtraOptions != nil {
|
||||||
|
if data, err := json.Marshal(job.ExtraOptions); err == nil {
|
||||||
|
rec.ExtraOptsJSON = string(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if t, err := time.Parse(time.RFC3339, job.CreatedAt); err == nil {
|
||||||
|
rec.CreatedAt = t
|
||||||
|
}
|
||||||
|
return rec
|
||||||
|
}
|
||||||
289
core/services/syncstate/syncstate.go
Normal file
289
core/services/syncstate/syncstate.go
Normal file
@@ -0,0 +1,289 @@
|
|||||||
|
// Package syncstate provides SyncedMap, a reusable cross-replica in-memory map.
|
||||||
|
//
|
||||||
|
// LocalAI in distributed mode runs multiple frontend replicas behind a
|
||||||
|
// round-robin load balancer. Several features keep process-local in-memory state
|
||||||
|
// that is surfaced to the HTTP/UI API; without cross-replica sync a poll that
|
||||||
|
// lands on a replica which did not originate a change sees stale or missing data.
|
||||||
|
// SyncedMap collapses the three legs each feature otherwise hand-wires - an
|
||||||
|
// in-memory map, a NATS broadcast/apply path, and optional durable read-through -
|
||||||
|
// into one well-tested component so cross-replica consistency is a configuration
|
||||||
|
// choice rather than a bespoke re-implementation.
|
||||||
|
package syncstate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Operation names carried in a delta's "op" field and handed to OnApply.
const (
	// opSet marks a delta that stores (or overwrites) a value.
	opSet = "set"
	// opDelete marks a delta that removes a key.
	opDelete = "delete"
)
|
||||||
|
|
||||||
|
// Store is the optional durable backing of a SyncedMap. In distributed mode it
// is a single shared DB, which is why the apply path (a delta received from a
// peer) only updates memory and never writes the Store again.
type Store[K comparable, V any] interface {
	// List returns every persisted value; it is the hydration source.
	List(ctx context.Context) ([]V, error)
	// Upsert persists v.
	Upsert(ctx context.Context, v V) error
	// Delete removes the value stored under key k.
	Delete(ctx context.Context, k K) error
}
|
||||||
|
|
||||||
|
// Config configures a SyncedMap.
|
||||||
|
type Config[K comparable, V any] struct {
|
||||||
|
Name string // subject namespace, e.g. "finetune.jobs"
|
||||||
|
Key func(V) K // extract the key from a value
|
||||||
|
Nats messaging.MessagingClient // nil => standalone: in-memory only, no broadcast/subscribe
|
||||||
|
Store Store[K, V] // optional read-through persistence
|
||||||
|
Loader func(ctx context.Context) ([]V, error) // source when there is no Store (e.g. disk reload)
|
||||||
|
OnApply func(op string, k K, v V) // optional hook after an applied change (e.g. ShutdownModel)
|
||||||
|
Reconcile time.Duration // optional periodic re-hydrate; 0 = off
|
||||||
|
}
|
||||||
|
|
||||||
|
// delta is the JSON envelope broadcast for every local mutation.
type delta[K comparable, V any] struct {
	// Op is the operation name (opSet or opDelete).
	Op string `json:"op"`
	// Key identifies the entry the operation targets.
	Key K `json:"key"`
	// Value is tagged omitempty so that a delete carries only op+key.
	Value V `json:"value,omitempty"`
}
|
||||||
|
|
||||||
|
// SyncedMap is a cross-replica in-memory map. A local write (Set/Delete) updates
|
||||||
|
// memory, the optional durable Store, then broadcasts a delta to peers. A peer's
|
||||||
|
// delta updates memory only and fires OnApply - it never re-broadcasts and never
|
||||||
|
// writes the Store. That structural split is the echo-loop guard (same pattern as
|
||||||
|
// galleryop.mergeStatus / OpCache.applyStart): receiving your own broadcast just
|
||||||
|
// re-applies an idempotent value to memory, so there is no storm and no
|
||||||
|
// double-write.
|
||||||
|
type SyncedMap[K comparable, V any] struct {
|
||||||
|
cfg Config[K, V]
|
||||||
|
|
||||||
|
mu sync.RWMutex
|
||||||
|
data map[K]V
|
||||||
|
|
||||||
|
sub Subscription
|
||||||
|
|
||||||
|
// lifeCtx outlives Start's argument: a reconnect callback or reconcile tick
|
||||||
|
// can fire long after Start returns, so they must not be tied to a ctx the
|
||||||
|
// caller may cancel. Close cancels it.
|
||||||
|
lifeCtx context.Context
|
||||||
|
cancel context.CancelFunc
|
||||||
|
wg sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subscription is the subset of messaging.Subscription the component holds onto.
|
||||||
|
type Subscription = messaging.Subscription
|
||||||
|
|
||||||
|
// New constructs a SyncedMap. Call Start to hydrate and begin syncing.
|
||||||
|
func New[K comparable, V any](cfg Config[K, V]) *SyncedMap[K, V] {
|
||||||
|
return &SyncedMap[K, V]{cfg: cfg, data: make(map[K]V)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SyncedMap[K, V]) subject() string {
|
||||||
|
return messaging.SubjectSyncStateDelta(m.cfg.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start hydrates from the source, subscribes for peer deltas, registers a
|
||||||
|
// reconnect re-hydrate (when the client supports it), and starts the optional
|
||||||
|
// reconcile ticker.
|
||||||
|
func (m *SyncedMap[K, V]) Start(ctx context.Context) error {
|
||||||
|
if err := m.hydrate(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// The cancel func is stored on the struct and invoked in Close (covered by
|
||||||
|
// tests); lifeCtx must outlive Start to drive the reconnect/reconcile
|
||||||
|
// goroutines, so it cannot be cancelled or deferred within this scope.
|
||||||
|
m.lifeCtx, m.cancel = context.WithCancel(context.Background()) // #nosec G118 -- cancel is invoked in Close()
|
||||||
|
|
||||||
|
if m.cfg.Nats != nil {
|
||||||
|
sub, err := messaging.SubscribeJSON(m.cfg.Nats, m.subject(), m.apply)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.sub = sub
|
||||||
|
|
||||||
|
// nats.go transparently resubscribes on reconnect, but it cannot know we
|
||||||
|
// kept derived in-memory state that may have drifted while the link was
|
||||||
|
// down, so re-hydrate from the durable source. Detected via an optional
|
||||||
|
// interface so MessagingClient itself stays minimal; standalone/test
|
||||||
|
// clients without the method simply fall back to the reconcile ticker.
|
||||||
|
if r, ok := m.cfg.Nats.(interface{ OnReconnect(func()) }); ok {
|
||||||
|
r.OnReconnect(func() {
|
||||||
|
if err := m.hydrate(m.lifeCtx); err != nil {
|
||||||
|
xlog.Warn("syncstate: reconnect re-hydrate failed", "name", m.cfg.Name, "error", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if m.cfg.Reconcile > 0 {
|
||||||
|
m.wg.Add(1)
|
||||||
|
go m.reconcileLoop()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close unsubscribes and stops the reconcile ticker.
|
||||||
|
func (m *SyncedMap[K, V]) Close() error {
|
||||||
|
if m.cancel != nil {
|
||||||
|
m.cancel()
|
||||||
|
}
|
||||||
|
m.wg.Wait()
|
||||||
|
if m.sub != nil {
|
||||||
|
return m.sub.Unsubscribe()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set updates the value locally, writes through the Store, then broadcasts.
|
||||||
|
// Per the data-flow contract the Store write happens under the lock so memory and
|
||||||
|
// durable state move together; the broadcast is best-effort after unlocking.
|
||||||
|
func (m *SyncedMap[K, V]) Set(ctx context.Context, v V) error {
|
||||||
|
k := m.cfg.Key(v)
|
||||||
|
m.mu.Lock()
|
||||||
|
m.data[k] = v
|
||||||
|
if m.cfg.Store != nil {
|
||||||
|
if err := m.cfg.Store.Upsert(ctx, v); err != nil {
|
||||||
|
m.mu.Unlock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
m.publish(opSet, k, v)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes the key locally, deletes it from the Store, then broadcasts.
|
||||||
|
func (m *SyncedMap[K, V]) Delete(ctx context.Context, k K) error {
|
||||||
|
m.mu.Lock()
|
||||||
|
delete(m.data, k)
|
||||||
|
if m.cfg.Store != nil {
|
||||||
|
if err := m.cfg.Store.Delete(ctx, k); err != nil {
|
||||||
|
m.mu.Unlock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
var zero V
|
||||||
|
m.publish(opDelete, k, zero)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get returns the value for k and whether it was present.
|
||||||
|
func (m *SyncedMap[K, V]) Get(k K) (V, bool) {
|
||||||
|
m.mu.RLock()
|
||||||
|
defer m.mu.RUnlock()
|
||||||
|
v, ok := m.data[k]
|
||||||
|
return v, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// List returns a snapshot slice of all values.
|
||||||
|
func (m *SyncedMap[K, V]) List() []V {
|
||||||
|
m.mu.RLock()
|
||||||
|
defer m.mu.RUnlock()
|
||||||
|
out := make([]V, 0, len(m.data))
|
||||||
|
for _, v := range m.data {
|
||||||
|
out = append(out, v)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// Snapshot returns a copy of the underlying map.
|
||||||
|
func (m *SyncedMap[K, V]) Snapshot() map[K]V {
|
||||||
|
m.mu.RLock()
|
||||||
|
defer m.mu.RUnlock()
|
||||||
|
out := make(map[K]V, len(m.data))
|
||||||
|
for k, v := range m.data {
|
||||||
|
out[k] = v
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// publish broadcasts a delta. Standalone (nil Nats) is a strict no-op.
|
||||||
|
func (m *SyncedMap[K, V]) publish(op string, k K, v V) {
|
||||||
|
if m.cfg.Nats == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := m.cfg.Nats.Publish(m.subject(), delta[K, V]{Op: op, Key: k, Value: v}); err != nil {
|
||||||
|
xlog.Warn("syncstate: failed to broadcast delta", "name", m.cfg.Name, "op", op, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// apply handles a peer's delta: memory-only update plus OnApply. It deliberately
|
||||||
|
// never writes the Store nor re-publishes - that is the echo-loop guard.
|
||||||
|
func (m *SyncedMap[K, V]) apply(d delta[K, V]) {
|
||||||
|
switch d.Op {
|
||||||
|
case opSet:
|
||||||
|
m.mu.Lock()
|
||||||
|
m.data[d.Key] = d.Value
|
||||||
|
m.mu.Unlock()
|
||||||
|
case opDelete:
|
||||||
|
m.mu.Lock()
|
||||||
|
delete(m.data, d.Key)
|
||||||
|
m.mu.Unlock()
|
||||||
|
default:
|
||||||
|
xlog.Warn("syncstate: ignoring delta with unknown op", "name", m.cfg.Name, "op", d.Op)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if m.cfg.OnApply != nil {
|
||||||
|
m.cfg.OnApply(d.Op, d.Key, d.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// hydrate replaces the whole map from the durable source: Store if present, else
|
||||||
|
// Loader. With neither, a late joiner starts empty and catches up via deltas
|
||||||
|
// (acceptable only for ephemeral state).
|
||||||
|
func (m *SyncedMap[K, V]) hydrate(ctx context.Context) error {
|
||||||
|
var (
|
||||||
|
vals []V
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
switch {
|
||||||
|
case m.cfg.Store != nil:
|
||||||
|
vals, err = m.cfg.Store.List(ctx)
|
||||||
|
case m.cfg.Loader != nil:
|
||||||
|
vals, err = m.cfg.Loader(ctx)
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.replaceAll(vals)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// replaceAll atomically swaps the map contents for the given values, keyed via
|
||||||
|
// cfg.Key.
|
||||||
|
func (m *SyncedMap[K, V]) replaceAll(vals []V) {
|
||||||
|
next := make(map[K]V, len(vals))
|
||||||
|
for _, v := range vals {
|
||||||
|
next[m.cfg.Key(v)] = v
|
||||||
|
}
|
||||||
|
m.mu.Lock()
|
||||||
|
m.data = next
|
||||||
|
m.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconcileLoop periodically re-hydrates to repair silent drift (missed deltas).
|
||||||
|
func (m *SyncedMap[K, V]) reconcileLoop() {
|
||||||
|
defer m.wg.Done()
|
||||||
|
t := time.NewTicker(m.cfg.Reconcile)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-m.lifeCtx.Done():
|
||||||
|
return
|
||||||
|
case <-t.C:
|
||||||
|
if err := m.hydrate(m.lifeCtx); err != nil {
|
||||||
|
xlog.Warn("syncstate: reconcile re-hydrate failed", "name", m.cfg.Name, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
13
core/services/syncstate/syncstate_suite_test.go
Normal file
13
core/services/syncstate/syncstate_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package syncstate_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSyncstate(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "Syncstate Suite")
|
||||||
|
}
|
||||||
291
core/services/syncstate/syncstate_test.go
Normal file
291
core/services/syncstate/syncstate_test.go
Normal file
@@ -0,0 +1,291 @@
|
|||||||
|
package syncstate_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/syncstate"
|
||||||
|
"github.com/mudler/LocalAI/core/services/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
// job is a small JSON-serializable value used in these specs in place of the
// real cross-replica records (finetune/quant/agent jobs) the component is
// built for.
type job struct {
	// ID is the field jobKey uses as the map key.
	ID string `json:"id"`
	// Status is the payload the specs assert on.
	Status string `json:"status"`
}
|
||||||
|
|
||||||
|
// jobKey is the Config.Key function for the specs: a job is keyed by its ID.
func jobKey(j *job) string {
	return j.ID
}
|
||||||
|
|
||||||
|
const stateName = "test.jobs"
|
||||||
|
|
||||||
|
func deltaSubject() string { return messaging.SubjectSyncStateDelta(stateName) }
|
||||||
|
|
||||||
|
// fakeStore is an in-memory Store that records call counts so specs can assert
|
||||||
|
// the write-through-vs-apply split (local writes hit the Store; applied deltas
|
||||||
|
// must not).
|
||||||
|
type fakeStore struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
data map[string]*job
|
||||||
|
upsertCalls int
|
||||||
|
deleteCalls int
|
||||||
|
listCalls int
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFakeStore(seed ...*job) *fakeStore {
|
||||||
|
s := &fakeStore{data: map[string]*job{}}
|
||||||
|
for _, j := range seed {
|
||||||
|
s.data[j.ID] = j
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeStore) List(_ context.Context) ([]*job, error) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.listCalls++
|
||||||
|
out := make([]*job, 0, len(s.data))
|
||||||
|
for _, j := range s.data {
|
||||||
|
out = append(out, j)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeStore) Upsert(_ context.Context, j *job) error {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.upsertCalls++
|
||||||
|
s.data[j.ID] = j
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeStore) Delete(_ context.Context, k string) error {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.deleteCalls++
|
||||||
|
delete(s.data, k)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// add simulates a peer replica writing to the shared DB out-of-band (e.g. while
|
||||||
|
// this replica was partitioned), so a re-hydrate can be observed to pick it up.
|
||||||
|
func (s *fakeStore) add(j *job) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.data[j.ID] = j
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeStore) counts() (upsert, del, list int) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
return s.upsertCalls, s.deleteCalls, s.listCalls
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("SyncedMap", func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
Describe("cross-replica delta propagation", func() {
|
||||||
|
var (
|
||||||
|
bus *testutil.FakeBus
|
||||||
|
a, b *syncstate.SyncedMap[string, *job]
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
bus = testutil.NewFakeBus()
|
||||||
|
a = syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||||
|
b = syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||||
|
Expect(a.Start(ctx)).To(Succeed())
|
||||||
|
Expect(b.Start(ctx)).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
Expect(a.Close()).To(Succeed())
|
||||||
|
Expect(b.Close()).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("propagates a Set on A to B", func() {
|
||||||
|
Expect(a.Set(ctx, &job{ID: "1", Status: "running"})).To(Succeed())
|
||||||
|
|
||||||
|
got, ok := b.Get("1")
|
||||||
|
Expect(ok).To(BeTrue(), "replica B should see the value A just set")
|
||||||
|
Expect(got.Status).To(Equal("running"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("prunes a Delete on A from B", func() {
|
||||||
|
Expect(a.Set(ctx, &job{ID: "1", Status: "running"})).To(Succeed())
|
||||||
|
_, present := b.Get("1")
|
||||||
|
Expect(present).To(BeTrue(), "precondition: B must have the value before the delete")
|
||||||
|
|
||||||
|
Expect(a.Delete(ctx, "1")).To(Succeed())
|
||||||
|
|
||||||
|
_, ok := b.Get("1")
|
||||||
|
Expect(ok).To(BeFalse(), "a delete on A must remove the key from B")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("hydration", func() {
|
||||||
|
It("hydrates on Start from a preloaded Store", func() {
|
||||||
|
store := newFakeStore(&job{ID: "x", Status: "done"})
|
||||||
|
m := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Store: store})
|
||||||
|
Expect(m.Start(ctx)).To(Succeed())
|
||||||
|
|
||||||
|
got, ok := m.Get("x")
|
||||||
|
Expect(ok).To(BeTrue(), "Start must populate the map from the Store")
|
||||||
|
Expect(got.Status).To(Equal("done"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("uses the Loader when Store is nil", func() {
|
||||||
|
m := syncstate.New(syncstate.Config[string, *job]{
|
||||||
|
Name: stateName,
|
||||||
|
Key: jobKey,
|
||||||
|
Loader: func(_ context.Context) ([]*job, error) {
|
||||||
|
return []*job{{ID: "l", Status: "loaded"}}, nil
|
||||||
|
},
|
||||||
|
})
|
||||||
|
Expect(m.Start(ctx)).To(Succeed())
|
||||||
|
|
||||||
|
got, ok := m.Get("l")
|
||||||
|
Expect(ok).To(BeTrue(), "Loader output must hydrate the map when there is no Store")
|
||||||
|
Expect(got.Status).To(Equal("loaded"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("echo-loop guard", func() {
|
||||||
|
It("applies its own broadcast once and does not re-publish", func() {
|
||||||
|
bus := testutil.NewFakeBus()
|
||||||
|
a := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||||
|
b := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||||
|
Expect(a.Start(ctx)).To(Succeed())
|
||||||
|
Expect(b.Start(ctx)).To(Succeed())
|
||||||
|
defer func() {
|
||||||
|
Expect(a.Close()).To(Succeed())
|
||||||
|
Expect(b.Close()).To(Succeed())
|
||||||
|
}()
|
||||||
|
|
||||||
|
Expect(a.Set(ctx, &job{ID: "e", Status: "running"})).To(Succeed())
|
||||||
|
|
||||||
|
// One local write must produce exactly one broadcast: A and B both
|
||||||
|
// receive it and apply to memory, but the apply path never re-publishes.
|
||||||
|
Expect(bus.PublishCount(deltaSubject())).To(Equal(1),
|
||||||
|
"the apply path must not re-broadcast, otherwise replicas storm")
|
||||||
|
Expect(a.List()).To(HaveLen(1), "A must not double-store its own echo")
|
||||||
|
_, ok := b.Get("e")
|
||||||
|
Expect(ok).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Store write-through vs apply", func() {
|
||||||
|
It("writes the Store on local Set/Delete but not on an applied delta", func() {
|
||||||
|
bus := testutil.NewFakeBus()
|
||||||
|
storeA := newFakeStore()
|
||||||
|
storeB := newFakeStore()
|
||||||
|
a := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus, Store: storeA})
|
||||||
|
b := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus, Store: storeB})
|
||||||
|
Expect(a.Start(ctx)).To(Succeed())
|
||||||
|
Expect(b.Start(ctx)).To(Succeed())
|
||||||
|
defer func() {
|
||||||
|
Expect(a.Close()).To(Succeed())
|
||||||
|
Expect(b.Close()).To(Succeed())
|
||||||
|
}()
|
||||||
|
|
||||||
|
Expect(a.Set(ctx, &job{ID: "w", Status: "running"})).To(Succeed())
|
||||||
|
|
||||||
|
upA, _, _ := storeA.counts()
|
||||||
|
upB, _, _ := storeB.counts()
|
||||||
|
Expect(upA).To(Equal(1), "local Set must write through to its own Store")
|
||||||
|
Expect(upB).To(Equal(0), "the apply path must never write the peer's Store")
|
||||||
|
|
||||||
|
Expect(a.Delete(ctx, "w")).To(Succeed())
|
||||||
|
_, delA, _ := storeA.counts()
|
||||||
|
_, delB, _ := storeB.counts()
|
||||||
|
Expect(delA).To(Equal(1), "local Delete must delete from its own Store")
|
||||||
|
Expect(delB).To(Equal(0), "the apply path must never delete from the peer's Store")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("OnApply hook", func() {
|
||||||
|
It("fires with the correct op and key on an applied delta", func() {
|
||||||
|
bus := testutil.NewFakeBus()
|
||||||
|
var (
|
||||||
|
mu sync.Mutex
|
||||||
|
ops []string
|
||||||
|
keys []string
|
||||||
|
)
|
||||||
|
a := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus})
|
||||||
|
b := syncstate.New(syncstate.Config[string, *job]{
|
||||||
|
Name: stateName, Key: jobKey, Nats: bus,
|
||||||
|
OnApply: func(op string, k string, _ *job) {
|
||||||
|
mu.Lock()
|
||||||
|
ops = append(ops, op)
|
||||||
|
keys = append(keys, k)
|
||||||
|
mu.Unlock()
|
||||||
|
},
|
||||||
|
})
|
||||||
|
Expect(a.Start(ctx)).To(Succeed())
|
||||||
|
Expect(b.Start(ctx)).To(Succeed())
|
||||||
|
defer func() {
|
||||||
|
Expect(a.Close()).To(Succeed())
|
||||||
|
Expect(b.Close()).To(Succeed())
|
||||||
|
}()
|
||||||
|
|
||||||
|
Expect(a.Set(ctx, &job{ID: "o", Status: "running"})).To(Succeed())
|
||||||
|
Expect(a.Delete(ctx, "o")).To(Succeed())
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
Expect(ops).To(Equal([]string{"set", "delete"}))
|
||||||
|
Expect(keys).To(Equal([]string{"o", "o"}))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("standalone (nil Nats)", func() {
|
||||||
|
It("works in-memory with no panic and nothing to broadcast", func() {
|
||||||
|
m := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey})
|
||||||
|
Expect(m.Start(ctx)).To(Succeed())
|
||||||
|
defer func() { Expect(m.Close()).To(Succeed()) }()
|
||||||
|
|
||||||
|
Expect(func() {
|
||||||
|
Expect(m.Set(ctx, &job{ID: "s", Status: "running"})).To(Succeed())
|
||||||
|
}).ToNot(Panic())
|
||||||
|
|
||||||
|
got, ok := m.Get("s")
|
||||||
|
Expect(ok).To(BeTrue())
|
||||||
|
Expect(got.Status).To(Equal("running"))
|
||||||
|
Expect(m.List()).To(HaveLen(1))
|
||||||
|
Expect(m.Snapshot()).To(HaveKey("s"))
|
||||||
|
|
||||||
|
Expect(m.Delete(ctx, "s")).To(Succeed())
|
||||||
|
_, ok = m.Get("s")
|
||||||
|
Expect(ok).To(BeFalse())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("reconnect re-hydrate", func() {
|
||||||
|
It("re-reads the source when the messaging client reconnects", func() {
|
||||||
|
bus := testutil.NewFakeBus()
|
||||||
|
store := newFakeStore(&job{ID: "init", Status: "running"})
|
||||||
|
m := syncstate.New(syncstate.Config[string, *job]{Name: stateName, Key: jobKey, Nats: bus, Store: store})
|
||||||
|
Expect(m.Start(ctx)).To(Succeed())
|
||||||
|
defer func() { Expect(m.Close()).To(Succeed()) }()
|
||||||
|
|
||||||
|
_, ok := m.Get("init")
|
||||||
|
Expect(ok).To(BeTrue())
|
||||||
|
|
||||||
|
// A peer writes to the shared DB while we are unaware (no delta seen).
|
||||||
|
store.add(&job{ID: "late", Status: "running"})
|
||||||
|
_, ok = m.Get("late")
|
||||||
|
Expect(ok).To(BeFalse(), "the new row should not appear before a re-hydrate")
|
||||||
|
|
||||||
|
bus.TriggerReconnect()
|
||||||
|
|
||||||
|
_, ok = m.Get("late")
|
||||||
|
Expect(ok).To(BeTrue(), "reconnect must re-hydrate from the source and pick up drift")
|
||||||
|
_, _, list := store.counts()
|
||||||
|
Expect(list).To(Equal(2), "exactly one Start hydrate plus one reconnect re-hydrate")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
160
core/services/testutil/fakebus.go
Normal file
160
core/services/testutil/fakebus.go
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
package testutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FakeBus is an in-memory messaging.MessagingClient that delivers each published
|
||||||
|
// message synchronously to every registered subscriber whose subject filter
|
||||||
|
// matches, including NATS-style wildcard subjects (`*` matches exactly one
|
||||||
|
// token).
|
||||||
|
//
|
||||||
|
// Synchronous delivery keeps specs deterministic: the moment Publish returns,
|
||||||
|
// every matching subscriber's handler has already run, so the spec body can read
|
||||||
|
// the resulting state without polling. It is the shared test double for every
|
||||||
|
// cross-replica-sync adopter (gallery, syncstate, ...) so they exercise the same
|
||||||
|
// delivery semantics. It deliberately depends only on the standard library and
|
||||||
|
// the messaging package — no test framework — so it is importable anywhere.
|
||||||
|
type FakeBus struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
subs []fakeBusSub
|
||||||
|
// publishCounts records how many messages were published per subject, so a
|
||||||
|
// spec can assert the echo-loop guard (an applied delta must not re-publish).
|
||||||
|
publishCounts map[string]int
|
||||||
|
|
||||||
|
// reconnectCbs back the optional OnReconnect/TriggerReconnect pair, letting a
|
||||||
|
// spec exercise the component's reconnect re-hydrate path without a real
|
||||||
|
// NATS server.
|
||||||
|
reconnectCbs []func()
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeBusSub struct {
|
||||||
|
subject string
|
||||||
|
handler func([]byte)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFakeBus returns a ready-to-use in-memory bus.
|
||||||
|
func NewFakeBus() *FakeBus {
|
||||||
|
return &FakeBus{publishCounts: map[string]int{}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// subjectMatches reports whether a subscription filter matches a concrete
|
||||||
|
// subject, honoring the single-token `*` wildcard used by NATS.
|
||||||
|
func subjectMatches(filter, subject string) bool {
|
||||||
|
if filter == subject {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
fp := strings.Split(filter, ".")
|
||||||
|
sp := strings.Split(subject, ".")
|
||||||
|
if len(fp) != len(sp) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range fp {
|
||||||
|
if fp[i] == "*" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if fp[i] != sp[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Publish marshals data as JSON and delivers it synchronously to every matching
|
||||||
|
// subscriber.
|
||||||
|
func (b *FakeBus) Publish(subject string, data any) error {
|
||||||
|
payload, err := json.Marshal(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
b.mu.Lock()
|
||||||
|
b.publishCounts[subject]++
|
||||||
|
subs := append([]fakeBusSub(nil), b.subs...)
|
||||||
|
b.mu.Unlock()
|
||||||
|
for _, s := range subs {
|
||||||
|
if subjectMatches(s.subject, subject) {
|
||||||
|
s.handler(payload)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// PublishCount returns how many messages were published on the exact subject.
|
||||||
|
func (b *FakeBus) PublishCount(subject string) int {
|
||||||
|
b.mu.Lock()
|
||||||
|
defer b.mu.Unlock()
|
||||||
|
return b.publishCounts[subject]
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeBusSubscription struct {
|
||||||
|
bus *FakeBus
|
||||||
|
subRef fakeBusSub
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *fakeBusSubscription) Unsubscribe() error {
|
||||||
|
s.bus.mu.Lock()
|
||||||
|
defer s.bus.mu.Unlock()
|
||||||
|
for i, candidate := range s.bus.subs {
|
||||||
|
if candidate.subject == s.subRef.subject {
|
||||||
|
s.bus.subs = append(s.bus.subs[:i], s.bus.subs[i+1:]...)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *FakeBus) Subscribe(subject string, handler func([]byte)) (messaging.Subscription, error) {
|
||||||
|
sub := fakeBusSub{subject: subject, handler: handler}
|
||||||
|
b.mu.Lock()
|
||||||
|
b.subs = append(b.subs, sub)
|
||||||
|
b.mu.Unlock()
|
||||||
|
return &fakeBusSubscription{bus: b, subRef: sub}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *FakeBus) QueueSubscribe(subject, _ string, handler func([]byte)) (messaging.Subscription, error) {
|
||||||
|
return b.Subscribe(subject, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *FakeBus) QueueSubscribeReply(string, string, func([]byte, func([]byte))) (messaging.Subscription, error) {
|
||||||
|
return &fakeBusSubscription{bus: b}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *FakeBus) SubscribeReply(string, func([]byte, func([]byte))) (messaging.Subscription, error) {
|
||||||
|
return &fakeBusSubscription{bus: b}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *FakeBus) Request(string, []byte, time.Duration) ([]byte, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsConnected always reports true for the in-memory bus.
func (b *FakeBus) IsConnected() bool {
	return true
}
|
||||||
|
// Close is a no-op: subscriptions and reconnect callbacks are left in place.
func (b *FakeBus) Close() {
}
|
||||||
|
|
||||||
|
// OnReconnect mirrors *messaging.Client.OnReconnect so a spec can drive the
|
||||||
|
// component's reconnect re-hydrate path. The component detects this method via an
|
||||||
|
// optional interface assertion; implementing it here keeps the fake a faithful
|
||||||
|
// stand-in for the concrete client.
|
||||||
|
func (b *FakeBus) OnReconnect(cb func()) {
|
||||||
|
if cb == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b.mu.Lock()
|
||||||
|
b.reconnectCbs = append(b.reconnectCbs, cb)
|
||||||
|
b.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TriggerReconnect runs every registered reconnect callback, simulating a NATS
|
||||||
|
// reconnect event.
|
||||||
|
func (b *FakeBus) TriggerReconnect() {
|
||||||
|
b.mu.Lock()
|
||||||
|
cbs := append([]func(){}, b.reconnectCbs...)
|
||||||
|
b.mu.Unlock()
|
||||||
|
for _, cb := range cbs {
|
||||||
|
cb()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -57,6 +57,11 @@ services:
|
|||||||
LOCALAI_AGENT_POOL_VECTOR_ENGINE: "postgres"
|
LOCALAI_AGENT_POOL_VECTOR_ENGINE: "postgres"
|
||||||
LOCALAI_AGENT_POOL_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
|
LOCALAI_AGENT_POOL_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
|
||||||
LOCALAI_REGISTRATION_TOKEN: "changeme" # Change this in production!
|
LOCALAI_REGISTRATION_TOKEN: "changeme" # Change this in production!
|
||||||
|
# Shared-models mode (optional): set when every node mounts the SAME
|
||||||
|
# models directory at the SAME path (see "Shared Volume Mode" below).
|
||||||
|
# The router then skips gRPC file staging and workers load models
|
||||||
|
# directly from the shared volume instead of re-downloading them.
|
||||||
|
# LOCALAI_DISTRIBUTED_SHARED_MODELS: "true"
|
||||||
# Auth (required for distributed mode — must use PostgreSQL)
|
# Auth (required for distributed mode — must use PostgreSQL)
|
||||||
LOCALAI_AUTH: "true"
|
LOCALAI_AUTH: "true"
|
||||||
LOCALAI_AUTH_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
|
LOCALAI_AUTH_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
|
||||||
@@ -157,8 +162,11 @@ services:
|
|||||||
# Then add to the volumes section:
|
# Then add to the volumes section:
|
||||||
# shared_models:
|
# shared_models:
|
||||||
#
|
#
|
||||||
# With shared volumes, model files are already available on the backend —
|
# With shared volumes the model files are already present on every worker at
|
||||||
# gRPC file staging becomes a no-op (paths match).
|
# the same path. Set LOCALAI_DISTRIBUTED_SHARED_MODELS=true on the frontend
|
||||||
|
# (see its environment above) so the router skips gRPC file staging and the
|
||||||
|
# worker loads the model directly from the shared path instead of
|
||||||
|
# re-downloading it into a per-model subdirectory.
|
||||||
|
|
||||||
# --- Adding More Workers ---
|
# --- Adding More Workers ---
|
||||||
# Copy the worker-1 service above and change:
|
# Copy the worker-1 service above and change:
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ The frontend is a standard LocalAI instance with distributed mode enabled. These
|
|||||||
| `--registration-require-auth` | `LOCALAI_REGISTRATION_REQUIRE_AUTH` | `false` | Fail startup when distributed mode is enabled but the registration token is empty (node endpoints and worker file-transfer would otherwise be unauthenticated) |
|
| `--registration-require-auth` | `LOCALAI_REGISTRATION_REQUIRE_AUTH` | `false` | Fail startup when distributed mode is enabled but the registration token is empty (node endpoints and worker file-transfer would otherwise be unauthenticated) |
|
||||||
| `--distributed-require-auth` | `LOCALAI_DISTRIBUTED_REQUIRE_AUTH` | `false` | **Umbrella switch.** Implies both `--nats-require-auth` and `--registration-require-auth` — one knob to lock down the NATS bus *and* the registration/file-transfer layer. Set this in production instead of the two granular flags. |
|
| `--distributed-require-auth` | `LOCALAI_DISTRIBUTED_REQUIRE_AUTH` | `false` | **Umbrella switch.** Implies both `--nats-require-auth` and `--registration-require-auth` — one knob to lock down the NATS bus *and* the registration/file-transfer layer. Set this in production instead of the two granular flags. |
|
||||||
| `--auto-approve-nodes` | `LOCALAI_AUTO_APPROVE_NODES` | `false` | Auto-approve new worker nodes (skip admin approval) |
|
| `--auto-approve-nodes` | `LOCALAI_AUTO_APPROVE_NODES` | `false` | Auto-approve new worker nodes (skip admin approval) |
|
||||||
|
| `--distributed-shared-models` | `LOCALAI_DISTRIBUTED_SHARED_MODELS` | `false` | Assert that every node mounts the **same** models directory at the **same** path (a shared volume). When `true`, the router skips file staging entirely and workers load models directly from the shared path instead of re-downloading them. See [Shared models directory](#shared-models-directory). |
|
||||||
| `--auth` | `LOCALAI_AUTH` | `false` | **Must be `true`** for distributed mode |
|
| `--auth` | `LOCALAI_AUTH` | `false` | **Must be `true`** for distributed mode |
|
||||||
| `--auth-database-url` | `LOCALAI_AUTH_DATABASE_URL` | *(required)* | PostgreSQL connection URL |
|
| `--auth-database-url` | `LOCALAI_AUTH_DATABASE_URL` | *(required)* | PostgreSQL connection URL |
|
||||||
| `--backend-install-timeout` | `LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT` | `15m` | How long the frontend waits for a worker to acknowledge a backend install before considering the request stalled. Raise it when workers pull large backend images over slow links. If a worker takes longer than this, the operation shows as "still installing in background" in the admin UI and clears once the worker finishes. |
|
| `--backend-install-timeout` | `LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT` | `15m` | How long the frontend waits for a worker to acknowledge a backend install before considering the request stalled. Raise it when workers pull large backend images over slow links. If a worker takes longer than this, the operation shows as "still installing in background" in the admin UI and clears once the worker finishes. |
|
||||||
@@ -133,6 +134,14 @@ When S3 is not configured, model files are transferred directly from the fronten
|
|||||||
|
|
||||||
For high-throughput or very large model files, S3 can be more efficient since it avoids streaming through the frontend.
|
For high-throughput or very large model files, S3 can be more efficient since it avoids streaming through the frontend.
|
||||||
|
|
||||||
|
### Shared models directory
|
||||||
|
|
||||||
|
If every node (frontend and workers) mounts the **same** models directory at the **same** path - for example a shared volume or network filesystem, as shown in the "Shared Volume Mode" section of `docker-compose.distributed.yaml` - the model files are already present on each worker at their canonical path. In that case staging is wasted work: it copies files that already exist into a per-model subdirectory the worker then loads from, which shows up as a re-download of a model you already have.
|
||||||
|
|
||||||
|
Set `LOCALAI_DISTRIBUTED_SHARED_MODELS=true` (or `--distributed-shared-models`) on the frontend to skip staging entirely. The router then leaves the model's absolute paths untouched and the worker loads them directly from the shared volume.
|
||||||
|
|
||||||
|
This flag is a contract you assert: all nodes must mount identical paths. Leave it off (the default) when workers have independent models directories - the frontend stages files to them over HTTP (or S3) as described above.
|
||||||
|
|
||||||
{{% notice warning %}}
|
{{% notice warning %}}
|
||||||
The worker HTTP file transfer server is authenticated by `LOCALAI_REGISTRATION_TOKEN`. If the token is **empty**, the server **fails open** — anyone who can reach the port gets read/write access to the worker's models/staging/data directories (a remote model-poisoning / exfiltration vector). The worker logs a loud warning at startup in this case. Always set `LOCALAI_REGISTRATION_TOKEN` in distributed mode, and set `LOCALAI_DISTRIBUTED_REQUIRE_AUTH=true` (frontend **and** workers) to make a missing token *or* missing NATS credentials a hard startup error rather than a silent fail-open. Firewall the file-transfer port (gRPC base − 1) so only the frontend can reach it.
|
The worker HTTP file transfer server is authenticated by `LOCALAI_REGISTRATION_TOKEN`. If the token is **empty**, the server **fails open** — anyone who can reach the port gets read/write access to the worker's models/staging/data directories (a remote model-poisoning / exfiltration vector). The worker logs a loud warning at startup in this case. Always set `LOCALAI_REGISTRATION_TOKEN` in distributed mode, and set `LOCALAI_DISTRIBUTED_REQUIRE_AUTH=true` (frontend **and** workers) to make a missing token *or* missing NATS credentials a hard startup error rather than a silent fail-open. Firewall the file-transfer port (gRPC base − 1) so only the frontend can reach it.
|
||||||
{{% /notice %}}
|
{{% /notice %}}
|
||||||
|
|||||||
@@ -7,16 +7,93 @@ url = "/features/face-recognition/"
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
LocalAI supports face recognition through the `insightface` backend:
|
LocalAI supports face recognition: face verification (1:1), face
|
||||||
face verification (1:1), face identification (1:N) against a built-in
|
identification (1:N) against a built-in vector store, face embedding,
|
||||||
vector store, face embedding, face detection, demographic analysis
|
face detection, demographic analysis (age / gender), and antispoofing /
|
||||||
(age / gender), and antispoofing / liveness detection.
|
liveness detection.
|
||||||
|
|
||||||
The backend ships **two interchangeable engines** under one image, each
|
The same `/v1/face/*` HTTP API is served by two backends:
|
||||||
paired with a distinct gallery entry so users can pick by license and
|
|
||||||
accuracy needs.
|
|
||||||
|
|
||||||
## Licensing — read this first
|
- **`face-detect` (recommended, default).** A standalone C++/ggml
|
||||||
|
engine ([face-detect.cpp](https://github.com/mudler/face-detect.cpp)):
|
||||||
|
no Python, no onnxruntime, no torch runtime. Each gallery entry is a
|
||||||
|
single self-describing GGUF. This is the recommended option for new
|
||||||
|
deployments.
|
||||||
|
- **`insightface` (Python).** The original ONNX Runtime backend. Still
|
||||||
|
supported; see [the Python backend](#insightface-python-backend) below.
|
||||||
|
|
||||||
|
Both backends expose the identical wire format, so the API examples on
|
||||||
|
this page work with either - only the gallery entry name (the `model`
|
||||||
|
field) changes.
|
||||||
|
|
||||||
|
## face-detect (ggml) backend
|
||||||
|
|
||||||
|
The `face-detect` backend reads the detector and recognizer architecture
|
||||||
|
(`facedetect.arch`) directly from the GGUF metadata, so installing a
|
||||||
|
gallery entry is all that is needed to select an engine. It drives the
|
||||||
|
Embeddings / Detect / FaceVerify / FaceAnalyze gRPC rpcs behind the
|
||||||
|
`/v1/face/{embed,verify,analyze,detect,register,identify,forget}`
|
||||||
|
endpoints.
|
||||||
|
|
||||||
|
### Licensing - read this first
|
||||||
|
|
||||||
|
| Gallery entry | Detector + recognizer | Embedding dim | License |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `face-detect-buffalo-l` | SCRFD-10GF + ArcFace R50 + GenderAge | 512 | **Non-commercial research only** (upstream insightface weights) |
|
||||||
|
| `face-detect-buffalo-m` | SCRFD-2.5GF + ArcFace R50 + GenderAge | 512 | **Non-commercial research only** |
|
||||||
|
| `face-detect-buffalo-s` | SCRFD-500MF + MBF + GenderAge | 512 | **Non-commercial research only** |
|
||||||
|
| `face-detect-yunet-sface` | YuNet + SFace (OpenCV Zoo) | 128 | **Apache 2.0 - commercial-safe** |
|
||||||
|
|
||||||
|
The insightface buffalo packs (buffalo_l / buffalo_m / buffalo_s) are
|
||||||
|
released by the upstream maintainers for **non-commercial research use
|
||||||
|
only**. Pick the `face-detect-yunet-sface` entry for production /
|
||||||
|
commercial deployments.
|
||||||
|
|
||||||
|
### Quickstart
|
||||||
|
|
||||||
|
Install the commercial-safe entry (recommended for copy-paste):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
local-ai models install face-detect-yunet-sface
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify that two images depict the same person:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -sX POST http://localhost:8080/v1/face/verify \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "face-detect-yunet-sface",
|
||||||
|
"img1": "https://example.com/alice_1.jpg",
|
||||||
|
"img2": "https://example.com/alice_2.jpg"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Detect faces and analyze demographics (buffalo entries populate
|
||||||
|
age / gender; YuNet + SFace returns regions only):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -sX POST http://localhost:8080/v1/face/detect \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"model": "face-detect-buffalo-l", "img": "https://example.com/group.jpg"}'
|
||||||
|
|
||||||
|
curl -sX POST http://localhost:8080/v1/face/analyze \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"model": "face-detect-buffalo-l", "img": "https://example.com/alice.jpg"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The 1:N register / identify / forget workflow and the rest of the API
|
||||||
|
are identical to the [API reference](#api-reference) below - just pass a
|
||||||
|
`face-detect-*` model name. The per-engine verify thresholds are ~0.35
|
||||||
|
for the buffalo ArcFace/MBF recognizers and ~0.363 for SFace.
|
||||||
|
|
||||||
|
## insightface (Python) backend
|
||||||
|
|
||||||
|
The `insightface` backend ships **two interchangeable engines** under
|
||||||
|
one image, each paired with a distinct gallery entry so users can pick
|
||||||
|
by license and accuracy needs.
|
||||||
|
|
||||||
|
### Licensing - read this first
|
||||||
|
|
||||||
| Gallery entry | Detector + recognizer | Size | License |
|
| Gallery entry | Detector + recognizer | Size | License |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
|
|||||||
@@ -7,16 +7,92 @@ url = "/features/voice-recognition/"
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
LocalAI supports voice (speaker) recognition through the
|
LocalAI supports voice (speaker) recognition: speaker verification
|
||||||
`speaker-recognition` backend: speaker verification (1:1), speaker
|
(1:1), speaker identification (1:N) against a built-in vector store,
|
||||||
identification (1:N) against a built-in vector store, speaker
|
speaker embedding, and demographic analysis (age / gender / emotion
|
||||||
embedding, and demographic analysis (age / gender / emotion from
|
from voice).
|
||||||
voice).
|
|
||||||
|
|
||||||
The audio analog to [Face Recognition](/features/face-recognition/),
|
The audio analog to [Face Recognition](/features/face-recognition/),
|
||||||
following the same two-engine pattern under one image.
|
served over the same `/v1/voice/*` HTTP API by two backends:
|
||||||
|
|
||||||
## Engines
|
- **`voice-detect` (recommended, default).** A standalone C++/ggml
|
||||||
|
engine ([voice-detect.cpp](https://github.com/mudler/voice-detect.cpp)):
|
||||||
|
no Python, no onnxruntime, no torch runtime. Each gallery entry is a
|
||||||
|
single self-describing GGUF. This is the recommended option for new
|
||||||
|
deployments.
|
||||||
|
- **`speaker-recognition` (Python).** The original SpeechBrain / ONNX
|
||||||
|
backend. Still supported; see [the Python backend](#speaker-recognition-python-backend)
|
||||||
|
below.
|
||||||
|
|
||||||
|
Both backends expose the identical wire format, so the API examples on
|
||||||
|
this page work with either - only the gallery entry name (the `model`
|
||||||
|
field) changes.
|
||||||
|
|
||||||
|
## voice-detect (ggml) backend
|
||||||
|
|
||||||
|
The `voice-detect` backend reads the embedding (or analysis)
|
||||||
|
architecture (`voicedetect.arch`) directly from the GGUF metadata, so
|
||||||
|
installing a gallery entry is all that is needed to select an engine. It
|
||||||
|
drives the VoiceEmbed / VoiceVerify / VoiceAnalyze gRPC rpcs behind the
|
||||||
|
`/v1/voice/{embed,verify,analyze,register,identify,forget}` endpoints.
|
||||||
|
|
||||||
|
### Gallery entries
|
||||||
|
|
||||||
|
| Gallery entry | Model | Embedding dim | License |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `voice-detect-ecapa-tdnn` | SpeechBrain ECAPA-TDNN (VoxCeleb) | 192 | **Apache 2.0 - commercial-safe** |
|
||||||
|
| `voice-detect-wespeaker-resnet34` | WeSpeaker ResNet34 (VoxCeleb) | 256 | CC-BY-4.0 |
|
||||||
|
| `voice-detect-eres2net` | 3D-Speaker ERes2Net (VoxCeleb) | 192 | **Apache 2.0 - commercial-safe** |
|
||||||
|
| `voice-detect-campplus` | 3D-Speaker CAM++ (VoxCeleb) | 192 | **Apache 2.0 - commercial-safe** |
|
||||||
|
| `voice-detect-emotion-wav2vec2` | audEERING wav2vec2 (age / gender / emotion) | analyze head | **CC-BY-NC-SA-4.0 - non-commercial** |
|
||||||
|
|
||||||
|
The four speaker-embedding entries (ECAPA-TDNN, WeSpeaker ResNet34, ERes2Net and CAM++) drive verify / embed / identify.
|
||||||
|
`voice-detect-emotion-wav2vec2` is the analysis head behind
|
||||||
|
`/v1/voice/analyze` (continuous age estimate plus gender and emotion
|
||||||
|
class scores) and is **non-commercial / research use only**.
|
||||||
|
|
||||||
|
### Quickstart
|
||||||
|
|
||||||
|
Install the default entry (recommended for copy-paste):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
local-ai models install voice-detect-ecapa-tdnn
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify that two audio clips were spoken by the same person:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -sX POST http://localhost:8080/v1/voice/verify \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "voice-detect-ecapa-tdnn",
|
||||||
|
"audio1": "https://example.com/alice_1.wav",
|
||||||
|
"audio2": "https://example.com/alice_2.wav"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Analyze age / gender / emotion (install the analyze entry first):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
local-ai models install voice-detect-emotion-wav2vec2
|
||||||
|
|
||||||
|
curl -sX POST http://localhost:8080/v1/voice/analyze \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"model": "voice-detect-emotion-wav2vec2", "audio": "https://example.com/alice.wav"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The 1:N register / identify / forget workflow and the rest of the API
|
||||||
|
are identical to the [API reference](#api-reference) below - just pass a
|
||||||
|
`voice-detect-*` model name. The default verify threshold is ~0.25 for
|
||||||
|
the ECAPA-TDNN / ERes2Net / CAM++ recognizers and ~0.30 for WeSpeaker
|
||||||
|
ResNet34.
|
||||||
|
|
||||||
|
## speaker-recognition (Python) backend
|
||||||
|
|
||||||
|
The `speaker-recognition` backend follows the same pattern as the
|
||||||
|
`insightface` face-recognition backend: two engines under one image.
|
||||||
|
|
||||||
|
### Engines
|
||||||
|
|
||||||
| Gallery entry | Model | Size | License |
|
| Gallery entry | Model | Size | License |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
|
|||||||
@@ -97,6 +97,8 @@ All backends listed here can be installed on demand from the [Backend Gallery]({
|
|||||||
| [locate-anything.cpp](https://github.com/mudler/locate-anything.cpp) | Open-vocabulary object detection and visual grounding (LocateAnything-3B) in C/C++ using GGML | CPU, CUDA 12/13, Intel SYCL, Vulkan, Jetson L4T |
|
| [locate-anything.cpp](https://github.com/mudler/locate-anything.cpp) | Open-vocabulary object detection and visual grounding (LocateAnything-3B) in C/C++ using GGML | CPU, CUDA 12/13, Intel SYCL, Vulkan, Jetson L4T |
|
||||||
| [depth-anything.cpp](https://github.com/mudler/depth-anything.cpp) | Depth Anything 3 monocular metric depth + camera pose in C/C++ using GGML | CPU, CUDA 12/13, Intel SYCL, Vulkan, Jetson L4T |
|
| [depth-anything.cpp](https://github.com/mudler/depth-anything.cpp) | Depth Anything 3 monocular metric depth + camera pose in C/C++ using GGML | CPU, CUDA 12/13, Intel SYCL, Vulkan, Jetson L4T |
|
||||||
| [sam3.cpp](https://github.com/PABannier/sam3.cpp) | Segment Anything (SAM 3/2/EdgeTAM) with text/point/box prompts in C/C++ using GGML | CPU, CUDA 12/13, Intel SYCL, Vulkan, Jetson L4T |
|
| [sam3.cpp](https://github.com/PABannier/sam3.cpp) | Segment Anything (SAM 3/2/EdgeTAM) with text/point/box prompts in C/C++ using GGML | CPU, CUDA 12/13, Intel SYCL, Vulkan, Jetson L4T |
|
||||||
|
| [face-detect.cpp](https://github.com/mudler/face-detect.cpp) | Native face detection, recognition, embedding, demographics and anti-spoofing (SCRFD/ArcFace, YuNet/SFace) in C/C++ using GGML | CPU, CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, Jetson L4T |
|
||||||
|
| [voice-detect.cpp](https://github.com/mudler/voice-detect.cpp) | Native speaker (voice) recognition and voice analysis (ECAPA-TDNN, WeSpeaker, ERes2Net, CAM++, wav2vec2) in C/C++ using GGML | CPU, CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, Jetson L4T |
|
||||||
| [insightface](https://github.com/deepinsight/insightface) | Face verification, embedding, and anti-spoofing liveness (ONNX Runtime) | CPU, CUDA 12 |
|
| [insightface](https://github.com/deepinsight/insightface) | Face verification, embedding, and anti-spoofing liveness (ONNX Runtime) | CPU, CUDA 12 |
|
||||||
| [speaker-recognition](https://speechbrain.github.io/) | Speaker (voice) recognition via SpeechBrain ECAPA-TDNN | CPU, CUDA 12, Metal |
|
| [speaker-recognition](https://speechbrain.github.io/) | Speaker (voice) recognition via SpeechBrain ECAPA-TDNN | CPU, CUDA 12, Metal |
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user