mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 00:59:28 -04:00
Compare commits
27 Commits
master
...
feat/recon
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3e91eafed3 | ||
|
|
814b2a7c6c | ||
|
|
7cbb743b25 | ||
|
|
9684c5dd7e | ||
|
|
628b8a8e01 | ||
|
|
c4df41d209 | ||
|
|
c1a3afc980 | ||
|
|
f9a465ee25 | ||
|
|
48e22da165 | ||
|
|
f940dc858a | ||
|
|
f6d93591bd | ||
|
|
594576f440 | ||
|
|
5614b39782 | ||
|
|
b4f7a36d6d | ||
|
|
c6170b875d | ||
|
|
a9c7484986 | ||
|
|
e05dece93c | ||
|
|
7c2a347e79 | ||
|
|
6e0c491380 | ||
|
|
2bcdfe2a68 | ||
|
|
b843f498ca | ||
|
|
46d7d59a82 | ||
|
|
e3bca9a172 | ||
|
|
a19ab22186 | ||
|
|
91d08d88e6 | ||
|
|
2c5ed413cb | ||
|
|
01e098a844 |
311
.github/backend-matrix.yml
vendored
311
.github/backend-matrix.yml
vendored
@@ -3723,6 +3723,302 @@ include:
|
|||||||
dockerfile: "./backend/Dockerfile.golang"
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
context: "./"
|
context: "./"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
|
# voice-detect
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "8"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-12-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-13-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-cuda-13-arm64-voice-detect'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-voice-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f32-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f16-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-voice-detect'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2204'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-rocm-hipblas-voice-detect'
|
||||||
|
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "voice-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
# face-detect
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "8"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-12-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-nvidia-cuda-13-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "13"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-cuda-13-arm64-face-detect'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: ''
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cpu-face-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f32-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-intel-sycl-f16-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
platform-tag: 'amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-face-detect'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
platform-tag: 'arm64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-vulkan-face-detect'
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
base-image: "ubuntu:24.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-face-detect'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2204'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
cuda-major-version: ""
|
||||||
|
cuda-minor-version: ""
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-gpu-rocm-hipblas-face-detect'
|
||||||
|
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
skip-drivers: 'false'
|
||||||
|
backend: "face-detect"
|
||||||
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
|
context: "./"
|
||||||
|
ubuntu-version: '2404'
|
||||||
# acestep-cpp
|
# acestep-cpp
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
cuda-major-version: ""
|
cuda-major-version: ""
|
||||||
@@ -4906,6 +5202,14 @@ includeDarwin:
|
|||||||
tag-suffix: "-metal-darwin-arm64-ced"
|
tag-suffix: "-metal-darwin-arm64-ced"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
lang: "go"
|
lang: "go"
|
||||||
|
- backend: "voice-detect"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-voice-detect"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
|
- backend: "face-detect"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-face-detect"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
- backend: "acestep-cpp"
|
- backend: "acestep-cpp"
|
||||||
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
@@ -4974,9 +5278,6 @@ includeDarwin:
|
|||||||
- backend: "kitten-tts"
|
- backend: "kitten-tts"
|
||||||
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
- backend: "liquid-audio"
|
|
||||||
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
|
||||||
build-type: "mps"
|
|
||||||
- backend: "piper"
|
- backend: "piper"
|
||||||
tag-suffix: "-metal-darwin-arm64-piper"
|
tag-suffix: "-metal-darwin-arm64-piper"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
@@ -4993,10 +5294,6 @@ includeDarwin:
|
|||||||
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
lang: "go"
|
lang: "go"
|
||||||
- backend: "supertonic"
|
|
||||||
tag-suffix: "-metal-darwin-arm64-supertonic"
|
|
||||||
build-type: "metal"
|
|
||||||
lang: "go"
|
|
||||||
- backend: "local-store"
|
- backend: "local-store"
|
||||||
tag-suffix: "-metal-darwin-arm64-local-store"
|
tag-suffix: "-metal-darwin-arm64-local-store"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
|
|||||||
2
.github/workflows/backend.yml
vendored
2
.github/workflows/backend.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
|||||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Setup Bun
|
- name: Setup Bun
|
||||||
uses: oven-sh/setup-bun@v2
|
uses: oven-sh/setup-bun@v2
|
||||||
|
|||||||
2
.github/workflows/backend_build.yml
vendored
2
.github/workflows/backend_build.yml
vendored
@@ -101,7 +101,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/backend_build_darwin.yml
vendored
2
.github/workflows/backend_build_darwin.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
|||||||
HOMEBREW_NO_ANALYTICS: '1'
|
HOMEBREW_NO_ANALYTICS: '1'
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/backend_merge.yml
vendored
2
.github/workflows/backend_merge.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
|||||||
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
# Sparse checkout: the merge job needs `.github/scripts/` (for the
|
||||||
# keepalive cleanup script) but none of the source tree.
|
# keepalive cleanup script) but none of the source tree.
|
||||||
- name: Checkout (.github/scripts only)
|
- name: Checkout (.github/scripts only)
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
sparse-checkout: |
|
sparse-checkout: |
|
||||||
.github/scripts
|
.github/scripts
|
||||||
|
|||||||
2
.github/workflows/backend_pr.yml
vendored
2
.github/workflows/backend_pr.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Setup Bun
|
- name: Setup Bun
|
||||||
uses: oven-sh/setup-bun@v2
|
uses: oven-sh/setup-bun@v2
|
||||||
|
|||||||
2
.github/workflows/base-images.yml
vendored
2
.github/workflows/base-images.yml
vendored
@@ -127,7 +127,7 @@ jobs:
|
|||||||
# the original l4t matrix entry which set skip-drivers: 'true'.
|
# the original l4t matrix entry which set skip-drivers: 'true'.
|
||||||
skip-drivers: 'true'
|
skip-drivers: 'true'
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: false
|
submodules: false
|
||||||
- name: Free disk space
|
- name: Free disk space
|
||||||
|
|||||||
6
.github/workflows/build-test.yaml
vendored
6
.github/workflows/build-test.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -25,7 +25,7 @@ jobs:
|
|||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -47,7 +47,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ jobs:
|
|||||||
bump:
|
bump:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
|
|||||||
12
.github/workflows/bump_deps.yaml
vendored
12
.github/workflows/bump_deps.yaml
vendored
@@ -46,6 +46,14 @@ jobs:
|
|||||||
variable: "CED_VERSION"
|
variable: "CED_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/ced/Makefile"
|
file: "backend/go/ced/Makefile"
|
||||||
|
- repository: "mudler/voice-detect.cpp"
|
||||||
|
variable: "VOICEDETECT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
file: "backend/go/voice-detect/Makefile"
|
||||||
|
- repository: "mudler/face-detect.cpp"
|
||||||
|
variable: "FACEDETECT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
file: "backend/go/face-detect/Makefile"
|
||||||
- repository: "mudler/depth-anything.cpp"
|
- repository: "mudler/depth-anything.cpp"
|
||||||
variable: "DEPTHANYTHING_VERSION"
|
variable: "DEPTHANYTHING_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
@@ -92,7 +100,7 @@ jobs:
|
|||||||
file: "backend/go/vibevoice-cpp/Makefile"
|
file: "backend/go/vibevoice-cpp/Makefile"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
id: bump
|
id: bump
|
||||||
run: |
|
run: |
|
||||||
@@ -128,7 +136,7 @@ jobs:
|
|||||||
if: github.repository == 'mudler/LocalAI'
|
if: github.repository == 'mudler/LocalAI'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
- name: Bump vLLM cu130 wheel pin 🔧
|
- name: Bump vLLM cu130 wheel pin 🔧
|
||||||
id: bump
|
id: bump
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -13,7 +13,7 @@ jobs:
|
|||||||
- repository: "mudler/LocalAI"
|
- repository: "mudler/LocalAI"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
|
|||||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -8,7 +8,7 @@ jobs:
|
|||||||
if: github.repository == 'mudler/LocalAI'
|
if: github.repository == 'mudler/LocalAI'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
uses: ./.github/actions/configure-apt-mirror
|
uses: ./.github/actions/configure-apt-mirror
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
|
|||||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
|
|||||||
2
.github/workflows/gallery-agent.yaml
vendored
2
.github/workflows/gallery-agent.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.GITHUB_TOKEN }}
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -44,7 +44,7 @@ jobs:
|
|||||||
uses: docker/setup-buildx-action@master
|
uses: docker/setup-buildx-action@master
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Cache Intel images
|
- name: Cache Intel images
|
||||||
uses: docker/build-push-action@v7
|
uses: docker/build-push-action@v7
|
||||||
|
|||||||
2
.github/workflows/gh-pages.yml
vendored
2
.github/workflows/gh-pages.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
|||||||
HUGO_VERSION: "0.146.3"
|
HUGO_VERSION: "0.146.3"
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # needed for enableGitInfo
|
fetch-depth: 0 # needed for enableGitInfo
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|||||||
2
.github/workflows/image_build.yml
vendored
2
.github/workflows/image_build.yml
vendored
@@ -80,7 +80,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
id: apt_mirror
|
id: apt_mirror
|
||||||
|
|||||||
2
.github/workflows/image_merge.yml
vendored
2
.github/workflows/image_merge.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
# Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
|
||||||
# script). Skips the rest of the source tree.
|
# script). Skips the rest of the source tree.
|
||||||
- name: Checkout (.github/scripts only)
|
- name: Checkout (.github/scripts only)
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
sparse-checkout: |
|
sparse-checkout: |
|
||||||
.github/scripts
|
.github/scripts
|
||||||
|
|||||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
|||||||
golangci-lint:
|
golangci-lint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
# Full history so golangci-lint's new-from-merge-base can reach
|
# Full history so golangci-lint's new-from-merge-base can reach
|
||||||
# origin/master and compute the diff against it.
|
# origin/master and compute the diff against it.
|
||||||
|
|||||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -28,7 +28,7 @@ jobs:
|
|||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
@@ -46,7 +46,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
GO111MODULE: on
|
GO111MODULE: on
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Source
|
- name: Checkout Source
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
|
|||||||
86
.github/workflows/test-extra.yml
vendored
86
.github/workflows/test-extra.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
|||||||
parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
|
parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
- name: Setup Bun
|
- name: Setup Bun
|
||||||
uses: oven-sh/setup-bun@v2
|
uses: oven-sh/setup-bun@v2
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
@@ -67,7 +67,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -90,7 +90,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -113,7 +113,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -137,7 +137,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -158,7 +158,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -178,7 +178,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -240,7 +240,7 @@ jobs:
|
|||||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||||
# df -h
|
# df -h
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -265,7 +265,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -288,7 +288,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -309,7 +309,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -330,7 +330,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -351,7 +351,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -373,7 +373,7 @@ jobs:
|
|||||||
# timeout-minutes: 45
|
# timeout-minutes: 45
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -394,7 +394,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -415,7 +415,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -436,7 +436,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -462,7 +462,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -484,7 +484,7 @@ jobs:
|
|||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -513,7 +513,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -530,7 +530,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -552,7 +552,7 @@ jobs:
|
|||||||
timeout-minutes: 20
|
timeout-minutes: 20
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -579,7 +579,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -604,7 +604,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -625,7 +625,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -645,7 +645,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -664,7 +664,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -681,7 +681,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -698,7 +698,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -741,7 +741,7 @@ jobs:
|
|||||||
# timeout-minutes: 90
|
# timeout-minutes: 90
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -783,7 +783,7 @@ jobs:
|
|||||||
# timeout-minutes: 90
|
# timeout-minutes: 90
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v7
|
# uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -808,7 +808,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -840,7 +840,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -876,7 +876,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -915,7 +915,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -952,7 +952,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -987,7 +987,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -1013,7 +1013,7 @@ jobs:
|
|||||||
timeout-minutes: 150
|
timeout-minutes: 150
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1042,7 +1042,7 @@ jobs:
|
|||||||
timeout-minutes: 60
|
timeout-minutes: 60
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
@@ -1058,7 +1058,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1091,7 +1091,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1114,7 +1114,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -1140,7 +1140,7 @@ jobs:
|
|||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|||||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
go-version: ['1.26.x']
|
go-version: ['1.26.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Free disk space
|
- name: Free disk space
|
||||||
@@ -84,7 +84,7 @@ jobs:
|
|||||||
go-version: ['1.26.x']
|
go-version: ['1.26.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
|
|||||||
2
.github/workflows/tests-aio.yml
vendored
2
.github/workflows/tests-aio.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
|||||||
sudo rm -rfv build || true
|
sudo rm -rfv build || true
|
||||||
df -h
|
df -h
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|||||||
2
.github/workflows/tests-e2e.yml
vendored
2
.github/workflows/tests-e2e.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
go-version: ['1.25.x']
|
go-version: ['1.25.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
@@ -1,97 +0,0 @@
|
|||||||
---
|
|
||||||
name: 'PII NER tier E2E (live GGUF, CPU)'
|
|
||||||
|
|
||||||
# Runs the real privacy-filter GGUF NER tier end-to-end on CPU — the gap the
|
|
||||||
# hermetic tests/e2e suite cannot cover (it only exercises the in-process
|
|
||||||
# pattern tier). Heavy (builds the C++ backend image + downloads a ~2.7 GB
|
|
||||||
# GGUF), so it is path-filtered on PRs and otherwise runs nightly / on demand.
|
|
||||||
#
|
|
||||||
# This drives the container-level harness (tests/e2e-backends) via
|
|
||||||
# `make test-extra-backend-privacy-filter`: it builds the privacy-filter image,
|
|
||||||
# downloads the model, loads it on CPU, and asserts byte-correct, UTF-8-aligned
|
|
||||||
# TokenClassify spans. The complementary HTTP-path specs in tests/e2e
|
|
||||||
# (e2e_pii_ner_test.go) Skip unless PII_NER_MODEL_GGUF is wired.
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
schedule:
|
|
||||||
- cron: '0 3 * * *'
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
paths:
|
|
||||||
- 'backend/cpp/privacy-filter/**'
|
|
||||||
- 'backend/Dockerfile.privacy-filter'
|
|
||||||
- 'core/services/routing/pii/**'
|
|
||||||
- 'core/services/routing/piidetector/**'
|
|
||||||
- 'core/backend/token_classify.go'
|
|
||||||
- 'core/http/endpoints/localai/pii.go'
|
|
||||||
- 'core/schema/pii.go'
|
|
||||||
- 'tests/e2e-backends/**'
|
|
||||||
- 'tests/e2e/e2e_pii_ner_test.go'
|
|
||||||
- 'tests/e2e/e2e_suite_test.go'
|
|
||||||
- '.github/workflows/tests-pii-ner-e2e.yml'
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'backend/cpp/privacy-filter/**'
|
|
||||||
- 'backend/Dockerfile.privacy-filter'
|
|
||||||
- 'core/services/routing/pii/**'
|
|
||||||
- 'core/services/routing/piidetector/**'
|
|
||||||
- 'core/backend/token_classify.go'
|
|
||||||
- 'core/http/endpoints/localai/pii.go'
|
|
||||||
- 'core/schema/pii.go'
|
|
||||||
- 'tests/e2e-backends/**'
|
|
||||||
- 'tests/e2e/e2e_pii_ner_test.go'
|
|
||||||
- 'tests/e2e/e2e_suite_test.go'
|
|
||||||
- '.github/workflows/tests-pii-ner-e2e.yml'
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ci-tests-pii-ner-e2e-${{ github.event.pull_request.number || github.sha }}-${{ github.repository }}
|
|
||||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
tests-pii-ner-e2e:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
go-version: ['1.25.x']
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v7
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Free disk space
|
|
||||||
run: |
|
|
||||||
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL || true
|
|
||||||
sudo docker image prune --all --force || true
|
|
||||||
df -h
|
|
||||||
- name: Configure apt mirror on runner
|
|
||||||
uses: ./.github/actions/configure-apt-mirror
|
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
|
||||||
uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: ${{ matrix.go-version }}
|
|
||||||
cache: false
|
|
||||||
- name: Proto Dependencies
|
|
||||||
run: |
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y build-essential
|
|
||||||
# Builds local-ai-backend:privacy-filter, downloads the GGUF, loads it on
|
|
||||||
# CPU and runs the token_classify capability spec (byte-offset contract).
|
|
||||||
- name: Run live PII NER backend E2E
|
|
||||||
run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.23
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
2
.github/workflows/tests-ui-e2e.yml
vendored
2
.github/workflows/tests-ui-e2e.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
go-version: ['1.26.x']
|
go-version: ['1.26.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v7
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
|
|||||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v7
|
- uses: actions/checkout@v6
|
||||||
- name: Configure apt mirror on runner
|
- name: Configure apt mirror on runner
|
||||||
uses: ./.github/actions/configure-apt-mirror
|
uses: ./.github/actions/configure-apt-mirror
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -91,6 +91,3 @@ core/http/react-ui/test-results/
|
|||||||
|
|
||||||
# Local worktrees
|
# Local worktrees
|
||||||
.worktrees/
|
.worktrees/
|
||||||
|
|
||||||
# SDD / brainstorm scratch (agent-driven development)
|
|
||||||
.superpowers/
|
|
||||||
|
|||||||
10
Makefile
10
Makefile
@@ -690,16 +690,6 @@ test-extra-backend-llama-cpp-transcription: docker-build-llama-cpp
|
|||||||
BACKEND_TEST_CTX_SIZE=2048 \
|
BACKEND_TEST_CTX_SIZE=2048 \
|
||||||
$(MAKE) test-extra-backend
|
$(MAKE) test-extra-backend
|
||||||
|
|
||||||
## privacy-filter: the PII/NER token-classification backend. Exercises the
|
|
||||||
## TokenClassify RPC and asserts byte-correct, UTF-8-aligned span offsets
|
|
||||||
## against the openai-privacy-filter multilingual GGUF (CPU-runnable, ~50M
|
|
||||||
## active params). This is the live-backend coverage for the PII NER tier.
|
|
||||||
test-extra-backend-privacy-filter: docker-build-privacy-filter
|
|
||||||
BACKEND_IMAGE=local-ai-backend:privacy-filter \
|
|
||||||
BACKEND_TEST_MODEL_URL=https://huggingface.co/LocalAI-io/privacy-filter-multilingual-GGUF/resolve/main/privacy-filter-multilingual-f16.gguf \
|
|
||||||
BACKEND_TEST_CAPS=health,load,token_classify \
|
|
||||||
$(MAKE) test-extra-backend
|
|
||||||
|
|
||||||
## vllm is resolved from a HuggingFace model id (no file download) and
|
## vllm is resolved from a HuggingFace model id (no file download) and
|
||||||
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
||||||
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
||||||
|
|||||||
@@ -177,6 +177,7 @@ For more details, see the [Getting Started guide](https://localai.io/basics/gett
|
|||||||
|
|
||||||
## Latest News
|
## Latest News
|
||||||
|
|
||||||
|
- **June 2026**: New native biometric backends from the LocalAI team: [voice-detect.cpp](https://github.com/mudler/voice-detect.cpp) for speaker recognition and voice analysis (ECAPA-TDNN, WeSpeaker, ERes2Net, CAM++, wav2vec2 age/gender/emotion) and [face-detect.cpp](https://github.com/mudler/face-detect.cpp) for face detection, recognition, demographics and anti-spoofing (SCRFD/ArcFace, YuNet/SFace). Both are from-scratch C++/ggml engines with no Python or onnxruntime at inference, self-contained GGUF weights, bit-exact parity with the reference, and GPU cuDNN parity, replacing the heavier Python `insightface` and `speaker-recognition` backends ([PR #10441](https://github.com/mudler/LocalAI/pull/10441)).
|
||||||
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
||||||
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
||||||
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ RUN <<EOT bash
|
|||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||||
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} libcudnn9-dev-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||||
fi
|
fi
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
|
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
|
||||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
|
LLAMA_VERSION?=e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# CrispASR version (release tag)
|
# CrispASR version (release tag)
|
||||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||||
CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
|
CRISPASR_VERSION?=d745bda4386ae0f9d1d2f23fff8ec95d76428221
|
||||||
SO_TARGET?=libgocrispasr.so
|
SO_TARGET?=libgocrispasr.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
18
backend/go/face-detect/.gitignore
vendored
Normal file
18
backend/go/face-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Fetched upstream sources
|
||||||
|
sources/
|
||||||
|
|
||||||
|
# CMake build directories
|
||||||
|
build*/
|
||||||
|
|
||||||
|
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||||
|
# symlinked for local dev; the real sources live in face-detect.cpp upstream.
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
facedetect_capi.h
|
||||||
|
compile_commands.json
|
||||||
|
|
||||||
|
# Compiled backend binary
|
||||||
|
face-detect-grpc
|
||||||
|
|
||||||
|
# Packaging output
|
||||||
|
package/
|
||||||
110
backend/go/face-detect/Makefile
Normal file
110
backend/go/face-detect/Makefile
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
# face-detect backend Makefile.
|
||||||
|
#
|
||||||
|
# Upstream pin lives below as FACEDETECT_VERSION?=6107a24... (.github/bump_deps.sh
|
||||||
|
# can find and update it - matches the voice-detect / parakeet.cpp / whisper.cpp
|
||||||
|
# convention).
|
||||||
|
#
|
||||||
|
# Local dev shortcut: if you already have an out-of-tree face-detect.cpp build,
|
||||||
|
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||||
|
#
|
||||||
|
# ln -sf /path/to/face-detect.cpp/build-shared/libfacedetect.so .
|
||||||
|
# ln -sf /path/to/face-detect.cpp/include/facedetect_capi.h .
|
||||||
|
# go build -o face-detect-grpc .
|
||||||
|
#
|
||||||
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
|
# not need a side-checkout.
|
||||||
|
|
||||||
|
FACEDETECT_VERSION?=6107a2414fdaccc9ce8650b762f9436d20541cbe
|
||||||
|
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
GO_TAGS?=
|
||||||
|
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
|
||||||
|
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||||
|
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||||
|
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||||
|
|
||||||
|
# Build ggml + the vendored libjpeg-turbo statically into libfacedetect.so (PIC)
|
||||||
|
# so the shared lib is self-contained: dlopen needs no libggml*.so alongside it,
|
||||||
|
# only system libs (libstdc++/libgomp/libc) the runtime image already provides.
|
||||||
|
# The vendored jpeg symbols are hidden via -Wl,--exclude-libs,ALL on the C++
|
||||||
|
# side, so only the facedetect_capi_* surface is exported.
|
||||||
|
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DFACEDETECT_SHARED=ON -DFACEDETECT_BUILD_CLI=OFF -DFACEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||||
|
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# face-detect.cpp gates its GGML backends behind FACEDETECT_GGML_* options and
|
||||||
|
# does set(GGML_CUDA ${FACEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||||
|
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the FACEDETECT_GGML_*
|
||||||
|
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDA=ON
|
||||||
|
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, SCRFD 2.3x
|
||||||
|
# vs torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||||
|
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||||
|
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||||
|
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||||
|
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDNN=ON
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_HIP=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_VULKAN=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DFACEDETECT_GGML_METAL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: face-detect-grpc package build clean purge test all
|
||||||
|
|
||||||
|
all: face-detect-grpc
|
||||||
|
|
||||||
|
# Clone the upstream face-detect.cpp source at the pinned commit. Directory acts
|
||||||
|
# as the target so make only re-clones when missing. After a FACEDETECT_VERSION
|
||||||
|
# bump, run 'make purge && make' to refetch.
|
||||||
|
sources/face-detect.cpp:
|
||||||
|
mkdir -p sources/face-detect.cpp
|
||||||
|
cd sources/face-detect.cpp && \
|
||||||
|
git init -q && \
|
||||||
|
git remote add origin $(FACEDETECT_REPO) && \
|
||||||
|
git fetch --depth 1 origin $(FACEDETECT_VERSION) && \
|
||||||
|
git checkout FETCH_HEAD && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||||
|
# sources so purego.Dlopen("libfacedetect.so") and the cgo-less build both pick
|
||||||
|
# them up.
|
||||||
|
libfacedetect.so: sources/face-detect.cpp
|
||||||
|
cmake -B sources/face-detect.cpp/build-shared -S sources/face-detect.cpp $(CMAKE_ARGS)
|
||||||
|
cmake --build sources/face-detect.cpp/build-shared --config Release -j$(JOBS) --target facedetect
|
||||||
|
cp -fv sources/face-detect.cpp/build-shared/libfacedetect.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/face-detect.cpp/include/facedetect_capi.h ./
|
||||||
|
|
||||||
|
face-detect-grpc: libfacedetect.so main.go gofacedetect.go options.go
|
||||||
|
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o face-detect-grpc .
|
||||||
|
|
||||||
|
package: face-detect-grpc
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
build: package
|
||||||
|
|
||||||
|
# Test target. The embed/detect/verify/analyze smoke specs are gated on
|
||||||
|
# FACEDETECT_BACKEND_TEST_MODEL + FACEDETECT_BACKEND_TEST_IMAGE; without them the
|
||||||
|
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||||
|
test:
|
||||||
|
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf libfacedetect.so* facedetect_capi.h package face-detect-grpc
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf sources/face-detect.cpp
|
||||||
431
backend/go/face-detect/gofacedetect.go
Normal file
431
backend/go/face-detect/gofacedetect.go
Normal file
@@ -0,0 +1,431 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Function variables that purego binds to the flat C-API exported by
// libfacedetect.so. Each variable mirrors one symbol of facedetect_capi.h, so
// `nm libfacedetect.so | grep facedetect_capi` is enough to spot drift.
//
// The opaque ctx and every malloc'd char*/float* result are typed as uintptr on
// purpose: the raw pointer has to survive the call so it can be handed back to
// the matching capi free function. Letting purego convert those results to a Go
// string/[]float32 would copy the data and drop the pointer, leaking the
// C-owned buffer on every call.
var (
	// CppAbiVersion is bound to facedetect_capi_abi_version.
	CppAbiVersion func() int32
	// CppLoad is bound to facedetect_capi_load; a zero result is treated as a
	// failed load by the caller.
	CppLoad func(ggufPath string) uintptr
	// CppFree is bound to facedetect_capi_free.
	CppFree func(ctx uintptr)
	// CppLastError is bound to facedetect_capi_last_error. It is the one
	// char* result taken as a Go string; presumably the buffer stays owned by
	// the ctx - confirm against facedetect_capi.h.
	CppLastError func(ctx uintptr) string
	// CppFreeString is bound to facedetect_capi_free_string and releases the
	// JSON documents returned by CppDetectJSON / CppAnalyzeJSON.
	CppFreeString func(s uintptr)
	// CppFreeVec is bound to facedetect_capi_free_vec and releases the
	// embedding vector written by CppEmbedPath.
	CppFreeVec func(v uintptr)
	// CppEmbedPath is bound to facedetect_capi_embed_path; outVec/outDim are
	// out-parameters receiving the vector pointer and its length.
	CppEmbedPath func(ctx uintptr, imagePath string, outVec, outDim unsafe.Pointer) int32
	// CppEmbedRGB is bound to facedetect_capi_embed_rgb.
	CppEmbedRGB func(ctx uintptr, rgb []byte, width, height int32, outVec, outDim unsafe.Pointer) int32
	// CppDetectJSON is bound to facedetect_capi_detect_path_json.
	CppDetectJSON func(ctx uintptr, imagePath string) uintptr
	// CppVerifyPaths is bound to facedetect_capi_verify_paths; outDistance and
	// outVerified are out-parameters.
	CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, antiSpoof int32, outDistance, outVerified unsafe.Pointer) int32
	// CppAnalyzeJSON is bound to facedetect_capi_analyze_path_json.
	CppAnalyzeJSON func(ctx uintptr, imagePath string) uintptr
)
|
||||||
|
|
||||||
|
// FaceDetect implements the face-recognition (biometric) subset of the Backend
|
||||||
|
// gRPC service over libfacedetect.so. The C side keeps a single loaded model
|
||||||
|
// pack plus a per-ctx last-error buffer and is not reentrant, so
|
||||||
|
// base.SingleThread serializes every call.
|
||||||
|
type FaceDetect struct {
|
||||||
|
base.SingleThread
|
||||||
|
opts loadOptions
|
||||||
|
ctxPtr uintptr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) Load(opts *pb.ModelOptions) error {
|
||||||
|
model := opts.ModelFile
|
||||||
|
if model == "" {
|
||||||
|
model = opts.ModelPath
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||||
|
model = filepath.Join(opts.ModelPath, model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
return errors.New("face-detect: ModelFile is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
f.opts = parseOptions(opts.Options)
|
||||||
|
if f.opts.modelName == "" {
|
||||||
|
f.opts.modelName = filepath.Base(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||||
|
// one backend process per model and serves requests concurrently, so the
|
||||||
|
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||||
|
// FACEDETECT_THREADS is read by the engine at backend construction, so it
|
||||||
|
// must be set before the capi load. A non-positive Threads means "unset":
|
||||||
|
// leave the env alone so the engine keeps its sane default.
|
||||||
|
threads := opts.Threads
|
||||||
|
if threads > 0 {
|
||||||
|
if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||||
|
return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err)
|
||||||
|
}
|
||||||
|
xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Info("face-detect: loading model", "model", model,
|
||||||
|
"verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||||
|
|
||||||
|
ctx := CppLoad(model)
|
||||||
|
if ctx == 0 {
|
||||||
|
// The last-error buffer lives on the ctx that was never returned, so
|
||||||
|
// surface the path the operator tried to load instead.
|
||||||
|
return fmt.Errorf("face-detect: facedetect_capi_load failed for %q", model)
|
||||||
|
}
|
||||||
|
f.ctxPtr = ctx
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Embeddings returns the L2-normalized ArcFace embedding of the primary face in
|
||||||
|
// the supplied image. Mirroring the Python face backend, the image is read from
|
||||||
|
// Images[0] as a base64 payload; materializeImage decodes it to a temp file so
|
||||||
|
// the path-based C-API can run its own decode (cv2.imread parity). The gRPC
|
||||||
|
// server wraps the returned slice in an EmbeddingResult.
|
||||||
|
func (f *FaceDetect) Embeddings(req *pb.PredictOptions) ([]float32, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return nil, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if len(req.Images) == 0 || req.Images[0] == "" {
|
||||||
|
return nil, errors.New("face-detect: Embedding requires Images[0] to be a base64 image")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Images[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
return f.embedPath(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) embedPath(path string) ([]float32, error) {
|
||||||
|
var vec uintptr
|
||||||
|
var dim int32
|
||||||
|
rc := CppEmbedPath(f.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||||
|
if rc != 0 || vec == 0 || dim <= 0 {
|
||||||
|
return nil, f.lastErr("embed", path)
|
||||||
|
}
|
||||||
|
defer CppFreeVec(vec)
|
||||||
|
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||||
|
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||||
|
// nor moves this buffer and we copy immediately.
|
||||||
|
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||||
|
out := make([]float32, int(dim))
|
||||||
|
copy(out, src)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect runs SCRFD over the image and returns one Detection per face. The
|
||||||
|
// C-API emits a box as [x1,y1,x2,y2] in pixels; the proto carries x/y plus
|
||||||
|
// width/height, so the corners are converted. The 5 facial landmarks the engine
|
||||||
|
// also returns are dropped: the Detection message has no field for them.
|
||||||
|
func (f *FaceDetect) Detect(req *pb.DetectOptions) (pb.DetectResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.DetectResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Src == "" {
|
||||||
|
return pb.DetectResponse{}, errors.New("face-detect: src image is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Src)
|
||||||
|
if err != nil {
|
||||||
|
return pb.DetectResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
faces, err := f.detectFaces(path)
|
||||||
|
if err != nil {
|
||||||
|
return pb.DetectResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
dets := make([]*pb.Detection, 0, len(faces))
|
||||||
|
for _, fc := range faces {
|
||||||
|
if req.Threshold > 0 && fc.Score < req.Threshold {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x, y, w, h := fc.xywh()
|
||||||
|
dets = append(dets, &pb.Detection{
|
||||||
|
X: x,
|
||||||
|
Y: y,
|
||||||
|
Width: w,
|
||||||
|
Height: h,
|
||||||
|
Confidence: fc.Score,
|
||||||
|
ClassName: "face",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return pb.DetectResponse{Detections: dets}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaceVerify embeds the primary face in each image and reports whether they are
|
||||||
|
// the same identity by cosine distance against a threshold. A request threshold
|
||||||
|
// <= 0 falls back to the model-configured default (verify_threshold option,
|
||||||
|
// 0.35 if unset). When anti_spoofing is set, the C-API applies a MiniFASNet
|
||||||
|
// veto internally (verified forced false on a spoof); the per-image liveness
|
||||||
|
// scores are not exposed by the verify entry point, so img*_is_real /
|
||||||
|
// img*_antispoof_score stay at their zero values.
|
||||||
|
func (f *FaceDetect) FaceVerify(req *pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.FaceVerifyResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Img1 == "" || req.Img2 == "" {
|
||||||
|
return pb.FaceVerifyResponse{}, errors.New("face-detect: img1 and img2 are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path1, cleanup1, err := materializeImage(req.Img1)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceVerifyResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup1()
|
||||||
|
path2, cleanup2, err := materializeImage(req.Img2)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceVerifyResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup2()
|
||||||
|
|
||||||
|
threshold := req.Threshold
|
||||||
|
if threshold <= 0 {
|
||||||
|
threshold = f.opts.verifyThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
antiSpoof := int32(0)
|
||||||
|
if req.AntiSpoofing {
|
||||||
|
antiSpoof = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now()
|
||||||
|
var distance float32
|
||||||
|
var verified int32
|
||||||
|
rc := CppVerifyPaths(f.ctxPtr, path1, path2, threshold, antiSpoof,
|
||||||
|
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||||
|
if rc != 0 {
|
||||||
|
return pb.FaceVerifyResponse{}, f.lastErr("verify", req.Img1[:min(8, len(req.Img1))]+"...")
|
||||||
|
}
|
||||||
|
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||||
|
|
||||||
|
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||||
|
// matching the Python face backend's reporting.
|
||||||
|
confidence := float32(0)
|
||||||
|
if threshold > 0 {
|
||||||
|
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.FaceVerifyResponse{
|
||||||
|
Verified: verified != 0,
|
||||||
|
Distance: distance,
|
||||||
|
Threshold: threshold,
|
||||||
|
Confidence: confidence,
|
||||||
|
Model: f.opts.modelName,
|
||||||
|
Img1Area: f.bestArea(path1),
|
||||||
|
Img2Area: f.bestArea(path2),
|
||||||
|
ProcessingTimeMs: elapsedMs,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaceAnalyze runs the genderage head on every detected face. The C-API returns
|
||||||
|
// "M"/"F" gender labels and a rounded age; the labels are normalized to the
|
||||||
|
// "Man"/"Woman" values the proto documents.
|
||||||
|
func (f *FaceDetect) FaceAnalyze(req *pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error) {
|
||||||
|
if f.ctxPtr == 0 {
|
||||||
|
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Img == "" {
|
||||||
|
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: img is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(req.Img)
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceAnalyzeResponse{}, err
|
||||||
|
}
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
ptr := CppAnalyzeJSON(f.ctxPtr, path)
|
||||||
|
if ptr == 0 {
|
||||||
|
return pb.FaceAnalyzeResponse{}, f.lastErr("analyze", path)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
faces, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||||
|
if err != nil {
|
||||||
|
return pb.FaceAnalyzeResponse{}, fmt.Errorf("face-detect: analyze JSON: %w", err)
|
||||||
|
}
|
||||||
|
return pb.FaceAnalyzeResponse{Faces: faces}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// faceBox models a single face entry of the detect/analyze JSON documents
// produced by the engine. Age and Gender are only populated by analyze.
type faceBox struct {
	Score  float32   `json:"score"`
	Box    []float32 `json:"box"` // corners as [x1,y1,x2,y2]
	Age    float32   `json:"age"`
	Gender string    `json:"gender"`
}

// xywh turns the corner-based [x1,y1,x2,y2] box into the origin plus
// width/height form the proto uses. A box with fewer than four values (or none
// at all) yields all zeros.
func (b faceBox) xywh() (x, y, w, h float32) {
	if len(b.Box) >= 4 {
		x1, y1, x2, y2 := b.Box[0], b.Box[1], b.Box[2], b.Box[3]
		return x1, y1, x2 - x1, y2 - y1
	}
	return 0, 0, 0, 0
}
|
||||||
|
|
||||||
|
type facesJSON struct {
|
||||||
|
Faces []faceBox `json:"faces"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FaceDetect) detectFaces(path string) ([]faceBox, error) {
|
||||||
|
ptr := CppDetectJSON(f.ctxPtr, path)
|
||||||
|
if ptr == 0 {
|
||||||
|
return nil, f.lastErr("detect", path)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
var doc facesJSON
|
||||||
|
if err := json.Unmarshal([]byte(goStringFromCPtr(ptr)), &doc); err != nil {
|
||||||
|
return nil, fmt.Errorf("face-detect: detect JSON: %w", err)
|
||||||
|
}
|
||||||
|
return doc.Faces, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bestArea returns the FacialArea of the highest-scoring face in an image, or an
|
||||||
|
// empty area when detection fails or finds nothing. Best-effort: verify already
|
||||||
|
// succeeded, so a missing region must not turn a valid match into an error.
|
||||||
|
func (f *FaceDetect) bestArea(path string) *pb.FacialArea {
|
||||||
|
faces, err := f.detectFaces(path)
|
||||||
|
if err != nil || len(faces) == 0 {
|
||||||
|
return &pb.FacialArea{}
|
||||||
|
}
|
||||||
|
best := faces[0]
|
||||||
|
for _, fc := range faces[1:] {
|
||||||
|
if fc.Score > best.Score {
|
||||||
|
best = fc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x, y, w, h := best.xywh()
|
||||||
|
return &pb.FacialArea{X: x, Y: y, W: w, H: h}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseAnalyzeJSON maps the engine's analyze document onto FaceAnalysis entries.
|
||||||
|
// The engine reports gender as "M"/"F"; both the dominant label and the score
|
||||||
|
// map are filled with the "Man"/"Woman" form the proto documents.
|
||||||
|
func parseAnalyzeJSON(doc string) ([]*pb.FaceAnalysis, error) {
|
||||||
|
var parsed facesJSON
|
||||||
|
if err := json.Unmarshal([]byte(doc), &parsed); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]*pb.FaceAnalysis, 0, len(parsed.Faces))
|
||||||
|
for _, fc := range parsed.Faces {
|
||||||
|
x, y, w, h := fc.xywh()
|
||||||
|
fa := &pb.FaceAnalysis{
|
||||||
|
Region: &pb.FacialArea{X: x, Y: y, W: w, H: h},
|
||||||
|
FaceConfidence: fc.Score,
|
||||||
|
Age: fc.Age,
|
||||||
|
}
|
||||||
|
if label := normalizeGender(fc.Gender); label != "" {
|
||||||
|
fa.DominantGender = label
|
||||||
|
fa.Gender = map[string]float32{label: 1.0}
|
||||||
|
}
|
||||||
|
out = append(out, fa)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeGender translates the engine's "M"/"F" code (case-insensitive,
// surrounding whitespace ignored) into the "Man"/"Woman" labels the proto
// documents. Blank input becomes ""; any other value is returned exactly as
// given.
func normalizeGender(g string) string {
	code := strings.ToUpper(strings.TrimSpace(g))
	if code == "" {
		return ""
	}
	if label, known := map[string]string{"M": "Man", "F": "Woman"}[code]; known {
		return label
	}
	return g
}
|
||||||
|
|
||||||
|
// materializeImage makes an image available as a file on disk and returns its
// path together with a cleanup func the caller must run when done.
//
// Input handling, in order:
//   - an empty string is rejected;
//   - a string naming an existing filesystem entry is returned unchanged with a
//     no-op cleanup (a convenience for callers such as test fixtures);
//   - otherwise the string is treated as base64, with any "data:...," URI
//     prefix removed first, and decoded into a fresh temp file whose cleanup
//     deletes it.
//
// On every error path the returned cleanup is a no-op and no temp file is left
// behind.
func materializeImage(src string) (path string, cleanup func(), err error) {
	nothing := func() {}
	if src == "" {
		return "", nothing, errors.New("face-detect: empty image input")
	}
	if _, statErr := os.Stat(src); statErr == nil {
		return src, nothing, nil
	}

	encoded := src
	if strings.HasPrefix(encoded, "data:") {
		// Keep only what follows the first comma of the data URI.
		if _, rest, found := strings.Cut(encoded, ","); found {
			encoded = rest
		}
	}
	raw, decErr := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded))
	if decErr != nil || len(raw) == 0 {
		return "", nothing, errors.New("face-detect: image is neither an existing path nor valid base64")
	}

	tmp, createErr := os.CreateTemp("", "face-detect-*.img")
	if createErr != nil {
		return "", nothing, fmt.Errorf("face-detect: create temp image: %w", createErr)
	}
	tmpName := tmp.Name()
	remove := func() { _ = os.Remove(tmpName) }

	_, wErr := tmp.Write(raw)
	if wErr != nil {
		_ = tmp.Close()
		remove()
		return "", nothing, fmt.Errorf("face-detect: write temp image: %w", wErr)
	}
	cErr := tmp.Close()
	if cErr != nil {
		remove()
		return "", nothing, fmt.Errorf("face-detect: close temp image: %w", cErr)
	}
	return tmpName, remove, nil
}
|
||||||
|
|
||||||
|
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||||
|
func (f *FaceDetect) lastErr(op, subject string) error {
|
||||||
|
msg := strings.TrimSpace(CppLastError(f.ctxPtr))
|
||||||
|
if msg == "" {
|
||||||
|
msg = "no error detail"
|
||||||
|
}
|
||||||
|
return fmt.Errorf("face-detect: %s failed for %q: %s", op, subject, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||||
|
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||||
|
//
|
||||||
|
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||||
|
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||||
|
func goStringFromCPtr(cptr uintptr) string {
|
||||||
|
if cptr == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||||
|
n := 0
|
||||||
|
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return string(unsafe.Slice((*byte)(p), n))
|
||||||
|
}
|
||||||
230
backend/go/face-detect/gofacedetect_test.go
Normal file
230
backend/go/face-detect/gofacedetect_test.go
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFaceDetect(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "face-detect Backend Suite")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
libLoadOnce sync.Once
|
||||||
|
libLoadErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||||
|
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||||
|
// specs skip themselves) when libfacedetect.so is not loadable from cwd
|
||||||
|
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||||
|
func ensureLibLoaded() error {
|
||||||
|
libLoadOnce.Do(func() {
|
||||||
|
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libfacedetect.so"
|
||||||
|
}
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
libLoadErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
purego.RegisterLibFunc(&CppAbiVersion, lib, "facedetect_capi_abi_version")
|
||||||
|
purego.RegisterLibFunc(&CppLoad, lib, "facedetect_capi_load")
|
||||||
|
purego.RegisterLibFunc(&CppFree, lib, "facedetect_capi_free")
|
||||||
|
purego.RegisterLibFunc(&CppLastError, lib, "facedetect_capi_last_error")
|
||||||
|
purego.RegisterLibFunc(&CppFreeString, lib, "facedetect_capi_free_string")
|
||||||
|
purego.RegisterLibFunc(&CppFreeVec, lib, "facedetect_capi_free_vec")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPath, lib, "facedetect_capi_embed_path")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedRGB, lib, "facedetect_capi_embed_rgb")
|
||||||
|
purego.RegisterLibFunc(&CppDetectJSON, lib, "facedetect_capi_detect_path_json")
|
||||||
|
purego.RegisterLibFunc(&CppVerifyPaths, lib, "facedetect_capi_verify_paths")
|
||||||
|
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "facedetect_capi_analyze_path_json")
|
||||||
|
})
|
||||||
|
return libLoadErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("parseOptions", func() {
|
||||||
|
It("defaults verify_threshold to 0.35", func() {
|
||||||
|
o := parseOptions(nil)
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||||
|
Expect(o.modelName).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0.4", "model_name:buffalo_l", "unknown:x"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||||
|
Expect(o.modelName).To(Equal("buffalo_l"))
|
||||||
|
|
||||||
|
o2 := parseOptions([]string{"threshold:0.3"})
|
||||||
|
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores non-positive thresholds and keeps the default", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("normalizeGender", func() {
|
||||||
|
It("maps M/F codes to Man/Woman", func() {
|
||||||
|
Expect(normalizeGender("M")).To(Equal("Man"))
|
||||||
|
Expect(normalizeGender("f")).To(Equal("Woman"))
|
||||||
|
Expect(normalizeGender(" m ")).To(Equal("Man"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("passes empty and unknown codes through", func() {
|
||||||
|
Expect(normalizeGender("")).To(Equal(""))
|
||||||
|
Expect(normalizeGender("nonbinary")).To(Equal("nonbinary"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("faceBox.xywh", func() {
|
||||||
|
It("converts an [x1,y1,x2,y2] box to x/y/width/height", func() {
|
||||||
|
b := faceBox{Box: []float32{10, 20, 50, 80}}
|
||||||
|
x, y, w, h := b.xywh()
|
||||||
|
Expect(x).To(Equal(float32(10)))
|
||||||
|
Expect(y).To(Equal(float32(20)))
|
||||||
|
Expect(w).To(Equal(float32(40)))
|
||||||
|
Expect(h).To(Equal(float32(60)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns zeros for a short box", func() {
|
||||||
|
x, y, w, h := faceBox{Box: []float32{1, 2}}.xywh()
|
||||||
|
Expect([]float32{x, y, w, h}).To(Equal([]float32{0, 0, 0, 0}))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("parseAnalyzeJSON", func() {
|
||||||
|
It("maps region, age and gender for each face", func() {
|
||||||
|
doc := `{"faces":[
|
||||||
|
{"score":0.997,"box":[10,20,50,80],"age":31,"gender":"M"},
|
||||||
|
{"score":0.81,"box":[0,0,40,40],"age":24,"gender":"F"}]}`
|
||||||
|
faces, err := parseAnalyzeJSON(doc)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(HaveLen(2))
|
||||||
|
|
||||||
|
Expect(faces[0].FaceConfidence).To(BeNumerically("~", 0.997, 1e-4))
|
||||||
|
Expect(faces[0].Age).To(BeNumerically("~", 31, 1e-4))
|
||||||
|
Expect(faces[0].DominantGender).To(Equal("Man"))
|
||||||
|
Expect(faces[0].Gender).To(HaveKeyWithValue("Man", float32(1.0)))
|
||||||
|
Expect(faces[0].Region.W).To(Equal(float32(40)))
|
||||||
|
Expect(faces[0].Region.H).To(Equal(float32(60)))
|
||||||
|
|
||||||
|
Expect(faces[1].DominantGender).To(Equal("Woman"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("tolerates a missing gender field", func() {
|
||||||
|
faces, err := parseAnalyzeJSON(`{"faces":[{"score":0.5,"box":[0,0,10,10],"age":40}]}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(HaveLen(1))
|
||||||
|
Expect(faces[0].DominantGender).To(Equal(""))
|
||||||
|
Expect(faces[0].Gender).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns no faces for an empty document", func() {
|
||||||
|
faces, err := parseAnalyzeJSON(`{"faces":[]}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(faces).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error on malformed JSON", func() {
|
||||||
|
_, err := parseAnalyzeJSON(`{not-json`)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("materializeImage", func() {
|
||||||
|
It("decodes a base64 payload to a temp file", func() {
|
||||||
|
payload := base64.StdEncoding.EncodeToString([]byte("\xff\xd8\xff\xe0fake-jpeg"))
|
||||||
|
path, cleanup, err := materializeImage(payload)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
data, rerr := os.ReadFile(path)
|
||||||
|
Expect(rerr).ToNot(HaveOccurred())
|
||||||
|
Expect(data).To(Equal([]byte("\xff\xd8\xff\xe0fake-jpeg")))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("strips a data: URI prefix before decoding", func() {
|
||||||
|
payload := "data:image/png;base64," + base64.StdEncoding.EncodeToString([]byte("hello"))
|
||||||
|
path, cleanup, err := materializeImage(payload)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
data, rerr := os.ReadFile(path)
|
||||||
|
Expect(rerr).ToNot(HaveOccurred())
|
||||||
|
Expect(data).To(Equal([]byte("hello")))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("uses an existing path as-is", func() {
|
||||||
|
tmp, err := os.CreateTemp("", "face-detect-fixture-*.bin")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer func() { _ = os.Remove(tmp.Name()) }()
|
||||||
|
Expect(tmp.Close()).To(Succeed())
|
||||||
|
|
||||||
|
path, cleanup, err := materializeImage(tmp.Name())
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
defer cleanup()
|
||||||
|
Expect(path).To(Equal(tmp.Name()))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("errors on input that is neither a path nor base64", func() {
|
||||||
|
_, _, err := materializeImage("not base64!!!")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// The specs below exercise the real C-API end to end. They run only when both a
|
||||||
|
// model GGUF and a test image are provided, and skip cleanly otherwise so the
|
||||||
|
// suite stays green without large assets.
|
||||||
|
var _ = Describe("FaceDetect end-to-end", Ordered, func() {
|
||||||
|
var (
|
||||||
|
f *FaceDetect
|
||||||
|
modelPath = os.Getenv("FACEDETECT_BACKEND_TEST_MODEL")
|
||||||
|
imagePath = os.Getenv("FACEDETECT_BACKEND_TEST_IMAGE")
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeAll(func() {
|
||||||
|
if modelPath == "" || imagePath == "" {
|
||||||
|
Skip("set FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE to run the e2e specs")
|
||||||
|
}
|
||||||
|
if err := ensureLibLoaded(); err != nil {
|
||||||
|
Skip("libfacedetect.so not loadable: " + err.Error())
|
||||||
|
}
|
||||||
|
f = &FaceDetect{}
|
||||||
|
Expect(f.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("embeds the primary face in an image", func() {
|
||||||
|
emb, err := f.Embeddings(&pb.PredictOptions{Images: []string{imagePath}})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(emb).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("detects at least one face", func() {
|
||||||
|
resp, err := f.Detect(&pb.DetectOptions{Src: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Detections).ToNot(BeEmpty())
|
||||||
|
Expect(resp.Detections[0].ClassName).To(Equal("face"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("verifies an image against itself as the same identity", func() {
|
||||||
|
resp, err := f.FaceVerify(&pb.FaceVerifyRequest{Img1: imagePath, Img2: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Verified).To(BeTrue())
|
||||||
|
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("analyzes age/gender for each face", func() {
|
||||||
|
resp, err := f.FaceAnalyze(&pb.FaceAnalyzeRequest{Img: imagePath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Faces).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
})
|
||||||
65
backend/go/face-detect/main.go
Normal file
65
backend/go/face-detect/main.go
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
|
//
|
||||||
|
// Loads libfacedetect.so via purego and registers the flat C-API entry points
|
||||||
|
// declared in facedetect_capi.h. The library name can be overridden with
|
||||||
|
// FACEDETECT_LIBRARY (mirrors the VOICEDETECT_LIBRARY / PARAKEET_LIBRARY
|
||||||
|
// convention in the sibling backends); the default looks for the .so next to
|
||||||
|
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// addr is the -addr command-line flag passed to grpc.StartServer.
var addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
|
||||||
|
// LibFuncs pairs a Go function variable with the C symbol purego should bind
// it to.
type LibFuncs struct {
	// FuncPtr is a pointer to the Go function variable to populate.
	FuncPtr any
	// Name is the exported symbol name inside the shared library.
	Name string
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libfacedetect.so"
|
||||||
|
}
|
||||||
|
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("face-detect: dlopen %q: %w", libName, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bound 1:1 to facedetect_capi.h. char*/float* returns are registered as
|
||||||
|
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppAbiVersion, "facedetect_capi_abi_version"},
|
||||||
|
{&CppLoad, "facedetect_capi_load"},
|
||||||
|
{&CppFree, "facedetect_capi_free"},
|
||||||
|
{&CppLastError, "facedetect_capi_last_error"},
|
||||||
|
{&CppFreeString, "facedetect_capi_free_string"},
|
||||||
|
{&CppFreeVec, "facedetect_capi_free_vec"},
|
||||||
|
{&CppEmbedPath, "facedetect_capi_embed_path"},
|
||||||
|
{&CppEmbedRGB, "facedetect_capi_embed_rgb"},
|
||||||
|
{&CppDetectJSON, "facedetect_capi_detect_path_json"},
|
||||||
|
{&CppVerifyPaths, "facedetect_capi_verify_paths"},
|
||||||
|
{&CppAnalyzeJSON, "facedetect_capi_analyze_path_json"},
|
||||||
|
}
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "[face-detect] ABI=%d\n", CppAbiVersion())
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &FaceDetect{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
47
backend/go/face-detect/options.go
Normal file
47
backend/go/face-detect/options.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultVerifyThreshold is the cosine-distance cutoff applied when neither the
// request nor the model options provide one. It equals the insightface
// buffalo_l ArcFace R50 default shipped with the Python face backend, so both
// implementations reach the same verdicts out of the box.
const defaultVerifyThreshold float32 = 0.35
|
||||||
|
|
||||||
|
// loadOptions is the result of parsing the model-level "key:value" options
// passed to the face-detect backend.
type loadOptions struct {
	// verifyThreshold is the cutoff applied when a verify request does not
	// carry its own threshold.
	verifyThreshold float32
	// modelName is the name set through the "model_name" option; it is left
	// empty when that option is absent.
	modelName string
}
|
||||||
|
|
||||||
|
// splitOption breaks a "key:value" option at its first colon and returns both
// halves with surrounding whitespace trimmed. When the option contains no
// colon, ok is false and key/value are empty.
func splitOption(o string) (key, value string, ok bool) {
	rawKey, rawValue, found := strings.Cut(o, ":")
	if !found {
		return "", "", false
	}
	key = strings.TrimSpace(rawKey)
	value = strings.TrimSpace(rawValue)
	return key, value, true
}
|
||||||
|
|
||||||
|
// parseOptions reads the backend "key:value" option slice. Unknown keys are
|
||||||
|
// ignored. Defaults: verify_threshold 0.35, model_name derived from the file.
|
||||||
|
func parseOptions(opts []string) loadOptions {
|
||||||
|
o := loadOptions{verifyThreshold: defaultVerifyThreshold}
|
||||||
|
for _, oo := range opts {
|
||||||
|
key, value, ok := splitOption(oo)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch key {
|
||||||
|
case "verify_threshold", "threshold":
|
||||||
|
if f, err := strconv.ParseFloat(value, 32); err == nil && f > 0 {
|
||||||
|
o.verifyThreshold = float32(f)
|
||||||
|
}
|
||||||
|
case "model_name":
|
||||||
|
o.modelName = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
68
backend/go/face-detect/package.sh
Normal file
68
backend/go/face-detect/package.sh
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
#
# Bundle the face-detect-grpc binary, libfacedetect.so, the core runtime libs
# (libc/libstdc++/libgomp + ld.so) and the GPU runtime for the active BUILD_TYPE
# so the package is self-contained. Mirrors backend/go/voice-detect/package.sh;
# run.sh routes the (CGO_ENABLED=0) binary through lib/ld.so so the packaged libc
# is used instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# Core runtime libraries bundled into package/lib. The file names are the same
# for every supported Linux architecture; only the source directory differs.
CORE_LIBS=(
    libc.so.6
    libgcc_s.so.1
    libstdc++.so.6
    libm.so.6
    libgomp.so.1
    libdl.so.2
    librt.so.1
    libpthread.so.0
)

# copy_core_runtime LOADER LIBDIR
# Copies the dynamic loader LOADER to package/lib/ld.so, then every entry of
# CORE_LIBS from LIBDIR into package/lib. Symlinks are dereferenced (-L); with
# 'set -e' active, the first failing copy aborts the script.
copy_core_runtime() {
    local loader="$1" libdir="$2" lib
    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in "${CORE_LIBS[@]}"; do
        cp -arfLv "$libdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/face-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libfacedetect.so + any soname symlinks. purego.Dlopen resolves it via
# LD_LIBRARY_PATH, which run.sh points at lib/.
cp -avf "$CURDIR"/libfacedetect.so* "$CURDIR/package/lib/" 2>/dev/null || {
    echo "ERROR: libfacedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
}

# Detect architecture and copy the core runtime libs libfacedetect.so links
# against, plus the matching dynamic loader as lib/ld.so.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_core_runtime /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_core_runtime /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    # Nothing to bundle in this branch: no loader or core libs are copied.
    echo "Detected Darwin"
else
    # Diagnostics go to stderr, like the libfacedetect.so error above.
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# Package GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) based on
# BUILD_TYPE so the backend can reach the GPU without the runtime base image
# shipping those drivers.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||||
16
backend/go/face-detect/run.sh
Normal file
16
backend/go/face-detect/run.sh
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Launcher for the packaged face-detect gRPC backend: puts the bundled library
# directories on the loader search path and starts face-detect-grpc, forwarding
# all command-line arguments.
set -e

CURDIR=$(dirname "$(realpath "$0")")

# Prepend the bundled library directories. The ${VAR:+...} form appends the
# caller's LD_LIBRARY_PATH only when it is set and non-empty; a plain
# "...:$LD_LIBRARY_PATH" would leave a trailing empty entry when it is unset,
# and ld.so interprets an empty entry as the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If a self-contained ld.so was packaged, route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the voice-detect /
# whisper / parakeet backends' runtime layout).
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/face-detect-grpc" "$@"
fi

# No bundled loader: run the binary directly with the host's loader.
exec "$CURDIR/face-detect-grpc" "$@"
|
||||||
15
backend/go/face-detect/test.sh
Normal file
15
backend/go/face-detect/test.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
# Runs the face-detect backend Go test suite from the backend directory.
set -e

CURDIR=$(dirname "$(realpath "$0")")
cd "$CURDIR"

echo "Running face-detect backend tests..."

# The pure-Go parsing specs always run. The embed/detect/verify/analyze smoke
# specs run only when a model + image are provided via
# FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE; otherwise they
# auto-skip.
#
# ${VAR:+...} appends the caller's LD_LIBRARY_PATH only when it is non-empty, so
# no trailing empty entry (which ld.so reads as the current directory) is added.
LD_LIBRARY_PATH="$CURDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" go test -v -timeout 1200s .

echo "face-detect tests completed."
|
||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# omnivoice.cpp version
|
# omnivoice.cpp version
|
||||||
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
||||||
OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
|
OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
|
||||||
SO_TARGET?=libgomnivoicecpp.so
|
SO_TARGET?=libgomnivoicecpp.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# parakeet-cpp backend Makefile.
|
# parakeet-cpp backend Makefile.
|
||||||
#
|
#
|
||||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||||
# (.github/bump_deps.sh) can find and update it - matches the
|
# (.github/bump_deps.sh) can find and update it - matches the
|
||||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||||
#
|
#
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
# That's what the L0 smoke test uses. The default target below does the
|
# That's what the L0 smoke test uses. The default target below does the
|
||||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||||
|
|
||||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||||
|
|
||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
"unicode"
|
"unicode"
|
||||||
@@ -944,15 +943,9 @@ func InitializeONNXRuntime() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if libPath == "" {
|
if libPath == "" {
|
||||||
// LocalAI: default to the platform-native shared library
|
|
||||||
// extension when nothing else is found (dyld vs ld.so).
|
|
||||||
if runtime.GOOS == "darwin" {
|
|
||||||
libPath = "/usr/local/lib/libonnxruntime.dylib"
|
|
||||||
} else {
|
|
||||||
libPath = "/usr/local/lib/libonnxruntime.so"
|
libPath = "/usr/local/lib/libonnxruntime.so"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
ort.SetSharedLibraryPath(libPath)
|
ort.SetSharedLibraryPath(libPath)
|
||||||
|
|
||||||
if err := ort.InitializeEnvironment(); err != nil {
|
if err := ort.InitializeEnvironment(); err != nil {
|
||||||
|
|||||||
@@ -32,10 +32,6 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|||||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
||||||
elif [ $(uname -s) = "Darwin" ]; then
|
|
||||||
# macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in
|
|
||||||
# run.sh); there is no ld.so loader nor glibc to bundle.
|
|
||||||
echo "Detected Darwin"
|
|
||||||
else
|
else
|
||||||
echo "Error: Could not detect architecture"
|
echo "Error: Could not detect architecture"
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
@@ -3,12 +3,6 @@ set -ex
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
CURDIR=$(dirname "$(realpath $0)")
|
||||||
|
|
||||||
if [ "$(uname)" = "Darwin" ]; then
|
|
||||||
# macOS uses dyld: there is no ld.so loader, and the search path env
|
|
||||||
# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
|
|
||||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
|
||||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
|
|
||||||
else
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
||||||
|
|
||||||
@@ -16,6 +10,5 @@ else
|
|||||||
echo "Using lib/ld.so"
|
echo "Using lib/ld.so"
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
||||||
fi
|
fi
|
||||||
fi
|
|
||||||
|
|
||||||
exec $CURDIR/supertonic "$@"
|
exec $CURDIR/supertonic "$@"
|
||||||
|
|||||||
18
backend/go/voice-detect/.gitignore
vendored
Normal file
18
backend/go/voice-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Fetched upstream sources
|
||||||
|
sources/
|
||||||
|
|
||||||
|
# CMake build directories
|
||||||
|
build*/
|
||||||
|
|
||||||
|
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||||
|
# symlinked for local dev; the real sources live in voice-detect.cpp upstream.
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
voicedetect_capi.h
|
||||||
|
compile_commands.json
|
||||||
|
|
||||||
|
# Compiled backend binary
|
||||||
|
voice-detect-grpc
|
||||||
|
|
||||||
|
# Packaging output
|
||||||
|
package/
|
||||||
107
backend/go/voice-detect/Makefile
Normal file
107
backend/go/voice-detect/Makefile
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
# voice-detect backend Makefile.
|
||||||
|
#
|
||||||
|
# Upstream pin lives below in VOICEDETECT_VERSION so that .github/bump_deps.sh
# can find and update it (same convention as parakeet.cpp / whisper.cpp / ds4).
|
||||||
|
#
|
||||||
|
# Local dev shortcut: if you already have an out-of-tree voice-detect.cpp build,
|
||||||
|
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||||
|
#
|
||||||
|
# ln -sf /path/to/voice-detect.cpp/build-shared/libvoicedetect.so .
|
||||||
|
# ln -sf /path/to/voice-detect.cpp/include/voicedetect_capi.h .
|
||||||
|
# go build -o voice-detect-grpc .
|
||||||
|
#
|
||||||
|
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||||
|
# not need a side-checkout.
|
||||||
|
|
||||||
|
VOICEDETECT_VERSION?=30beecdbe9662fb27e826ae4ec949d3fa02ff366
|
||||||
|
VOICEDETECT_REPO?=https://github.com/mudler/voice-detect.cpp
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
GO_TAGS?=
|
||||||
|
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
|
||||||
|
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||||
|
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||||
|
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||||
|
|
||||||
|
# Build ggml statically into libvoicedetect.so (PIC) so the shared lib is
|
||||||
|
# self-contained: dlopen needs no libggml*.so alongside it, only system libs
|
||||||
|
# (libstdc++/libgomp/libc) that the runtime image already provides.
|
||||||
|
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DVOICEDETECT_SHARED=ON -DVOICEDETECT_BUILD_CLI=OFF -DVOICEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||||
|
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# voice-detect.cpp gates its GGML backends behind VOICEDETECT_GGML_* options and
|
||||||
|
# does set(GGML_CUDA ${VOICEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||||
|
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the VOICEDETECT_GGML_*
|
||||||
|
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_CUDA=ON
|
||||||
|
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, reaches
|
||||||
|
# torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||||
|
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||||
|
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||||
|
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||||
|
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_CUDNN=ON
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_HIP=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_VULKAN=ON
|
||||||
|
else ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DVOICEDETECT_GGML_METAL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: voice-detect-grpc package build clean purge test all
|
||||||
|
|
||||||
|
all: voice-detect-grpc
|
||||||
|
|
||||||
|
# Clone the upstream voice-detect.cpp source at the pinned commit. Directory acts
|
||||||
|
# as the target so make only re-clones when missing. After a VOICEDETECT_VERSION
|
||||||
|
# bump, run 'make purge && make' to refetch.
|
||||||
|
sources/voice-detect.cpp:
|
||||||
|
mkdir -p sources/voice-detect.cpp
|
||||||
|
cd sources/voice-detect.cpp && \
|
||||||
|
git init -q && \
|
||||||
|
git remote add origin $(VOICEDETECT_REPO) && \
|
||||||
|
git fetch --depth 1 origin $(VOICEDETECT_VERSION) && \
|
||||||
|
git checkout FETCH_HEAD && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||||
|
# sources so purego.Dlopen("libvoicedetect.so") and the cgo-less build both pick
|
||||||
|
# them up.
|
||||||
|
libvoicedetect.so: sources/voice-detect.cpp
|
||||||
|
cmake -B sources/voice-detect.cpp/build-shared -S sources/voice-detect.cpp $(CMAKE_ARGS)
|
||||||
|
cmake --build sources/voice-detect.cpp/build-shared --config Release -j$(JOBS) --target voicedetect
|
||||||
|
cp -fv sources/voice-detect.cpp/build-shared/libvoicedetect.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/voice-detect.cpp/include/voicedetect_capi.h ./
|
||||||
|
|
||||||
|
voice-detect-grpc: libvoicedetect.so main.go govoicedetect.go options.go
|
||||||
|
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o voice-detect-grpc .
|
||||||
|
|
||||||
|
package: voice-detect-grpc
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
build: package
|
||||||
|
|
||||||
|
# Test target. The embed/verify/analyze smoke specs are gated on
|
||||||
|
# VOICEDETECT_BACKEND_TEST_MODEL + VOICEDETECT_BACKEND_TEST_WAV; without them the
|
||||||
|
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||||
|
test:
|
||||||
|
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf libvoicedetect.so* voicedetect_capi.h package voice-detect-grpc
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf sources/voice-detect.cpp
|
||||||
273
backend/go/voice-detect/govoicedetect.go
Normal file
273
backend/go/voice-detect/govoicedetect.go
Normal file
@@ -0,0 +1,273 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// purego-bound entry points from libvoicedetect.so. Names match
|
||||||
|
// voicedetect_capi.h exactly so a `nm libvoicedetect.so | grep voicedetect_capi`
|
||||||
|
// is enough to spot drift.
|
||||||
|
//
|
||||||
|
// The opaque ctx and the malloc'd char*/float* return values are declared as
|
||||||
|
// uintptr so we get the raw pointer back and can release it via the matching
|
||||||
|
// capi free function. purego's native string/[]float32 returns would copy and
|
||||||
|
// forget the original pointer, leaking the C-owned buffer on every call.
|
||||||
|
var (
|
||||||
|
CppAbiVersion func() int32
|
||||||
|
CppLoad func(ggufPath string) uintptr
|
||||||
|
CppFree func(ctx uintptr)
|
||||||
|
CppLastError func(ctx uintptr) string
|
||||||
|
CppFreeString func(s uintptr)
|
||||||
|
CppFreeVec func(v uintptr)
|
||||||
|
CppEmbedPath func(ctx uintptr, wavPath string, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppEmbedPCM func(ctx uintptr, pcm []float32, nSamples, sampleRate int32, outVec, outDim unsafe.Pointer) int32
|
||||||
|
CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, outDistance, outVerified unsafe.Pointer) int32
|
||||||
|
CppAnalyzeJSON func(ctx uintptr, wavPath string) uintptr
|
||||||
|
)
|
||||||
|
|
||||||
|
// VoiceDetect implements the speaker-recognition voice subset of the Backend
|
||||||
|
// gRPC service over libvoicedetect.so. The C side keeps a single loaded model
|
||||||
|
// plus a per-ctx last-error buffer and is not reentrant, so base.SingleThread
|
||||||
|
// serializes every call.
|
||||||
|
type VoiceDetect struct {
|
||||||
|
base.SingleThread
|
||||||
|
opts loadOptions
|
||||||
|
ctxPtr uintptr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *VoiceDetect) Load(opts *pb.ModelOptions) error {
|
||||||
|
model := opts.ModelFile
|
||||||
|
if model == "" {
|
||||||
|
model = opts.ModelPath
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||||
|
model = filepath.Join(opts.ModelPath, model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
return errors.New("voice-detect: ModelFile is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
v.opts = parseOptions(opts.Options)
|
||||||
|
if v.opts.modelName == "" {
|
||||||
|
v.opts.modelName = filepath.Base(model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||||
|
// one backend process per model and serves requests concurrently, so the
|
||||||
|
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||||
|
// VOICEDETECT_THREADS is read by the engine at backend construction, so it
|
||||||
|
// must be set before the capi load. A non-positive Threads means "unset":
|
||||||
|
// leave the env alone so the engine keeps its sane default.
|
||||||
|
threads := opts.Threads
|
||||||
|
if threads > 0 {
|
||||||
|
if err := os.Setenv("VOICEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||||
|
return fmt.Errorf("voice-detect: set VOICEDETECT_THREADS: %w", err)
|
||||||
|
}
|
||||||
|
xlog.Info("voice-detect: applying LocalAI thread budget", "threads", threads)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Info("voice-detect: loading model", "model", model,
|
||||||
|
"verify_threshold", v.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||||
|
|
||||||
|
ctx := CppLoad(model)
|
||||||
|
if ctx == 0 {
|
||||||
|
// The last-error buffer lives on the ctx that was never returned, so
|
||||||
|
// surface the path the operator tried to load instead.
|
||||||
|
return fmt.Errorf("voice-detect: voicedetect_capi_load failed for %q", model)
|
||||||
|
}
|
||||||
|
v.ctxPtr = ctx
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceEmbed returns the L2-normalized speaker embedding for an audio clip.
|
||||||
|
// The request carries a filesystem PATH; the HTTP layer materializes
|
||||||
|
// base64/URL/data-URI inputs to a temp file before the gRPC call.
|
||||||
|
func (v *VoiceDetect) VoiceEmbed(req *pb.VoiceEmbedRequest) (pb.VoiceEmbedResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceEmbedResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio == "" {
|
||||||
|
return pb.VoiceEmbedResponse{}, errors.New("voice-detect: audio path is required")
|
||||||
|
}
|
||||||
|
emb, err := v.embedPath(req.Audio)
|
||||||
|
if err != nil {
|
||||||
|
return pb.VoiceEmbedResponse{}, err
|
||||||
|
}
|
||||||
|
return pb.VoiceEmbedResponse{Embedding: emb, Model: v.opts.modelName}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *VoiceDetect) embedPath(path string) ([]float32, error) {
|
||||||
|
var vec uintptr
|
||||||
|
var dim int32
|
||||||
|
rc := CppEmbedPath(v.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||||
|
if rc != 0 || vec == 0 || dim <= 0 {
|
||||||
|
return nil, v.lastErr("embed", path)
|
||||||
|
}
|
||||||
|
defer CppFreeVec(vec)
|
||||||
|
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||||
|
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||||
|
// nor moves this buffer and we copy immediately.
|
||||||
|
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||||
|
out := make([]float32, int(dim))
|
||||||
|
copy(out, src)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceVerify embeds two clips and reports whether they are the same speaker by
|
||||||
|
// cosine distance against a threshold. A request threshold <= 0 falls back to
|
||||||
|
// the model-configured default (verify_threshold option, 0.25 if unset).
|
||||||
|
func (v *VoiceDetect) VoiceVerify(req *pb.VoiceVerifyRequest) (pb.VoiceVerifyResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceVerifyResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio1 == "" || req.Audio2 == "" {
|
||||||
|
return pb.VoiceVerifyResponse{}, errors.New("voice-detect: audio1 and audio2 are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold := req.Threshold
|
||||||
|
if threshold <= 0 {
|
||||||
|
threshold = v.opts.verifyThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now()
|
||||||
|
var distance float32
|
||||||
|
var verified int32
|
||||||
|
rc := CppVerifyPaths(v.ctxPtr, req.Audio1, req.Audio2, threshold,
|
||||||
|
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||||
|
if rc != 0 {
|
||||||
|
return pb.VoiceVerifyResponse{}, v.lastErr("verify", req.Audio1+","+req.Audio2)
|
||||||
|
}
|
||||||
|
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||||
|
|
||||||
|
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||||
|
// matching the Python speaker-recognition backend's reporting.
|
||||||
|
confidence := float32(0)
|
||||||
|
if threshold > 0 {
|
||||||
|
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.VoiceVerifyResponse{
|
||||||
|
Verified: verified != 0,
|
||||||
|
Distance: distance,
|
||||||
|
Threshold: threshold,
|
||||||
|
Confidence: confidence,
|
||||||
|
Model: v.opts.modelName,
|
||||||
|
ProcessingTimeMs: elapsedMs,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VoiceAnalyze runs the age/gender/emotion heads on a single clip. The C-API
|
||||||
|
// always evaluates every supported head, so the request's actions filter is
|
||||||
|
// advisory and the full analysis is returned as a single segment (the engine
|
||||||
|
// does not produce time-bounded segments).
|
||||||
|
func (v *VoiceDetect) VoiceAnalyze(req *pb.VoiceAnalyzeRequest) (pb.VoiceAnalyzeResponse, error) {
|
||||||
|
if v.ctxPtr == 0 {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, errors.New("voice-detect: model not loaded")
|
||||||
|
}
|
||||||
|
if req.Audio == "" {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, errors.New("voice-detect: audio path is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr := CppAnalyzeJSON(v.ctxPtr, req.Audio)
|
||||||
|
if ptr == 0 {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, v.lastErr("analyze", req.Audio)
|
||||||
|
}
|
||||||
|
defer CppFreeString(ptr)
|
||||||
|
|
||||||
|
seg, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||||
|
if err != nil {
|
||||||
|
return pb.VoiceAnalyzeResponse{}, fmt.Errorf("voice-detect: analyze JSON for %q: %w", req.Audio, err)
|
||||||
|
}
|
||||||
|
return pb.VoiceAnalyzeResponse{Segments: []*pb.VoiceAnalysis{seg}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// analyzeJSON mirrors the document returned by voicedetect_capi_analyze_path_json:
|
||||||
|
//
|
||||||
|
// {"age":42.0,
|
||||||
|
// "gender":{"label":"female","female":0.88,"male":0.12},
|
||||||
|
// "emotion":{"label":"neutral","scores":{"neutral":0.7, ...}}}
|
||||||
|
//
|
||||||
|
// gender is a mixed object (a "label" string plus per-class float scores), so
|
||||||
|
// it is decoded into raw messages and split in parseAnalyzeJSON.
|
||||||
|
type analyzeJSON struct {
|
||||||
|
Age float32 `json:"age"`
|
||||||
|
Gender map[string]json.RawMessage `json:"gender"`
|
||||||
|
Emotion struct {
|
||||||
|
Label string `json:"label"`
|
||||||
|
Scores map[string]float32 `json:"scores"`
|
||||||
|
} `json:"emotion"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseAnalyzeJSON maps the engine's analyze document onto a VoiceAnalysis.
|
||||||
|
// start/end stay 0: the model emits a single whole-utterance result, not
|
||||||
|
// time-bounded segments.
|
||||||
|
func parseAnalyzeJSON(doc string) (*pb.VoiceAnalysis, error) {
|
||||||
|
var a analyzeJSON
|
||||||
|
if err := json.Unmarshal([]byte(doc), &a); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
seg := &pb.VoiceAnalysis{
|
||||||
|
Age: a.Age,
|
||||||
|
DominantEmotion: a.Emotion.Label,
|
||||||
|
Emotion: a.Emotion.Scores,
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(a.Gender) > 0 {
|
||||||
|
gender := make(map[string]float32, len(a.Gender))
|
||||||
|
for k, raw := range a.Gender {
|
||||||
|
if k == "label" {
|
||||||
|
_ = json.Unmarshal(raw, &seg.DominantGender)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var score float32
|
||||||
|
if err := json.Unmarshal(raw, &score); err == nil {
|
||||||
|
gender[k] = score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seg.Gender = gender
|
||||||
|
}
|
||||||
|
|
||||||
|
return seg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||||
|
func (v *VoiceDetect) lastErr(op, subject string) error {
|
||||||
|
msg := strings.TrimSpace(CppLastError(v.ctxPtr))
|
||||||
|
if msg == "" {
|
||||||
|
msg = "no error detail"
|
||||||
|
}
|
||||||
|
return fmt.Errorf("voice-detect: %s failed for %q: %s", op, subject, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||||
|
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||||
|
//
|
||||||
|
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||||
|
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||||
|
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||||
|
func goStringFromCPtr(cptr uintptr) string {
|
||||||
|
if cptr == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||||
|
n := 0
|
||||||
|
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return string(unsafe.Slice((*byte)(p), n))
|
||||||
|
}
|
||||||
144
backend/go/voice-detect/govoicedetect_test.go
Normal file
144
backend/go/voice-detect/govoicedetect_test.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVoiceDetect(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "voice-detect Backend Suite")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
libLoadOnce sync.Once
|
||||||
|
libLoadErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||||
|
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||||
|
// specs skip themselves) when libvoicedetect.so is not loadable from cwd
|
||||||
|
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||||
|
func ensureLibLoaded() error {
|
||||||
|
libLoadOnce.Do(func() {
|
||||||
|
libName := os.Getenv("VOICEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libvoicedetect.so"
|
||||||
|
}
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
libLoadErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
purego.RegisterLibFunc(&CppAbiVersion, lib, "voicedetect_capi_abi_version")
|
||||||
|
purego.RegisterLibFunc(&CppLoad, lib, "voicedetect_capi_load")
|
||||||
|
purego.RegisterLibFunc(&CppFree, lib, "voicedetect_capi_free")
|
||||||
|
purego.RegisterLibFunc(&CppLastError, lib, "voicedetect_capi_last_error")
|
||||||
|
purego.RegisterLibFunc(&CppFreeString, lib, "voicedetect_capi_free_string")
|
||||||
|
purego.RegisterLibFunc(&CppFreeVec, lib, "voicedetect_capi_free_vec")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPath, lib, "voicedetect_capi_embed_path")
|
||||||
|
purego.RegisterLibFunc(&CppEmbedPCM, lib, "voicedetect_capi_embed_pcm")
|
||||||
|
purego.RegisterLibFunc(&CppVerifyPaths, lib, "voicedetect_capi_verify_paths")
|
||||||
|
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "voicedetect_capi_analyze_path_json")
|
||||||
|
})
|
||||||
|
return libLoadErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ = Describe("parseOptions", func() {
|
||||||
|
It("defaults verify_threshold to 0.25", func() {
|
||||||
|
o := parseOptions(nil)
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.25)))
|
||||||
|
Expect(o.modelName).To(Equal(""))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0.4", "model_name:ecapa", "unknown:x"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||||
|
Expect(o.modelName).To(Equal("ecapa"))
|
||||||
|
|
||||||
|
o2 := parseOptions([]string{"threshold:0.3"})
|
||||||
|
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores non-positive thresholds and keeps the default", func() {
|
||||||
|
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||||
|
Expect(o.verifyThreshold).To(Equal(float32(0.25)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("parseAnalyzeJSON", func() {
|
||||||
|
It("maps age, gender label+scores and emotion label+scores", func() {
|
||||||
|
doc := `{"age":42.0,
|
||||||
|
"gender":{"label":"female","female":0.88,"male":0.12},
|
||||||
|
"emotion":{"label":"neutral","scores":{"neutral":0.7,"happy":0.2,"sad":0.1}}}`
|
||||||
|
seg, err := parseAnalyzeJSON(doc)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(seg.Age).To(BeNumerically("~", 42.0, 1e-4))
|
||||||
|
Expect(seg.Start).To(Equal(float32(0)))
|
||||||
|
Expect(seg.End).To(Equal(float32(0)))
|
||||||
|
|
||||||
|
Expect(seg.DominantGender).To(Equal("female"))
|
||||||
|
Expect(seg.Gender).To(HaveKeyWithValue("female", BeNumerically("~", 0.88, 1e-4)))
|
||||||
|
Expect(seg.Gender).To(HaveKeyWithValue("male", BeNumerically("~", 0.12, 1e-4)))
|
||||||
|
// The "label" entry is consumed into DominantGender, not the score map.
|
||||||
|
Expect(seg.Gender).ToNot(HaveKey("label"))
|
||||||
|
|
||||||
|
Expect(seg.DominantEmotion).To(Equal("neutral"))
|
||||||
|
Expect(seg.Emotion).To(HaveKeyWithValue("neutral", BeNumerically("~", 0.7, 1e-4)))
|
||||||
|
Expect(seg.Emotion).To(HaveKeyWithValue("happy", BeNumerically("~", 0.2, 1e-4)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("tolerates a missing gender block", func() {
|
||||||
|
seg, err := parseAnalyzeJSON(`{"age":30.0,"emotion":{"label":"happy","scores":{"happy":1.0}}}`)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(seg.DominantGender).To(Equal(""))
|
||||||
|
Expect(seg.DominantEmotion).To(Equal("happy"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error on malformed JSON", func() {
|
||||||
|
_, err := parseAnalyzeJSON(`{not-json`)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// The specs below exercise the real C-API end to end. They run only when both a
|
||||||
|
// model GGUF and a test WAV are provided, and skip cleanly otherwise so the
|
||||||
|
// suite stays green without large assets.
|
||||||
|
var _ = Describe("VoiceDetect end-to-end", Ordered, func() {
|
||||||
|
var (
|
||||||
|
v *VoiceDetect
|
||||||
|
modelPath = os.Getenv("VOICEDETECT_BACKEND_TEST_MODEL")
|
||||||
|
wavPath = os.Getenv("VOICEDETECT_BACKEND_TEST_WAV")
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeAll(func() {
|
||||||
|
if modelPath == "" || wavPath == "" {
|
||||||
|
Skip("set VOICEDETECT_BACKEND_TEST_MODEL and VOICEDETECT_BACKEND_TEST_WAV to run the e2e specs")
|
||||||
|
}
|
||||||
|
if err := ensureLibLoaded(); err != nil {
|
||||||
|
Skip("libvoicedetect.so not loadable: " + err.Error())
|
||||||
|
}
|
||||||
|
v = &VoiceDetect{}
|
||||||
|
Expect(v.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("embeds an audio clip", func() {
|
||||||
|
resp, err := v.VoiceEmbed(&pb.VoiceEmbedRequest{Audio: wavPath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Embedding).ToNot(BeEmpty())
|
||||||
|
Expect(resp.Model).ToNot(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("verifies a clip against itself as the same speaker", func() {
|
||||||
|
resp, err := v.VoiceVerify(&pb.VoiceVerifyRequest{Audio1: wavPath, Audio2: wavPath})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(resp.Verified).To(BeTrue())
|
||||||
|
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||||
|
})
|
||||||
|
})
|
||||||
64
backend/go/voice-detect/main.go
Normal file
64
backend/go/voice-detect/main.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
|
//
|
||||||
|
// Loads libvoicedetect.so via purego and registers the flat C-API entry points
|
||||||
|
// declared in voicedetect_capi.h. The library name can be overridden with
|
||||||
|
// VOICEDETECT_LIBRARY (mirrors the PARAKEET_LIBRARY / OMNIVOICE_LIBRARY
|
||||||
|
// convention in the sibling backends); the default looks for the .so next to
|
||||||
|
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// addr is the -addr command-line flag; main hands its value to
// grpc.StartServer once flag.Parse has run.
var addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
|
||||||
|
// LibFuncs is one row of the symbol table that main registers with purego.
type LibFuncs struct {
	// FuncPtr is the value passed to purego.RegisterLibFunc as the binding
	// target (main passes the address of a Cpp* function variable).
	FuncPtr interface{}
	// Name is the exported symbol name looked up in the shared library.
	Name string
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
libName := os.Getenv("VOICEDETECT_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
libName = "libvoicedetect.so"
|
||||||
|
}
|
||||||
|
|
||||||
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("voice-detect: dlopen %q: %w", libName, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bound 1:1 to voicedetect_capi.h. char*/float* returns are registered as
|
||||||
|
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppAbiVersion, "voicedetect_capi_abi_version"},
|
||||||
|
{&CppLoad, "voicedetect_capi_load"},
|
||||||
|
{&CppFree, "voicedetect_capi_free"},
|
||||||
|
{&CppLastError, "voicedetect_capi_last_error"},
|
||||||
|
{&CppFreeString, "voicedetect_capi_free_string"},
|
||||||
|
{&CppFreeVec, "voicedetect_capi_free_vec"},
|
||||||
|
{&CppEmbedPath, "voicedetect_capi_embed_path"},
|
||||||
|
{&CppEmbedPCM, "voicedetect_capi_embed_pcm"},
|
||||||
|
{&CppVerifyPaths, "voicedetect_capi_verify_paths"},
|
||||||
|
{&CppAnalyzeJSON, "voicedetect_capi_analyze_path_json"},
|
||||||
|
}
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "[voice-detect] ABI=%d\n", CppAbiVersion())
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &VoiceDetect{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
46
backend/go/voice-detect/options.go
Normal file
46
backend/go/voice-detect/options.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultVerifyThreshold is the cosine-distance cutoff applied when no
// threshold is configured. It is kept equal to the default of the Python
// speaker-recognition backend so both implementations reach the same verdicts
// out of the box.
const defaultVerifyThreshold = float32(0.25)
|
||||||
|
|
||||||
|
// loadOptions is the parsed form of the model-level "key:value" options for
// voice-detect, as produced by parseOptions.
type loadOptions struct {
	// verifyThreshold is set from the "verify_threshold" / "threshold" option.
	verifyThreshold float32
	// modelName is set from the "model_name" option; empty when not given.
	modelName string
}
|
||||||
|
|
||||||
|
// splitOption breaks a "key:value" option string at its first colon and trims
// surrounding whitespace from both halves. Any further colons stay in value.
// When o contains no colon it returns "", "", false.
func splitOption(o string) (key, value string, ok bool) {
	rawKey, rawValue, found := strings.Cut(o, ":")
	if !found {
		return "", "", false
	}
	return strings.TrimSpace(rawKey), strings.TrimSpace(rawValue), true
}
|
||||||
|
|
||||||
|
// parseOptions reads the backend "key:value" option slice. Unknown keys are
|
||||||
|
// ignored. Defaults: verify_threshold 0.25, model_name derived from the file.
|
||||||
|
func parseOptions(opts []string) loadOptions {
|
||||||
|
o := loadOptions{verifyThreshold: defaultVerifyThreshold}
|
||||||
|
for _, oo := range opts {
|
||||||
|
key, value, ok := splitOption(oo)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch key {
|
||||||
|
case "verify_threshold", "threshold":
|
||||||
|
if f, err := strconv.ParseFloat(value, 32); err == nil && f > 0 {
|
||||||
|
o.verifyThreshold = float32(f)
|
||||||
|
}
|
||||||
|
case "model_name":
|
||||||
|
o.modelName = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
68
backend/go/voice-detect/package.sh
Executable file
68
backend/go/voice-detect/package.sh
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
#
# Bundle the voice-detect-grpc binary, libvoicedetect.so, the core runtime libs
# (libc/libstdc++/libgomp + ld.so) and the GPU runtime for the active BUILD_TYPE
# so the package is self-contained. Mirrors backend/go/parakeet-cpp/package.sh;
# run.sh routes the (CGO_ENABLED=0) binary through lib/ld.so so the packaged libc
# is used instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# copy_runtime_libs LOADER LIBDIR
#
# Copies the dynamic loader LOADER to package/lib/ld.so and then the core
# runtime libraries from LIBDIR into package/lib/ under their own names.
# Symlinks are dereferenced (-L) so real files end up in the package. With
# `set -e` active, the first failing copy aborts the script.
copy_runtime_libs() {
    local loader="$1"
    local libdir="$2"
    local lib

    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in \
        libc.so.6 \
        libgcc_s.so.1 \
        libstdc++.so.6 \
        libm.so.6 \
        libgomp.so.1 \
        libdl.so.2 \
        librt.so.1 \
        libpthread.so.0; do
        cp -arfLv "$libdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/voice-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libvoicedetect.so + any soname symlinks. purego.Dlopen resolves it via
# LD_LIBRARY_PATH, which run.sh points at lib/.
cp -avf "$CURDIR"/libvoicedetect.so* "$CURDIR/package/lib/" 2>/dev/null || {
    echo "ERROR: libvoicedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
}

# Detect architecture and copy the core runtime libs libvoicedetect.so links
# against, plus the matching dynamic loader as lib/ld.so.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_runtime_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_runtime_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    # No loader or libc bundling on macOS.
    echo "Detected Darwin"
else
    # Fatal diagnostics go to stderr, like the libvoicedetect.so error above.
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# Package GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) based on
# BUILD_TYPE so the backend can reach the GPU without the runtime base image
# shipping those drivers. Skipped when the helper script is not present.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||||
16
backend/go/voice-detect/run.sh
Executable file
16
backend/go/voice-detect/run.sh
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Launcher for the packaged voice-detect backend: puts lib/ and the script's own
# directory at the front of LD_LIBRARY_PATH, then replaces this shell with the
# backend binary, forwarding all arguments.
set -e

CURDIR=$(dirname "$(realpath "$0")")
BACKEND_BIN="$CURDIR/voice-detect-grpc"
BUNDLED_LOADER="$CURDIR/lib/ld.so"

export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"

# Without a packaged loader, run the binary directly against the host's libc.
if [ ! -f "$BUNDLED_LOADER" ]; then
    exec "$BACKEND_BIN" "$@"
fi

# A self-contained ld.so was packaged: route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the whisper /
# parakeet backends' runtime layout).
echo "Using lib/ld.so"
exec "$BUNDLED_LOADER" "$BACKEND_BIN" "$@"
|
||||||
14
backend/go/voice-detect/test.sh
Executable file
14
backend/go/voice-detect/test.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
# Runs the voice-detect Go test suite from the backend's own directory.
set -e

CURDIR=$(dirname "$(realpath "$0")")
cd "$CURDIR"

echo "Running voice-detect backend tests..."

# The pure-Go parsing specs always run. The end-to-end specs that need the
# shared library run only when a model + WAV are provided via
# VOICEDETECT_BACKEND_TEST_MODEL and VOICEDETECT_BACKEND_TEST_WAV; otherwise
# they auto-skip. This directory is put first on the library search path so a
# locally built libvoicedetect.so is found.
export LD_LIBRARY_PATH="$CURDIR:${LD_LIBRARY_PATH:-}"
go test -v -timeout 1200s .

echo "voice-detect tests completed."
|
||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
|
||||||
SO_TARGET?=libgowhisper.so
|
SO_TARGET?=libgowhisper.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -209,6 +209,78 @@
|
|||||||
nvidia-cuda-12: "cuda12-ced"
|
nvidia-cuda-12: "cuda12-ced"
|
||||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-ced"
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-ced"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ced"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-ced"
|
||||||
|
- &voicedetect
|
||||||
|
name: "voice-detect"
|
||||||
|
alias: "voice-detect"
|
||||||
|
license: mit
|
||||||
|
icon: https://avatars.githubusercontent.com/u/95302084
|
||||||
|
description: |
|
||||||
|
voice-detect speaker recognition and voice analysis.
|
||||||
|
voice-detect.cpp is a C++/ggml engine that produces L2-normalised
|
||||||
|
speaker embeddings (ECAPA-TDNN, WeSpeaker ResNet34, 3D-Speaker
|
||||||
|
ERes2Net, CAM++) for voice verification and 1:N identification, plus
|
||||||
|
a wav2vec2 age / gender / emotion analysis head. It replaces the
|
||||||
|
Python speaker-recognition backend and is exposed through the Voice*
|
||||||
|
gRPC rpcs and the /v1/voice/* REST endpoints. It runs on CPU, NVIDIA
|
||||||
|
CUDA, AMD ROCm/HIP, Intel SYCL, Vulkan and NVIDIA Jetson (L4T) targets.
|
||||||
|
urls:
|
||||||
|
- https://github.com/mudler/voice-detect.cpp
|
||||||
|
tags:
|
||||||
|
- voice-recognition
|
||||||
|
- speaker-verification
|
||||||
|
- speaker-embedding
|
||||||
|
- CPU
|
||||||
|
- GPU
|
||||||
|
- CUDA
|
||||||
|
- HIP
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-voice-detect"
|
||||||
|
nvidia: "cuda12-voice-detect"
|
||||||
|
intel: "intel-sycl-f16-voice-detect"
|
||||||
|
metal: "metal-voice-detect"
|
||||||
|
amd: "rocm-voice-detect"
|
||||||
|
vulkan: "vulkan-voice-detect"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
nvidia-cuda-13: "cuda13-voice-detect"
|
||||||
|
nvidia-cuda-12: "cuda12-voice-detect"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-voice-detect"
|
||||||
|
- &facedetect
|
||||||
|
name: "face-detect"
|
||||||
|
alias: "face-detect"
|
||||||
|
license: mit
|
||||||
|
icon: https://avatars.githubusercontent.com/u/95302084
|
||||||
|
description: |
|
||||||
|
face-detect face detection, embedding, verification and analysis.
|
||||||
|
face-detect.cpp is a C++/ggml engine that runs SCRFD / YuNet face
|
||||||
|
detection and ArcFace / SFace 512-d (or 128-d) L2-normalised face
|
||||||
|
embeddings for verification and 1:N identification, plus a landmark /
|
||||||
|
age / gender analysis head. It replaces the Python insightface backend
|
||||||
|
and is exposed through the Embedding, Detect and Face* gRPC rpcs and
|
||||||
|
the /v1/face/* REST endpoints. It runs on CPU, NVIDIA CUDA, AMD
|
||||||
|
ROCm/HIP, Intel SYCL, Vulkan and NVIDIA Jetson (L4T) targets.
|
||||||
|
urls:
|
||||||
|
- https://github.com/mudler/face-detect.cpp
|
||||||
|
tags:
|
||||||
|
- face-recognition
|
||||||
|
- face-verification
|
||||||
|
- face-embedding
|
||||||
|
- CPU
|
||||||
|
- GPU
|
||||||
|
- CUDA
|
||||||
|
- HIP
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-face-detect"
|
||||||
|
nvidia: "cuda12-face-detect"
|
||||||
|
intel: "intel-sycl-f16-face-detect"
|
||||||
|
metal: "metal-face-detect"
|
||||||
|
amd: "rocm-face-detect"
|
||||||
|
vulkan: "vulkan-face-detect"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-face-detect"
|
||||||
|
nvidia-cuda-13: "cuda13-face-detect"
|
||||||
|
nvidia-cuda-12: "cuda12-face-detect"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-face-detect"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-face-detect"
|
||||||
- &voxtral
|
- &voxtral
|
||||||
name: "voxtral"
|
name: "voxtral"
|
||||||
alias: "voxtral"
|
alias: "voxtral"
|
||||||
@@ -1284,7 +1356,6 @@
|
|||||||
nvidia-cuda-13: "cuda13-liquid-audio"
|
nvidia-cuda-13: "cuda13-liquid-audio"
|
||||||
nvidia-cuda-12: "cuda12-liquid-audio"
|
nvidia-cuda-12: "cuda12-liquid-audio"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
||||||
metal: "metal-liquid-audio"
|
|
||||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
||||||
- &qwen-tts
|
- &qwen-tts
|
||||||
urls:
|
urls:
|
||||||
@@ -1570,7 +1641,6 @@
|
|||||||
- TTS
|
- TTS
|
||||||
capabilities:
|
capabilities:
|
||||||
default: "cpu-supertonic"
|
default: "cpu-supertonic"
|
||||||
metal: "metal-supertonic"
|
|
||||||
- !!merge <<: *neutts
|
- !!merge <<: *neutts
|
||||||
name: "neutts-development"
|
name: "neutts-development"
|
||||||
capabilities:
|
capabilities:
|
||||||
@@ -2798,6 +2868,236 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ced"
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ced"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-ced
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-ced
|
||||||
|
## voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "voice-detect-development"
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-voice-detect-development"
|
||||||
|
nvidia: "cuda12-voice-detect-development"
|
||||||
|
intel: "intel-sycl-f16-voice-detect-development"
|
||||||
|
metal: "metal-voice-detect-development"
|
||||||
|
amd: "rocm-voice-detect-development"
|
||||||
|
vulkan: "vulkan-voice-detect-development"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
nvidia-cuda-13: "cuda13-voice-detect-development"
|
||||||
|
nvidia-cuda-12: "cuda12-voice-detect-development"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "nvidia-l4t-arm64-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cpu-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-cpu-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cpu-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-cpu-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "metal-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "metal-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda12-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-12-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda12-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-12-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "rocm-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-rocm-hipblas-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "rocm-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-rocm-hipblas-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f32-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f32-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f32-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f32-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f16-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f16-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "intel-sycl-f16-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f16-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "vulkan-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-vulkan-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "vulkan-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-vulkan-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-voice-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-13-voice-detect
|
||||||
|
- !!merge <<: *voicedetect
|
||||||
|
name: "cuda13-voice-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-voice-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-voice-detect
|
||||||
|
## face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "face-detect-development"
|
||||||
|
capabilities:
|
||||||
|
default: "cpu-face-detect-development"
|
||||||
|
nvidia: "cuda12-face-detect-development"
|
||||||
|
intel: "intel-sycl-f16-face-detect-development"
|
||||||
|
metal: "metal-face-detect-development"
|
||||||
|
amd: "rocm-face-detect-development"
|
||||||
|
vulkan: "vulkan-face-detect-development"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
nvidia-cuda-13: "cuda13-face-detect-development"
|
||||||
|
nvidia-cuda-12: "cuda12-face-detect-development"
|
||||||
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-face-detect-development"
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "nvidia-l4t-arm64-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "nvidia-l4t-arm64-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-nvidia-l4t-arm64-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cpu-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-cpu-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cpu-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-cpu-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "metal-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "metal-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda12-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-12-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda12-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-12-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "rocm-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-rocm-hipblas-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "rocm-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-rocm-hipblas-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f32-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f32-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f32-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f32-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f16-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-intel-sycl-f16-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "intel-sycl-f16-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-intel-sycl-f16-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "vulkan-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-vulkan-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "vulkan-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-vulkan-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-face-detect"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-cuda-13-face-detect
|
||||||
|
- !!merge <<: *facedetect
|
||||||
|
name: "cuda13-face-detect-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-face-detect"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-face-detect
|
||||||
## stablediffusion-ggml
|
## stablediffusion-ggml
|
||||||
- !!merge <<: *stablediffusionggml
|
- !!merge <<: *stablediffusionggml
|
||||||
name: "cpu-stablediffusion-ggml"
|
name: "cpu-stablediffusion-ggml"
|
||||||
@@ -4614,7 +4914,6 @@
|
|||||||
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
||||||
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
||||||
metal: "metal-liquid-audio-development"
|
|
||||||
- !!merge <<: *liquid-audio
|
- !!merge <<: *liquid-audio
|
||||||
name: "cpu-liquid-audio"
|
name: "cpu-liquid-audio"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
||||||
@@ -4625,16 +4924,6 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-cpu-liquid-audio
|
- localai/localai-backends:master-cpu-liquid-audio
|
||||||
- !!merge <<: *liquid-audio
|
|
||||||
name: "metal-liquid-audio"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:latest-metal-darwin-arm64-liquid-audio
|
|
||||||
- !!merge <<: *liquid-audio
|
|
||||||
name: "metal-liquid-audio-development"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:master-metal-darwin-arm64-liquid-audio
|
|
||||||
- !!merge <<: *liquid-audio
|
- !!merge <<: *liquid-audio
|
||||||
name: "cuda12-liquid-audio"
|
name: "cuda12-liquid-audio"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
||||||
@@ -5497,7 +5786,6 @@
|
|||||||
name: "supertonic-development"
|
name: "supertonic-development"
|
||||||
capabilities:
|
capabilities:
|
||||||
default: "cpu-supertonic-development"
|
default: "cpu-supertonic-development"
|
||||||
metal: "metal-supertonic-development"
|
|
||||||
- !!merge <<: *supertonic
|
- !!merge <<: *supertonic
|
||||||
name: "cpu-supertonic"
|
name: "cpu-supertonic"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
||||||
@@ -5508,13 +5796,3 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-cpu-supertonic
|
- localai/localai-backends:master-cpu-supertonic
|
||||||
- !!merge <<: *supertonic
|
|
||||||
name: "metal-supertonic"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:latest-metal-darwin-arm64-supertonic
|
|
||||||
- !!merge <<: *supertonic
|
|
||||||
name: "metal-supertonic-development"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:master-metal-darwin-arm64-supertonic
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers==4.57.6
|
transformers
|
||||||
torchvision==0.22.1
|
torchvision==0.22.1
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
@@ -10,15 +10,9 @@ sentencepiece
|
|||||||
torch==2.7.1
|
torch==2.7.1
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu121
|
--extra-index-url https://download.pytorch.org/whl/cu121
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers==4.57.6
|
transformers
|
||||||
torchvision
|
torchvision
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
@@ -10,15 +10,9 @@ sentencepiece
|
|||||||
torch
|
torch
|
||||||
ftfy
|
ftfy
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers==4.57.6
|
transformers
|
||||||
torchvision
|
torchvision
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
@@ -10,15 +10,9 @@ sentencepiece
|
|||||||
torch
|
torch
|
||||||
ftfy
|
ftfy
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
|
|||||||
@@ -1,23 +1,17 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||||
torch==2.10.0+rocm7.0
|
torch==2.10.0+rocm7.0
|
||||||
torchvision==0.25.0+rocm7.0
|
torchvision==0.25.0+rocm7.0
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers==4.57.6
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
@@ -3,24 +3,18 @@ torch
|
|||||||
torchvision
|
torchvision
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers==4.57.6
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
git+https://github.com/xhinker/sd_embed
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
||||||
torch
|
torch
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
transformers==4.57.6
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
@@ -9,15 +9,9 @@ numpy<2
|
|||||||
sentencepiece
|
sentencepiece
|
||||||
torchvision
|
torchvision
|
||||||
ftfy
|
ftfy
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||||
torch
|
torch
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
transformers==4.57.6
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
@@ -10,15 +10,9 @@ sentencepiece
|
|||||||
torchvision
|
torchvision
|
||||||
ftfy
|
ftfy
|
||||||
chardet
|
chardet
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
|
|||||||
@@ -1,22 +1,16 @@
|
|||||||
torch==2.7.1
|
torch==2.7.1
|
||||||
torchvision==0.22.1
|
torchvision==0.22.1
|
||||||
diffusers==0.38.0
|
git+https://github.com/huggingface/diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers==4.57.6
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
ftfy
|
ftfy
|
||||||
# diffusers and transformers are pinned together on purpose. transformers v5
|
# TODO: re-add compel once it supports transformers >= 5.
|
||||||
# restructured CLIPTextModel and dropped the `.text_model` attribute, which
|
# Tracking: https://github.com/damian0815/compel/pull/129
|
||||||
# breaks single-file Stable Diffusion loading on every released diffusers
|
# https://github.com/damian0815/compel/issues/128
|
||||||
# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
|
# compel currently pins transformers~=4.25, which forced pip into multi-hour
|
||||||
# main via git froze whichever broken pair existed at image-build time. Pin the
|
# resolver backtracking storms in CI. backend.py imports it lazily and gates
|
||||||
# last known-good released pair so builds are reproducible and can't drift into
|
# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
|
||||||
# the broken window. See https://github.com/mudler/LocalAI/issues/9979
|
|
||||||
#
|
|
||||||
# compel is intentionally omitted: it pins transformers~=4.25, which conflicts
|
|
||||||
# with this pin and previously forced pip into multi-hour resolver backtracking
|
|
||||||
# storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
|
|
||||||
# the import succeeding, so dropping it here is safe.
|
|
||||||
@@ -14,11 +14,5 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
|
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
|
||||||
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
|
||||||
# it on the uv path; Linux/CUDA resolution is unchanged.
|
|
||||||
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
|
||||||
fi
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
# MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job.
|
|
||||||
torch>=2.8.0
|
torch>=2.8.0
|
||||||
torchaudio>=2.8.0
|
torchaudio>=2.8.0
|
||||||
torchcodec>=0.9.1
|
torchcodec>=0.9.1
|
||||||
|
|||||||
@@ -341,9 +341,11 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
|||||||
}
|
}
|
||||||
appCfg := a.ApplicationConfig()
|
appCfg := a.ApplicationConfig()
|
||||||
|
|
||||||
// PIIIsEnabled already encodes "explicit pii.enabled wins, else backend
|
if cfg.PII.Enabled != nil {
|
||||||
// default (cloud-proxy)" — the single source of that rule.
|
enabled = *cfg.PII.Enabled
|
||||||
enabled = cfg.PIIIsEnabled()
|
} else {
|
||||||
|
enabled = cfg.PIIIsEnabled() // backend default (cloud-proxy)
|
||||||
|
}
|
||||||
if !enabled {
|
if !enabled {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
@@ -352,7 +354,7 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
|||||||
if len(detectors) == 0 {
|
if len(detectors) == 0 {
|
||||||
detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
|
detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
|
||||||
}
|
}
|
||||||
return true, detectors // enabled is necessarily true past the !enabled guard
|
return enabled, detectors
|
||||||
}
|
}
|
||||||
|
|
||||||
// PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
|
// PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
|
||||||
|
|||||||
@@ -215,7 +215,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
|||||||
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
|
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
|
||||||
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
||||||
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
||||||
envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
|
|
||||||
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
||||||
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
|
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
|
||||||
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
|
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
|
||||||
@@ -336,15 +335,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
|||||||
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
||||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||||
}
|
}
|
||||||
if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
|
|
||||||
// Request-side default redaction reads this live via
|
|
||||||
// ResolvePIIPolicy, so a file edit takes effect on the next chat
|
|
||||||
// request. The MITM listener resolves its per-host detector map
|
|
||||||
// once at start, so a raw file edit reaches cloud-proxy traffic
|
|
||||||
// only after a restart or a POST /api/settings (which rebuilds
|
|
||||||
// the listener) — the admin UI uses the latter.
|
|
||||||
appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
|
||||||
}
|
|
||||||
if settings.AutoUpgradeBackends != nil {
|
if settings.AutoUpgradeBackends != nil {
|
||||||
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
|
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -357,15 +357,6 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
|
|||||||
Pressure: pressure,
|
Pressure: pressure,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wire staging-progress broadcasting so file-staging shows up on every
|
|
||||||
// replica, not just the one performing the transfer. Without this, a
|
|
||||||
// /api/operations poll that round-robins onto a peer sees no staging row and
|
|
||||||
// the progress flickers. The origin publishes; peers mirror via the wildcard.
|
|
||||||
router.StagingTracker().SetPublisher(natsClient)
|
|
||||||
if _, err := router.StagingTracker().SubscribeBroadcasts(natsClient); err != nil {
|
|
||||||
xlog.Warn("Failed to subscribe to staging progress broadcasts", "error", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
|
// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
|
||||||
// RegistrationToken feed the state-reconciliation passes: pending op
|
// RegistrationToken feed the state-reconciliation passes: pending op
|
||||||
// drain uses the adapter, and model health probes use the token to auth
|
// drain uses the adapter, and model health probes use the token to auth
|
||||||
|
|||||||
@@ -109,52 +109,6 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
// Instance-wide default PII detectors. The file is the only source (no
|
|
||||||
// env var), and the loader runs immediately before startMITMIfConfigured,
|
|
||||||
// so a regression here means the cloud-proxy MITM listener resolves an
|
|
||||||
// empty detector set at boot and forwards intercepted traffic unredacted —
|
|
||||||
// even though pii_default_detectors is on disk and the MITM model has PII
|
|
||||||
// enabled. It also breaks request-side default redaction the same way.
|
|
||||||
Describe("PII default detectors", func() {
|
|
||||||
It("loads pii_default_detectors from the file", func() {
|
|
||||||
cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
|
|
||||||
loadRuntimeSettingsFromFile(cfg)
|
|
||||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
|
|
||||||
cfg := &config.ApplicationConfig{
|
|
||||||
DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["from-file"]}`),
|
|
||||||
PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
|
|
||||||
}
|
|
||||||
loadRuntimeSettingsFromFile(cfg)
|
|
||||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// The live file watcher applies pii_default_detectors on a runtime change
|
|
||||||
// the same way it handles galleries/threads/etc.: env-set values (current
|
|
||||||
// == startup snapshot) are left alone, otherwise the file value is applied
|
|
||||||
// to the live config so request-side default redaction picks it up without
|
|
||||||
// a restart.
|
|
||||||
Describe("file watcher: pii_default_detectors", func() {
|
|
||||||
It("applies a changed file value to the live config", func() {
|
|
||||||
startup := config.ApplicationConfig{} // no env baseline
|
|
||||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
|
|
||||||
handler := readRuntimeSettingsJson(startup)
|
|
||||||
Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
|
|
||||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("leaves an env-controlled value untouched", func() {
|
|
||||||
startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
|
||||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
|
||||||
handler := readRuntimeSettingsJson(startup)
|
|
||||||
Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
|
|
||||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// The Agent Pool block has a mix of zero and non-zero defaults
|
// The Agent Pool block has a mix of zero and non-zero defaults
|
||||||
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
|
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
|
||||||
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
|
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
|
||||||
|
|||||||
@@ -750,20 +750,6 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
|||||||
options.MITMListen = *settings.MITMListen
|
options.MITMListen = *settings.MITMListen
|
||||||
}
|
}
|
||||||
|
|
||||||
// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
|
|
||||||
// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
|
|
||||||
// — apply it only when the env/CLI left the value empty, mirroring the
|
|
||||||
// "env > file" precedence used for the other fields. This must land before
|
|
||||||
// startMITMIfConfigured (called right after this loader): the cloud-proxy
|
|
||||||
// listener resolves each intercept host's detectors once at start via
|
|
||||||
// ResolvePIIPolicy, and a MITM model that names no detectors of its own
|
|
||||||
// falls back to these defaults. Without it the listener (and request-side
|
|
||||||
// default redaction) starts with an empty detector set and forwards
|
|
||||||
// traffic unredacted even though pii_default_detectors is on disk.
|
|
||||||
if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
|
|
||||||
options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Backend upgrade flags
|
// Backend upgrade flags
|
||||||
if settings.AutoUpgradeBackends != nil {
|
if settings.AutoUpgradeBackends != nil {
|
||||||
if !options.AutoUpgradeBackends {
|
if !options.AutoUpgradeBackends {
|
||||||
|
|||||||
@@ -181,8 +181,6 @@ type RunCMD struct {
|
|||||||
// Cloud-proxy MITM listener (off by default).
|
// Cloud-proxy MITM listener (off by default).
|
||||||
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
|
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
|
||||||
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
|
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
|
||||||
|
|
||||||
PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||||
@@ -245,7 +243,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
config.WithAPIAddress(r.Address),
|
config.WithAPIAddress(r.Address),
|
||||||
config.WithMITMListen(r.MITMListen),
|
config.WithMITMListen(r.MITMListen),
|
||||||
config.WithMITMCADir(r.MITMCADir),
|
config.WithMITMCADir(r.MITMCADir),
|
||||||
config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
|
|
||||||
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
|
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
|
||||||
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
|
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
|
||||||
tunnelEnvVar := strings.Join(tunnels, ",")
|
tunnelEnvVar := strings.Join(tunnels, ",")
|
||||||
|
|||||||
@@ -712,18 +712,6 @@ func WithMITMCADir(dir string) AppOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
|
|
||||||
// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
|
|
||||||
// models) that names no pii.detectors of its own. CLI/env:
|
|
||||||
// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
|
|
||||||
// runtime_settings.json / the Middleware UI; a non-empty value takes
|
|
||||||
// precedence over the file (env > file).
|
|
||||||
func WithPIIDefaultDetectors(detectors []string) AppOption {
|
|
||||||
return func(o *ApplicationConfig) {
|
|
||||||
o.PIIDefaultDetectors = detectors
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
||||||
return func(o *ApplicationConfig) {
|
return func(o *ApplicationConfig) {
|
||||||
o.DynamicConfigsDir = dynamicConfigsDir
|
o.DynamicConfigsDir = dynamicConfigsDir
|
||||||
|
|||||||
@@ -542,6 +542,19 @@ var BackendCapabilities = map[string]BackendCapability{
|
|||||||
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
Description: "Speaker recognition — voice identity verification and analysis",
|
Description: "Speaker recognition — voice identity verification and analysis",
|
||||||
},
|
},
|
||||||
|
"voice-detect": {
|
||||||
|
GRPCMethods: []GRPCMethod{MethodVoiceVerify, MethodVoiceEmbed, MethodVoiceAnalyze},
|
||||||
|
PossibleUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
|
DefaultUsecases: []string{UsecaseSpeakerRecognition},
|
||||||
|
Description: "voice-detect.cpp: C++/ggml speaker embedding, verification and voice analysis (age/gender/emotion)",
|
||||||
|
},
|
||||||
|
"face-detect": {
|
||||||
|
GRPCMethods: []GRPCMethod{MethodEmbedding, MethodDetect, MethodFaceVerify, MethodFaceAnalyze},
|
||||||
|
PossibleUsecases: []string{UsecaseEmbeddings, UsecaseDetection, UsecaseFaceRecognition},
|
||||||
|
DefaultUsecases: []string{UsecaseFaceRecognition},
|
||||||
|
AcceptsImages: true,
|
||||||
|
Description: "face-detect.cpp: C++/ggml face detection, embedding, verification and attribute analysis",
|
||||||
|
},
|
||||||
"silero-vad": {
|
"silero-vad": {
|
||||||
GRPCMethods: []GRPCMethod{MethodVAD},
|
GRPCMethods: []GRPCMethod{MethodVAD},
|
||||||
PossibleUsecases: []string{UsecaseVAD},
|
PossibleUsecases: []string{UsecaseVAD},
|
||||||
|
|||||||
@@ -54,35 +54,8 @@ func (g GPU) IsNVIDIABlackwell() bool {
|
|||||||
return maj >= 12
|
return maj >= 12
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute-buffer headroom guard for the raised physical batch.
|
|
||||||
//
|
|
||||||
// Raising n_ubatch grows the CUDA *compute buffer* (the scratch for the forward
|
|
||||||
// graph), which is allocated PER DEVICE — it does not benefit from a second GPU
|
|
||||||
// the way weights or KV (which are split across devices) do. The buffer scales
|
|
||||||
// ~linearly with n_ubatch * n_ctx, so a large context turns the GB10-tuned
|
|
||||||
// ub2048 into multi-GiB of extra scratch that must fit on a SINGLE card. On a
|
|
||||||
// 16 GiB consumer Blackwell with a 200k context that overflows (issue #10485),
|
|
||||||
// even though the GB10 it was measured on (128 GiB unified memory) had room.
|
|
||||||
//
|
|
||||||
// These constants size a conservative guard: only raise the batch when the
|
|
||||||
// extra scratch fits the per-device VRAM ceiling.
|
|
||||||
const (
|
|
||||||
// computeBufferBytesPerCell approximates the CUDA compute-buffer cost of one
|
|
||||||
// (n_ubatch * n_ctx) cell. Derived from an observed allocation (ub2048 *
|
|
||||||
// ctx204800 ~= 4.5 GiB => ~11 B/cell) and rounded up to 16 for margin, since
|
|
||||||
// the real cost also grows with model width (heads / embedding dim) which we
|
|
||||||
// don't know at config time.
|
|
||||||
computeBufferBytesPerCell = 16
|
|
||||||
// blackwellBatchHeadroomDivisor caps the extra compute buffer from raising the
|
|
||||||
// physical batch at VRAM/divisor. /4 keeps the bulk of a device for weights +
|
|
||||||
// KV, which already dominate VRAM use.
|
|
||||||
blackwellBatchHeadroomDivisor = 4
|
|
||||||
)
|
|
||||||
|
|
||||||
// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
|
// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
|
||||||
// given hardware class, ignoring context/VRAM headroom. Use
|
// given hardware, used when the model config leaves batch unset.
|
||||||
// PhysicalBatchForContext when a model context and per-device VRAM are known
|
|
||||||
// (the load paths) so the raised batch can't overflow a single device.
|
|
||||||
func PhysicalBatch(g GPU) int {
|
func PhysicalBatch(g GPU) int {
|
||||||
if g.IsNVIDIABlackwell() {
|
if g.IsNVIDIABlackwell() {
|
||||||
return BlackwellPhysicalBatch
|
return BlackwellPhysicalBatch
|
||||||
@@ -90,32 +63,6 @@ func PhysicalBatch(g GPU) int {
|
|||||||
return DefaultPhysicalBatch
|
return DefaultPhysicalBatch
|
||||||
}
|
}
|
||||||
|
|
||||||
// PhysicalBatchForContext is PhysicalBatch gated on per-device VRAM headroom for
|
|
||||||
// the given context: it only raises the batch above the conservative default
|
|
||||||
// when the extra compute buffer (which is allocated on a single device and grows
|
|
||||||
// with n_ubatch * n_ctx) fits within blackwellBatchHeadroomDivisor of the GPU's
|
|
||||||
// VRAM. g.VRAM must be the PER-DEVICE ceiling (the smallest device on a
|
|
||||||
// multi-GPU host), not the summed total — the compute buffer can't be split.
|
|
||||||
//
|
|
||||||
// VRAM 0 (unknown) stays conservative rather than risk a per-device OOM; the
|
|
||||||
// GB10 / unified-memory path reports system RAM, so it still clears the guard.
|
|
||||||
func PhysicalBatchForContext(g GPU, ctx int) int {
|
|
||||||
if !g.IsNVIDIABlackwell() {
|
|
||||||
return DefaultPhysicalBatch
|
|
||||||
}
|
|
||||||
if ctx <= 0 {
|
|
||||||
ctx = DefaultContextSize
|
|
||||||
}
|
|
||||||
if g.VRAM == 0 {
|
|
||||||
return DefaultPhysicalBatch
|
|
||||||
}
|
|
||||||
extra := uint64(ctx) * uint64(BlackwellPhysicalBatch-DefaultPhysicalBatch) * computeBufferBytesPerCell
|
|
||||||
if extra <= g.VRAM/blackwellBatchHeadroomDivisor {
|
|
||||||
return BlackwellPhysicalBatch
|
|
||||||
}
|
|
||||||
return DefaultPhysicalBatch
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
|
// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
|
||||||
// Callers that re-tune a value chosen by an upstream host (the distributed
|
// Callers that re-tune a value chosen by an upstream host (the distributed
|
||||||
// router correcting the frontend's guess) use this to avoid clobbering an
|
// router correcting the frontend's guess) use this to avoid clobbering an
|
||||||
@@ -175,12 +122,7 @@ func hasParallelOption(opts []string) bool {
|
|||||||
// deterministic device — detection does a live nvidia-smi call.
|
// deterministic device — detection does a live nvidia-smi call.
|
||||||
var localGPU = func() GPU {
|
var localGPU = func() GPU {
|
||||||
vendor, _ := xsysinfo.DetectGPUVendor()
|
vendor, _ := xsysinfo.DetectGPUVendor()
|
||||||
// Use the SMALLEST device's VRAM, not the summed total: the parallel-slot
|
vram, _ := xsysinfo.TotalAvailableVRAM()
|
||||||
// tier and the batch headroom guard both reason about what fits on a single
|
|
||||||
// card, and per-device compute buffers can't be split across GPUs. Summing
|
|
||||||
// two 16 GiB cards into "32 GiB" is what over-provisioned multi-GPU hosts
|
|
||||||
// into OOM (issue #10485).
|
|
||||||
vram, _ := xsysinfo.MinPerGPUVRAM()
|
|
||||||
return GPU{
|
return GPU{
|
||||||
Vendor: vendor,
|
Vendor: vendor,
|
||||||
ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
|
ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
|
||||||
@@ -195,20 +137,10 @@ func ApplyHardwareDefaults(cfg *ModelConfig, gpu GPU) {
|
|||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Raise the physical batch on Blackwell only when the resulting compute
|
if cfg.Batch == 0 && gpu.IsNVIDIABlackwell() {
|
||||||
// buffer fits the per-device VRAM at THIS model's context. Leaving Batch at 0
|
|
||||||
// (rather than writing the default 512) preserves the downstream single-pass
|
|
||||||
// sizing in core/backend.EffectiveBatchSize for embedding/score/rerank.
|
|
||||||
if cfg.Batch == 0 {
|
|
||||||
ctx := DefaultContextSize
|
|
||||||
if cfg.ContextSize != nil {
|
|
||||||
ctx = *cfg.ContextSize
|
|
||||||
}
|
|
||||||
if PhysicalBatchForContext(gpu, ctx) == BlackwellPhysicalBatch {
|
|
||||||
cfg.Batch = BlackwellPhysicalBatch
|
cfg.Batch = BlackwellPhysicalBatch
|
||||||
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
|
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
|
||||||
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability, "context", ctx, "vram_gib", gpu.VRAM>>30)
|
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable concurrent serving by default on a capable GPU: without this the
|
// Enable concurrent serving by default on a capable GPU: without this the
|
||||||
|
|||||||
@@ -9,37 +9,26 @@ import (
|
|||||||
// GPU. The detection seam (localGPU) is injected so the path is deterministic
|
// GPU. The detection seam (localGPU) is injected so the path is deterministic
|
||||||
// without a real GPU.
|
// without a real GPU.
|
||||||
var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
|
var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
|
||||||
const gib = uint64(1) << 30
|
|
||||||
|
|
||||||
var orig func() GPU
|
var orig func() GPU
|
||||||
BeforeEach(func() { orig = localGPU })
|
BeforeEach(func() { orig = localGPU })
|
||||||
AfterEach(func() { localGPU = orig })
|
AfterEach(func() { localGPU = orig })
|
||||||
|
|
||||||
It("sets the physical batch on a local Blackwell GPU with headroom", func() {
|
It("sets the physical batch on a local Blackwell GPU", func() {
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
|
localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.SetDefaults()
|
cfg.SetDefaults()
|
||||||
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("leaves batch unset when a large context would overflow the device", func() {
|
|
||||||
// Regression guard for issue #10485: 16 GiB consumer Blackwell + ~200k ctx.
|
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.0", VRAM: 16 * gib} }
|
|
||||||
ctx := 204800
|
|
||||||
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
|
||||||
cfg.SetDefaults()
|
|
||||||
Expect(cfg.Batch).To(Equal(0))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("leaves batch unset on a non-Blackwell local GPU", func() {
|
It("leaves batch unset on a non-Blackwell local GPU", func() {
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "8.9", VRAM: 119 * gib} }
|
localGPU = func() GPU { return GPU{ComputeCapability: "8.9"} }
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.SetDefaults()
|
cfg.SetDefaults()
|
||||||
Expect(cfg.Batch).To(Equal(0))
|
Expect(cfg.Batch).To(Equal(0))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("never overrides an explicit batch", func() {
|
It("never overrides an explicit batch", func() {
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
|
localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.Batch = 1024
|
cfg.Batch = 1024
|
||||||
cfg.SetDefaults()
|
cfg.SetDefaults()
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _ = Describe("Hardware-driven config defaults", func() {
|
var _ = Describe("Hardware-driven config defaults", func() {
|
||||||
const gib = uint64(1) << 30
|
|
||||||
|
|
||||||
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
|
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
|
||||||
func(cc string, want bool) {
|
func(cc string, want bool) {
|
||||||
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
|
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
|
||||||
@@ -37,54 +35,21 @@ var _ = Describe("Hardware-driven config defaults", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
Describe("PhysicalBatchForContext (per-device VRAM headroom)", func() {
|
|
||||||
It("raises the batch when the compute buffer fits the device", func() {
|
|
||||||
// 16 GiB Blackwell with a small context: the extra scratch is tiny.
|
|
||||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 8192)).
|
|
||||||
To(Equal(BlackwellPhysicalBatch))
|
|
||||||
})
|
|
||||||
It("keeps the default batch when a large context would overflow one device", func() {
|
|
||||||
// The issue #10485 case: 16 GiB consumer Blackwell, ~200k context.
|
|
||||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 204800)).
|
|
||||||
To(Equal(DefaultPhysicalBatch))
|
|
||||||
})
|
|
||||||
It("still raises the batch on a large unified-memory device (GB10)", func() {
|
|
||||||
// GB10 reports system RAM (~119 GiB) as its single device's VRAM.
|
|
||||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1", VRAM: 119 * gib}, 204800)).
|
|
||||||
To(Equal(BlackwellPhysicalBatch))
|
|
||||||
})
|
|
||||||
It("stays conservative when VRAM is unknown", func() {
|
|
||||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1"}, 8192)).
|
|
||||||
To(Equal(DefaultPhysicalBatch))
|
|
||||||
})
|
|
||||||
It("never raises the batch on non-Blackwell", func() {
|
|
||||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "9.0", VRAM: 80 * gib}, 8192)).
|
|
||||||
To(Equal(DefaultPhysicalBatch))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Describe("ApplyHardwareDefaults", func() {
|
Describe("ApplyHardwareDefaults", func() {
|
||||||
It("raises an unset batch to 2048 on Blackwell with headroom", func() {
|
It("raises an unset batch to 2048 on Blackwell", func() {
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
||||||
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
||||||
})
|
})
|
||||||
It("leaves batch unset when a large context would overflow one device", func() {
|
|
||||||
// Regression guard for issue #10485: 16 GiB card + ~200k context.
|
|
||||||
ctx := 204800
|
|
||||||
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.0", VRAM: 16 * gib})
|
|
||||||
Expect(cfg.Batch).To(Equal(0))
|
|
||||||
})
|
|
||||||
It("leaves batch unset on non-Blackwell", func() {
|
It("leaves batch unset on non-Blackwell", func() {
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0", VRAM: 119 * gib})
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"})
|
||||||
Expect(cfg.Batch).To(Equal(0))
|
Expect(cfg.Batch).To(Equal(0))
|
||||||
})
|
})
|
||||||
It("never overrides an explicit batch", func() {
|
It("never overrides an explicit batch", func() {
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.Batch = 1024
|
cfg.Batch = 1024
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
||||||
Expect(cfg.Batch).To(Equal(1024))
|
Expect(cfg.Batch).To(Equal(1024))
|
||||||
})
|
})
|
||||||
It("no-ops on nil", func() {
|
It("no-ops on nil", func() {
|
||||||
@@ -92,6 +57,8 @@ var _ = Describe("Hardware-driven config defaults", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
const gib = uint64(1) << 30
|
||||||
|
|
||||||
DescribeTable("DefaultParallelSlots (by VRAM)",
|
DescribeTable("DefaultParallelSlots (by VRAM)",
|
||||||
func(vramGiB uint64, want int) {
|
func(vramGiB uint64, want int) {
|
||||||
Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want))
|
Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want))
|
||||||
|
|||||||
@@ -537,36 +537,6 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
|||||||
Component: "number",
|
Component: "number",
|
||||||
Order: 79,
|
Order: 79,
|
||||||
},
|
},
|
||||||
"pipeline.compaction.enabled": {
|
|
||||||
Section: "pipeline",
|
|
||||||
Label: "Compaction Enabled",
|
|
||||||
Description: "Fold conversation items that age out of the live window (Max History Items) into a rolling summary instead of dropping them, so long realtime sessions stay cheap without losing earlier context. Off by default.",
|
|
||||||
Component: "toggle",
|
|
||||||
Order: 80,
|
|
||||||
},
|
|
||||||
"pipeline.compaction.trigger_items": {
|
|
||||||
Section: "pipeline",
|
|
||||||
Label: "Compaction Trigger Items",
|
|
||||||
Description: "High-water mark: once the live conversation exceeds this many items, the overflow above Max History Items is summarized and evicted. Must be greater than Max History Items; defaults to twice it. The gap controls how often summarization runs.",
|
|
||||||
Component: "number",
|
|
||||||
Order: 81,
|
|
||||||
},
|
|
||||||
"pipeline.compaction.summary_model": {
|
|
||||||
Section: "pipeline",
|
|
||||||
Label: "Compaction Summary Model",
|
|
||||||
Description: "Optional smaller/cheaper model used to produce the rolling summary. Empty reuses the pipeline's own LLM. On CPU, a tiny model here keeps compaction from competing with the conversation LLM.",
|
|
||||||
Component: "input",
|
|
||||||
Advanced: true,
|
|
||||||
Order: 82,
|
|
||||||
},
|
|
||||||
"pipeline.compaction.max_summary_tokens": {
|
|
||||||
Section: "pipeline",
|
|
||||||
Label: "Compaction Max Summary Tokens",
|
|
||||||
Description: "Advisory cap on the rolling summary length (fed to the summarizer prompt). Defaults to 512.",
|
|
||||||
Component: "number",
|
|
||||||
Advanced: true,
|
|
||||||
Order: 83,
|
|
||||||
},
|
|
||||||
|
|
||||||
// --- Functions ---
|
// --- Functions ---
|
||||||
"function.grammar.parallel_calls": {
|
"function.grammar.parallel_calls": {
|
||||||
|
|||||||
@@ -641,32 +641,11 @@ type Pipeline struct {
|
|||||||
// context fills.
|
// context fills.
|
||||||
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
|
||||||
|
|
||||||
// Compaction folds conversation items that age out of the live window
|
|
||||||
// (max_history_items) into a rolling summary instead of dropping them, so
|
|
||||||
// long realtime sessions stay cheap without losing earlier context. Nil
|
|
||||||
// (block absent) means disabled, preserving existing behavior.
|
|
||||||
Compaction *PipelineCompaction `yaml:"compaction,omitempty" json:"compaction,omitempty"`
|
|
||||||
|
|
||||||
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
// VoiceRecognition gates the pipeline behind speaker verification. Nil
|
||||||
// (block absent) means no gate, preserving existing behavior.
|
// (block absent) means no gate, preserving existing behavior.
|
||||||
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// PipelineCompaction configures summarize-then-drop for a realtime pipeline.
|
|
||||||
type PipelineCompaction struct {
|
|
||||||
// Enabled turns summarize-then-drop on. Default false.
|
|
||||||
Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`
|
|
||||||
// TriggerItems is the high-water mark: once live items exceed it, overflow
|
|
||||||
// above max_history_items is summarized and evicted. Must exceed
|
|
||||||
// max_history_items; clamped up if not. Default: 2x max_history_items.
|
|
||||||
TriggerItems int `yaml:"trigger_items,omitempty" json:"trigger_items,omitempty"`
|
|
||||||
// SummaryModel optionally names a smaller/cheaper model for the summary
|
|
||||||
// call. Empty uses the pipeline's own LLM.
|
|
||||||
SummaryModel string `yaml:"summary_model,omitempty" json:"summary_model,omitempty"`
|
|
||||||
// MaxSummaryTokens advises the summary length (fed to the prompt). Default 512.
|
|
||||||
MaxSummaryTokens int `yaml:"max_summary_tokens,omitempty" json:"max_summary_tokens,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
||||||
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
||||||
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
||||||
@@ -1204,6 +1183,11 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
// This ensures gallery-installed and runtime-loaded models get optimal parameters.
|
// This ensures gallery-installed and runtime-loaded models get optimal parameters.
|
||||||
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
|
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
|
||||||
|
|
||||||
|
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell).
|
||||||
|
// Uses the local GPU here; in distributed mode the router re-applies the same
|
||||||
|
// heuristics for the selected node's GPU before loading. Explicit config wins.
|
||||||
|
ApplyHardwareDefaults(cfg, localGPU())
|
||||||
|
|
||||||
// Apply serving-policy defaults (device-independent): cross-request prefix
|
// Apply serving-policy defaults (device-independent): cross-request prefix
|
||||||
// caching. Propagates to distributed nodes via the model options.
|
// caching. Propagates to distributed nodes via the model options.
|
||||||
ApplyServingDefaults(cfg)
|
ApplyServingDefaults(cfg)
|
||||||
@@ -1242,16 +1226,6 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
cfg.ContextSize = &ctx
|
cfg.ContextSize = &ctx
|
||||||
}
|
}
|
||||||
runBackendHooks(cfg, lo.modelPath)
|
runBackendHooks(cfg, lo.modelPath)
|
||||||
|
|
||||||
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell)
|
|
||||||
// LAST, after the context size is fully resolved (explicit config, LoadOptions,
|
|
||||||
// then the GGUF guess inside runBackendHooks): the Blackwell batch guard sizes
|
|
||||||
// the per-device compute buffer against this model's context, so it must see
|
|
||||||
// the final value, not a pre-guess nil. Uses the local GPU here; in distributed
|
|
||||||
// mode the router re-applies the same heuristics for the selected node's GPU
|
|
||||||
// before loading. Explicit config always wins.
|
|
||||||
ApplyHardwareDefaults(cfg, localGPU())
|
|
||||||
|
|
||||||
cfg.syncKnownUsecasesFromString()
|
cfg.syncKnownUsecasesFromString()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
||||||
@@ -34,35 +33,6 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
|
|||||||
return settings, nil
|
return settings, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// MergeNonNil overlays every set (non-nil) field of overlay onto the
|
|
||||||
// receiver, leaving the receiver's value untouched wherever overlay left a
|
|
||||||
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
|
|
||||||
// can be told apart from "absent" (see the type doc), which makes this a
|
|
||||||
// faithful partial update: a caller that submits only the field it owns
|
|
||||||
// changes exactly that field and never clobbers unrelated settings.
|
|
||||||
//
|
|
||||||
// This is the read-modify-write contract the persistence helpers exist for.
|
|
||||||
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
|
|
||||||
// on top, and writes the result — so a focused admin page that POSTs only its
|
|
||||||
// own field (the Middleware page sends only mitm_listen; the detector table
|
|
||||||
// only pii_default_detectors) no longer nulls every other setting.
|
|
||||||
//
|
|
||||||
// Reflection keeps the merge total over the struct: a field added to
|
|
||||||
// RuntimeSettings later is merged automatically, so the persistence path can
|
|
||||||
// never silently drop a new setting the way a hand-maintained field list
|
|
||||||
// would. Non-pointer fields (none today) are skipped — they cannot express
|
|
||||||
// "absent", so the receiver wins.
|
|
||||||
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
|
|
||||||
dst := reflect.ValueOf(s).Elem()
|
|
||||||
src := reflect.ValueOf(overlay)
|
|
||||||
for i := 0; i < src.NumField(); i++ {
|
|
||||||
f := src.Field(i)
|
|
||||||
if f.Kind() == reflect.Pointer && !f.IsNil() {
|
|
||||||
dst.Field(i).Set(f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// WritePersistedSettings serialises the given RuntimeSettings to
|
// WritePersistedSettings serialises the given RuntimeSettings to
|
||||||
// runtime_settings.json with restricted permissions (it may carry API
|
// runtime_settings.json with restricted permissions (it may carry API
|
||||||
// keys and P2P tokens).
|
// keys and P2P tokens).
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func strPtr(s string) *string { return &s }
|
func strPtr(s string) *string { return &s }
|
||||||
func boolPtr(b bool) *bool { return &b }
|
|
||||||
|
|
||||||
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||||
var (
|
var (
|
||||||
@@ -52,47 +51,6 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
|
|
||||||
// relies on: a focused admin page POSTs only the field it owns, and the
|
|
||||||
// handler reads the on-disk settings and overlays the request on top.
|
|
||||||
// Without it, the body would be written verbatim and every field the
|
|
||||||
// caller omitted would be nulled (the reported regression: changing
|
|
||||||
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
|
|
||||||
Describe("MergeNonNil partial update", func() {
|
|
||||||
It("overlays set fields and preserves unset ones", func() {
|
|
||||||
base := config.RuntimeSettings{
|
|
||||||
MITMListen: strPtr(":9000"),
|
|
||||||
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
|
|
||||||
WatchdogIdleEnabled: boolPtr(true),
|
|
||||||
ApiKeys: &[]string{"persisted-key"},
|
|
||||||
PIIDefaultDetectors: &[]string{"det-a"},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simulate the Middleware proxy tab: only mitm_listen is sent.
|
|
||||||
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
|
|
||||||
base.MergeNonNil(overlay)
|
|
||||||
|
|
||||||
Expect(base.MITMListen).ToNot(BeNil())
|
|
||||||
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
|
|
||||||
// Everything the overlay left unset must survive untouched.
|
|
||||||
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
|
|
||||||
Expect(*base.Galleries).To(HaveLen(1))
|
|
||||||
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
|
|
||||||
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
|
|
||||||
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
|
|
||||||
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
|
|
||||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
|
|
||||||
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("lets an explicit empty slice clear a field", func() {
|
|
||||||
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
|
|
||||||
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
|
|
||||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
|
|
||||||
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
|
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
|
||||||
// MITM listener address must survive a write/read round trip so the
|
// MITM listener address must survive a write/read round trip so the
|
||||||
// next process restart can bring the listener back up. (Intercept
|
// next process restart can bring the listener back up. (Intercept
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
file, err := c.FormFile("file")
|
file, err := c.FormFile("file")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
|
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
|
||||||
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
|
entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
entry = entryParam
|
entry = entryParam
|
||||||
}
|
}
|
||||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
|
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
if err := c.Bind(&payload); err != nil {
|
if err := c.Bind(&payload); err != nil {
|
||||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
|
results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
|
if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
|
|||||||
if err := c.Bind(&payload); err != nil {
|
if err := c.Bind(&payload); err != nil {
|
||||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
|
remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
|
|||||||
if payload.UpdateInterval < 1 {
|
if payload.UpdateInterval < 1 {
|
||||||
payload.UpdateInterval = 60
|
payload.UpdateInterval = 60
|
||||||
}
|
}
|
||||||
if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
|
if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
|
|||||||
if err := c.Bind(&payload); err != nil {
|
if err := c.Bind(&payload); err != nil {
|
||||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
|
if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||||
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
entry = entryParam
|
entry = entryParam
|
||||||
}
|
}
|
||||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
|
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
|
sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "not found") {
|
if strings.Contains(err.Error(), "not found") {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
|
|||||||
@@ -1,49 +0,0 @@
|
|||||||
package localai
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Regression for #10443: agent/collection names carry a "legacy-api-key:"
|
|
||||||
// prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
|
|
||||||
// such paths via URL.RawPath and stores the path-param value still escaped, so
|
|
||||||
// handlers must URL-decode it before looking the collection up in the store -
|
|
||||||
// otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
|
|
||||||
var _ = Describe("decodedParam", func() {
|
|
||||||
var e *echo.Echo
|
|
||||||
|
|
||||||
BeforeEach(func() {
|
|
||||||
e = echo.New()
|
|
||||||
})
|
|
||||||
|
|
||||||
// route runs a request through Echo's real router so the path param is
|
|
||||||
// populated exactly as it would be in production, then returns the decoded
|
|
||||||
// value the handler would observe.
|
|
||||||
route := func(rawPath string) string {
|
|
||||||
var got string
|
|
||||||
e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
|
|
||||||
got = decodedParam(c, "name")
|
|
||||||
return c.NoContent(http.StatusOK)
|
|
||||||
})
|
|
||||||
req := httptest.NewRequest(http.MethodGet, rawPath, nil)
|
|
||||||
rec := httptest.NewRecorder()
|
|
||||||
e.ServeHTTP(rec, req)
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
|
||||||
return got
|
|
||||||
}
|
|
||||||
|
|
||||||
It("decodes a percent-encoded colon in the collection name", func() {
|
|
||||||
got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
|
|
||||||
Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("leaves an unencoded name untouched", func() {
|
|
||||||
got := route("/api/agents/collections/PlainCollection/upload")
|
|
||||||
Expect(got).To(Equal("PlainCollection"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"maps"
|
"maps"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"slices"
|
"slices"
|
||||||
@@ -34,22 +33,6 @@ func getUserID(c echo.Context) string {
|
|||||||
return user.ID
|
return user.ID
|
||||||
}
|
}
|
||||||
|
|
||||||
// decodedParam returns the named path parameter, URL-decoding it.
|
|
||||||
//
|
|
||||||
// Echo routes a request via URL.RawPath whenever the path contains
|
|
||||||
// percent-encoded characters (e.g. %3A for ':'), and in that case stores the
|
|
||||||
// matched path-param value raw/escaped. Agent and collection names carry a
|
|
||||||
// "legacy-api-key:" prefix, so the ':' arrives as %3A and the raw param no
|
|
||||||
// longer matches the stored name. Callers must unescape before lookups.
|
|
||||||
// Falls back to the raw value if it isn't valid percent-encoding.
|
|
||||||
func decodedParam(c echo.Context, name string) string {
|
|
||||||
raw := c.Param(name)
|
|
||||||
if decoded, err := url.PathUnescape(raw); err == nil {
|
|
||||||
return decoded
|
|
||||||
}
|
|
||||||
return raw
|
|
||||||
}
|
|
||||||
|
|
||||||
// isAdminUser returns true if the authenticated user has admin role.
|
// isAdminUser returns true if the authenticated user has admin role.
|
||||||
func isAdminUser(c echo.Context) bool {
|
func isAdminUser(c echo.Context) bool {
|
||||||
user := auth.GetUser(c)
|
user := auth.GetUser(c)
|
||||||
@@ -144,7 +127,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
|
|
||||||
statuses := svc.ListAgentsForUser(userID)
|
statuses := svc.ListAgentsForUser(userID)
|
||||||
active, exists := statuses[name]
|
active, exists := statuses[name]
|
||||||
@@ -159,7 +142,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
var cfg state.AgentConfig
|
var cfg state.AgentConfig
|
||||||
if err := c.Bind(&cfg); err != nil {
|
if err := c.Bind(&cfg); err != nil {
|
||||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||||
@@ -178,7 +161,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
if err := svc.DeleteAgentForUser(userID, name); err != nil {
|
if err := svc.DeleteAgentForUser(userID, name); err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
@@ -190,7 +173,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
cfg := svc.GetAgentConfigForUser(userID, name)
|
cfg := svc.GetAgentConfigForUser(userID, name)
|
||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
|
||||||
@@ -203,7 +186,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||||
@@ -214,7 +197,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||||
@@ -225,7 +208,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
|
|
||||||
history := svc.GetAgentStatusForUser(userID, name)
|
history := svc.GetAgentStatusForUser(userID, name)
|
||||||
if history == nil {
|
if history == nil {
|
||||||
@@ -258,7 +241,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
|
|
||||||
history, err := svc.GetAgentObservablesForUser(userID, name)
|
history, err := svc.GetAgentObservablesForUser(userID, name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -278,7 +261,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
|
if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
}
|
}
|
||||||
@@ -290,7 +273,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
var payload struct {
|
var payload struct {
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
}
|
}
|
||||||
@@ -319,7 +302,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
|
|
||||||
// Try local SSE manager first
|
// Try local SSE manager first
|
||||||
manager := svc.GetSSEManagerForUser(userID, name)
|
manager := svc.GetSSEManagerForUser(userID, name)
|
||||||
@@ -351,7 +334,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
svc := app.AgentPoolService()
|
svc := app.AgentPoolService()
|
||||||
userID := effectiveUserID(c)
|
userID := effectiveUserID(c)
|
||||||
name := decodedParam(c, "name")
|
name := c.Param("name")
|
||||||
data, err := svc.ExportAgentForUser(userID, name)
|
data, err := svc.ExportAgentForUser(userID, name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||||
|
|||||||
@@ -385,23 +385,6 @@ func GetNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListAllNodeModelsEndpoint returns all loaded models across all healthy nodes.
|
|
||||||
// @Summary List all loaded models cluster-wide
|
|
||||||
// @Tags Nodes
|
|
||||||
// @Success 200 {array} nodes.NodeModel
|
|
||||||
// @Router /api/nodes/models [get]
|
|
||||||
func ListAllNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
|
||||||
return func(c echo.Context) error {
|
|
||||||
ctx := c.Request().Context()
|
|
||||||
models, err := registry.ListAllLoadedModels(ctx)
|
|
||||||
if err != nil {
|
|
||||||
xlog.Error("Failed to list all node models", "error", err)
|
|
||||||
return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to list node models"))
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusOK, models)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// DrainNodeEndpoint sets a node to draining status (no new requests).
|
// DrainNodeEndpoint sets a node to draining status (no new requests).
|
||||||
func DrainNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
func DrainNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
|
|||||||
@@ -407,44 +407,4 @@ var _ = Describe("Node HTTP handlers", func() {
|
|||||||
Expect(names).To(ConsistOf("alpha", "beta"))
|
Expect(names).To(ConsistOf("alpha", "beta"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
Describe("ListAllNodeModelsEndpoint", func() {
|
|
||||||
It("returns an empty list when no models are loaded", func() {
|
|
||||||
e := echo.New()
|
|
||||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
|
||||||
rec := httptest.NewRecorder()
|
|
||||||
c := e.NewContext(req, rec)
|
|
||||||
|
|
||||||
handler := ListAllNodeModelsEndpoint(registry)
|
|
||||||
Expect(handler(c)).To(Succeed())
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
|
||||||
|
|
||||||
var list []nodes.NodeModel
|
|
||||||
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
|
||||||
Expect(list).To(BeEmpty())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("returns loaded models across healthy nodes", func() {
|
|
||||||
ctx := context.Background()
|
|
||||||
Expect(registry.Register(ctx, &nodes.BackendNode{
|
|
||||||
ID: "n1", Name: "alpha", Address: "10.0.0.1:50051", Status: nodes.StatusHealthy,
|
|
||||||
}, true)).To(Succeed())
|
|
||||||
Expect(registry.SetNodeModel(ctx, "n1", "llama-3.3", 0, "loaded", "10.0.0.1:50051", 0)).To(Succeed())
|
|
||||||
|
|
||||||
e := echo.New()
|
|
||||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
|
||||||
rec := httptest.NewRecorder()
|
|
||||||
c := e.NewContext(req, rec)
|
|
||||||
|
|
||||||
handler := ListAllNodeModelsEndpoint(registry)
|
|
||||||
Expect(handler(c)).To(Succeed())
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
|
||||||
|
|
||||||
var list []nodes.NodeModel
|
|
||||||
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
|
||||||
Expect(list).To(HaveLen(1))
|
|
||||||
Expect(list[0].ModelName).To(Equal("llama-3.3"))
|
|
||||||
Expect(list[0].NodeID).To(Equal("n1"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
@@ -108,18 +110,6 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read whatever is already persisted: it is both the source of truth
|
|
||||||
// for branding asset filenames (below) and the base we merge this
|
|
||||||
// request onto before writing. A read failure must not let a Save
|
|
||||||
// silently discard the existing settings — surface it instead.
|
|
||||||
persisted, err := appConfig.ReadPersistedSettings()
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
||||||
Success: false,
|
|
||||||
Error: "Failed to read existing settings: " + err.Error(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Branding asset filenames are owned exclusively by
|
// Branding asset filenames are owned exclusively by
|
||||||
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
||||||
// round-trips them via GET /api/settings, but its local state is stale
|
// round-trips them via GET /api/settings, but its local state is stale
|
||||||
@@ -128,9 +118,11 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
// at page open. Replace whatever the body sent for these three fields
|
// at page open. Replace whatever the body sent for these three fields
|
||||||
// with the values currently on disk so /api/settings can never
|
// with the values currently on disk so /api/settings can never
|
||||||
// regress them.
|
// regress them.
|
||||||
settings.LogoFile = persisted.LogoFile
|
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
||||||
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
|
settings.LogoFile = existing.LogoFile
|
||||||
settings.FaviconFile = persisted.FaviconFile
|
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
||||||
|
settings.FaviconFile = existing.FaviconFile
|
||||||
|
}
|
||||||
|
|
||||||
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
||||||
// merged env+runtime list. When the user clicks Save, the same merged
|
// merged env+runtime list. When the user clicks Save, the same merged
|
||||||
@@ -153,17 +145,16 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
settings.ApiKeys = &runtimeOnly
|
settings.ApiKeys = &runtimeOnly
|
||||||
}
|
}
|
||||||
|
|
||||||
// Persist as a partial update: overlay only the fields this request set
|
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||||
// onto the settings already on disk. Focused admin pages POST just the
|
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||||
// keys they own (the Middleware proxy tab sends only mitm_listen; the
|
if err != nil {
|
||||||
// detector table only pii_default_detectors), so writing the request
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||||
// body verbatim would null every unrelated setting (the no-omitempty
|
Success: false,
|
||||||
// api_keys / pii_default_detectors fields even round-trip as JSON
|
Error: "Failed to marshal settings: " + err.Error(),
|
||||||
// null). The full Settings page still round-trips every field, so its
|
})
|
||||||
// Save is unchanged.
|
}
|
||||||
toPersist := persisted
|
|
||||||
toPersist.MergeNonNil(settings)
|
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||||
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||||
Success: false,
|
Success: false,
|
||||||
Error: "Failed to write settings file: " + err.Error(),
|
Error: "Failed to write settings file: " + err.Error(),
|
||||||
@@ -271,14 +262,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rebuild the MITM listener when its address OR the instance-wide
|
if settings.MITMListen != nil {
|
||||||
// default detectors change. The per-host detector map is resolved once
|
|
||||||
// at listener start (startMITMLocked → ResolvePIIPolicy), so a
|
|
||||||
// default-detector change is otherwise invisible to cloud-proxy traffic
|
|
||||||
// until the next restart — an admin toggling a default detector would
|
|
||||||
// see no redaction. RestartMITM is a no-op when the listener is
|
|
||||||
// disabled (empty address).
|
|
||||||
if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
|
|
||||||
if err := app.RestartMITM(); err != nil {
|
if err := app.RestartMITM(); err != nil {
|
||||||
xlog.Error("Failed to restart MITM proxy", "error", err)
|
xlog.Error("Failed to restart MITM proxy", "error", err)
|
||||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||||
|
|||||||
@@ -52,10 +52,6 @@ var _ = Describe("Settings endpoints", func() {
|
|||||||
// Settings are persisted here; set after construction since there's no
|
// Settings are persisted here; set after construction since there's no
|
||||||
// dedicated AppOption for it.
|
// dedicated AppOption for it.
|
||||||
app.ApplicationConfig().DynamicConfigsDir = tmp
|
app.ApplicationConfig().DynamicConfigsDir = tmp
|
||||||
// Contain the MITM CA inside tmp too. The partial-save spec flips
|
|
||||||
// mitm_listen, which starts the listener and writes a CA; without this
|
|
||||||
// it defaults to ./mitm-ca and litters the package source tree.
|
|
||||||
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
|
|
||||||
|
|
||||||
e = echo.New()
|
e = echo.New()
|
||||||
e.GET("/api/settings", GetSettingsEndpoint(app))
|
e.GET("/api/settings", GetSettingsEndpoint(app))
|
||||||
@@ -113,57 +109,6 @@ var _ = Describe("Settings endpoints", func() {
|
|||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
})
|
})
|
||||||
|
|
||||||
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
|
|
||||||
// the one field it owns — mitm_listen. The old handler wrote the request
|
|
||||||
// body verbatim, so every other persisted setting was dropped (and
|
|
||||||
// api_keys / pii_default_detectors, which lack omitempty, were written as
|
|
||||||
// null). A partial POST must now merge onto what is already on disk.
|
|
||||||
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
|
|
||||||
// First save establishes a fuller settings file (as the full Settings
|
|
||||||
// page would): galleries, an API key, and the MITM listener. The
|
|
||||||
// listener restart binds a real socket, so use 127.0.0.1:0 for an
|
|
||||||
// ephemeral free port rather than a fixed one that may be in use.
|
|
||||||
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
|
||||||
|
|
||||||
// The Middleware proxy tab then changes only the listen address — the
|
|
||||||
// exact partial body that nulled everything else before the fix.
|
|
||||||
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
|
||||||
|
|
||||||
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
|
|
||||||
Expect(err).ToNot(HaveOccurred())
|
|
||||||
var ondisk config.RuntimeSettings
|
|
||||||
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
|
|
||||||
|
|
||||||
Expect(ondisk.MITMListen).ToNot(BeNil())
|
|
||||||
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
|
|
||||||
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
|
|
||||||
Expect(*ondisk.Galleries).To(HaveLen(1))
|
|
||||||
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
|
|
||||||
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
|
|
||||||
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
|
|
||||||
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
|
||||||
})
|
|
||||||
|
|
||||||
// The MITM listener resolves its per-host PII detectors once at start
|
|
||||||
// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
|
|
||||||
// only when mitm_listen changed. So an admin toggling a default detector
|
|
||||||
// (the Middleware detector table POSTs only pii_default_detectors) left
|
|
||||||
// cloud-proxy traffic unredacted until the next reboot. A
|
|
||||||
// pii_default_detectors change must now rebuild the listener.
|
|
||||||
It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
|
|
||||||
rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
|
||||||
srv1 := app.MITMServer()
|
|
||||||
Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
|
|
||||||
|
|
||||||
rec = post(`{"pii_default_detectors":["det-a"]}`)
|
|
||||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
|
||||||
Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
|
|
||||||
"a default-detector change must restart the listener so it picks up the new detectors")
|
|
||||||
})
|
|
||||||
|
|
||||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||||
// React master toggle must start the live watchdog immediately, without a
|
// React master toggle must start the live watchdog immediately, without a
|
||||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -135,18 +134,6 @@ type Session struct {
|
|||||||
// pairs are kept together so we never feed an orphaned tool result.
|
// pairs are kept together so we never feed an orphaned tool result.
|
||||||
MaxHistoryItems int
|
MaxHistoryItems int
|
||||||
|
|
||||||
// Compaction settings resolved from pipeline.compaction (see resolveCompaction).
|
|
||||||
CompactionEnabled bool
|
|
||||||
CompactionTrigger int
|
|
||||||
SummaryModel string
|
|
||||||
MaxSummaryTokens int
|
|
||||||
|
|
||||||
// summarizerFactory lazily builds the model used for compaction summaries
|
|
||||||
// when summary_model is configured; nil means reuse the pipeline LLM.
|
|
||||||
summarizerFactory func() (Model, error)
|
|
||||||
summarizerOnce sync.Once
|
|
||||||
summarizerCached Model
|
|
||||||
|
|
||||||
// AssistantExecutor is non-nil when the session opted into the in-process
|
// AssistantExecutor is non-nil when the session opted into the in-process
|
||||||
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
// LocalAI Assistant tool surface. Tool calls whose name matches this
|
||||||
// executor's catalog are run inproc and their output is fed back to the
|
// executor's catalog are run inproc and their output is fed back to the
|
||||||
@@ -254,12 +241,6 @@ type Conversation struct {
|
|||||||
ID string
|
ID string
|
||||||
Items []*types.MessageItemUnion
|
Items []*types.MessageItemUnion
|
||||||
Lock sync.Mutex
|
Lock sync.Mutex
|
||||||
// Memory is the rolling summary of items already evicted by compaction. It
|
|
||||||
// is kept out of Items (so trimRealtimeItems never drops it) and rendered
|
|
||||||
// as a system message right after the session instructions.
|
|
||||||
Memory string
|
|
||||||
// compacting ensures at most one background compaction runs per conversation.
|
|
||||||
compacting atomic.Bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Conversation) ToServer() types.Conversation {
|
func (c *Conversation) ToServer() types.Conversation {
|
||||||
@@ -559,12 +540,13 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
SoundDetectionWindowMs: cfg.Pipeline.SoundDetectionWindowMs,
|
SoundDetectionWindowMs: cfg.Pipeline.SoundDetectionWindowMs,
|
||||||
SoundDetectionHopMs: cfg.Pipeline.SoundDetectionHopMs,
|
SoundDetectionHopMs: cfg.Pipeline.SoundDetectionHopMs,
|
||||||
}
|
}
|
||||||
session.CompactionEnabled, session.CompactionTrigger, session.MaxSummaryTokens, session.SummaryModel = resolveCompaction(cfg, session.MaxHistoryItems)
|
|
||||||
|
|
||||||
// Create a default conversation
|
// Create a default conversation
|
||||||
conversationID := generateConversationID()
|
conversationID := generateConversationID()
|
||||||
conversation := &Conversation{
|
conversation := &Conversation{
|
||||||
ID: conversationID,
|
ID: conversationID,
|
||||||
|
// TODO: We need to truncate the conversation items when a new item is added and we have run out of space. There are multiple places where items
|
||||||
|
// can be added so we could use a datastructure here that enforces truncation upon addition
|
||||||
Items: []*types.MessageItemUnion{},
|
Items: []*types.MessageItemUnion{},
|
||||||
}
|
}
|
||||||
session.Conversations[conversationID] = conversation
|
session.Conversations[conversationID] = conversation
|
||||||
@@ -595,18 +577,6 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
}
|
}
|
||||||
session.ModelInterface = m
|
session.ModelInterface = m
|
||||||
|
|
||||||
if session.SummaryModel != "" {
|
|
||||||
summaryModelName := session.SummaryModel
|
|
||||||
sid := sessionID
|
|
||||||
session.summarizerFactory = func() (Model, error) {
|
|
||||||
summaryCfg, lerr := application.ModelConfigLoader().LoadModelConfigFileByNameDefaultOptions(summaryModelName, application.ApplicationConfig())
|
|
||||||
if lerr != nil {
|
|
||||||
return nil, fmt.Errorf("load summary model config %q: %w", summaryModelName, lerr)
|
|
||||||
}
|
|
||||||
return newModel(&summaryCfg.Pipeline, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), evaluator, buildRealtimeRoutingContext(application, sid))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg.Pipeline.VoiceGateEnabled() {
|
if cfg.Pipeline.VoiceGateEnabled() {
|
||||||
gate, gerr := newVoiceGate(
|
gate, gerr := newVoiceGate(
|
||||||
*cfg.Pipeline.VoiceRecognition,
|
*cfg.Pipeline.VoiceRecognition,
|
||||||
@@ -837,15 +807,6 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
commitUtterance(respCtx, allAudio, session, conversation, t)
|
commitUtterance(respCtx, allAudio, session, conversation, t)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
case types.InputAudioBufferClearEvent:
|
|
||||||
xlog.Debug("recv", "message", string(msg))
|
|
||||||
// Discard a partially-captured utterance so the client can restart
|
|
||||||
// input cleanly without the stale buffer leaking into the next commit.
|
|
||||||
clearInputAudio(session)
|
|
||||||
sendEvent(t, types.InputAudioBufferClearedEvent{
|
|
||||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
|
||||||
})
|
|
||||||
|
|
||||||
case types.ConversationItemCreateEvent:
|
case types.ConversationItemCreateEvent:
|
||||||
xlog.Debug("recv", "message", string(msg))
|
xlog.Debug("recv", "message", string(msg))
|
||||||
// Add the item to the conversation
|
// Add the item to the conversation
|
||||||
@@ -880,39 +841,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
})
|
})
|
||||||
|
|
||||||
case types.ConversationItemDeleteEvent:
|
case types.ConversationItemDeleteEvent:
|
||||||
xlog.Debug("recv", "message", string(msg))
|
sendError(t, "not_implemented", "Deleting items not implemented", "", "event_TODO")
|
||||||
if e.ItemID == "" {
|
|
||||||
sendError(t, "invalid_item_id", "Need item_id, but none specified", "", "event_TODO")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
conversation.Lock.Lock()
|
|
||||||
updated, ok := deleteItem(conversation.Items, e.ItemID)
|
|
||||||
conversation.Items = updated
|
|
||||||
conversation.Lock.Unlock()
|
|
||||||
if !ok {
|
|
||||||
sendError(t, "invalid_item_id", "Item to delete not found", "", "event_TODO")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
sendEvent(t, types.ConversationItemDeletedEvent{
|
|
||||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
|
||||||
ItemID: e.ItemID,
|
|
||||||
})
|
|
||||||
|
|
||||||
case types.ConversationItemTruncateEvent:
|
|
||||||
xlog.Debug("recv", "message", string(msg))
|
|
||||||
conversation.Lock.Lock()
|
|
||||||
ok := truncateAssistantText(conversation.Items, e.ItemID, e.ContentIndex)
|
|
||||||
conversation.Lock.Unlock()
|
|
||||||
if !ok {
|
|
||||||
sendError(t, "invalid_item_id", "Item to truncate not found", "", "event_TODO")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
sendEvent(t, types.ConversationItemTruncatedEvent{
|
|
||||||
ServerEventBase: types.ServerEventBase{EventID: e.EventID},
|
|
||||||
ItemID: e.ItemID,
|
|
||||||
ContentIndex: e.ContentIndex,
|
|
||||||
AudioEndMs: e.AudioEndMs,
|
|
||||||
})
|
|
||||||
|
|
||||||
case types.ConversationItemRetrieveEvent:
|
case types.ConversationItemRetrieveEvent:
|
||||||
xlog.Debug("recv", "message", string(msg))
|
xlog.Debug("recv", "message", string(msg))
|
||||||
@@ -925,7 +854,21 @@ func runRealtimeSession(application *application.Application, t Transport, model
|
|||||||
conversation.Lock.Lock()
|
conversation.Lock.Lock()
|
||||||
var retrievedItem types.MessageItemUnion
|
var retrievedItem types.MessageItemUnion
|
||||||
for _, item := range conversation.Items {
|
for _, item := range conversation.Items {
|
||||||
if itemID(item) == e.ItemID {
|
// We need to check ID in the union
|
||||||
|
var id string
|
||||||
|
if item.System != nil {
|
||||||
|
id = item.System.ID
|
||||||
|
} else if item.User != nil {
|
||||||
|
id = item.User.ID
|
||||||
|
} else if item.Assistant != nil {
|
||||||
|
id = item.Assistant.ID
|
||||||
|
} else if item.FunctionCall != nil {
|
||||||
|
id = item.FunctionCall.ID
|
||||||
|
} else if item.FunctionCallOutput != nil {
|
||||||
|
id = item.FunctionCallOutput.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
if id == e.ItemID {
|
||||||
retrievedItem = *item
|
retrievedItem = *item
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -1723,9 +1666,6 @@ const maxAssistantToolTurns = 10
|
|||||||
|
|
||||||
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
|
func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
|
||||||
triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
|
triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
|
||||||
// Fold aged-out turns into the rolling memory off the critical path; the
|
|
||||||
// next turn reaps the smaller buffer.
|
|
||||||
session.maybeCompact(conv)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
|
||||||
@@ -1781,7 +1721,6 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
|
|||||||
var lastUserSpeaker *types.Speaker
|
var lastUserSpeaker *types.Speaker
|
||||||
personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
|
personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
|
||||||
conv.Lock.Lock()
|
conv.Lock.Lock()
|
||||||
conversationHistory = withMemory(conversationHistory, conv.Memory)
|
|
||||||
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
if item.User != nil {
|
if item.User != nil {
|
||||||
|
|||||||
@@ -1,326 +0,0 @@
|
|||||||
package openai
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
|
||||||
"github.com/mudler/xlog"
|
|
||||||
)
|
|
||||||
|
|
||||||
// defaultMaxSummaryTokens is the summary size budget used when the compaction
// config does not supply a positive MaxSummaryTokens.
const defaultMaxSummaryTokens = 512

// memoryPrefix introduces the rolling summary inside the system message that
// withMemory appends to the prompt history.
const memoryPrefix = "Summary of earlier conversation:\n"

// compactionTimeout caps how long one summarizer call may run, so a stuck
// model cannot hold the per-conversation compacting flag (and with it every
// later compaction) indefinitely.
const compactionTimeout = 60 * time.Second
|
|
||||||
|
|
||||||
// withMemory inserts the rolling summary as a system message after the existing
|
|
||||||
// (instructions) history. No-op when memory is empty.
|
|
||||||
func withMemory(history schema.Messages, memory string) schema.Messages {
|
|
||||||
if memory == "" {
|
|
||||||
return history
|
|
||||||
}
|
|
||||||
content := memoryPrefix + memory
|
|
||||||
return append(history, schema.Message{
|
|
||||||
Role: string(types.MessageRoleSystem),
|
|
||||||
StringContent: content,
|
|
||||||
Content: content,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// renderItemsTranscript renders conversation items as a plain "role: text"
|
|
||||||
// transcript for summarization. Non-text items (bare tool calls) are labelled
|
|
||||||
// so the summarizer keeps track of actions taken.
|
|
||||||
func renderItemsTranscript(items []*types.MessageItemUnion) string {
|
|
||||||
var b strings.Builder
|
|
||||||
for _, item := range items {
|
|
||||||
switch {
|
|
||||||
case item.User != nil:
|
|
||||||
b.WriteString("user: ")
|
|
||||||
for _, c := range item.User.Content {
|
|
||||||
if c.Text != "" {
|
|
||||||
b.WriteString(c.Text)
|
|
||||||
}
|
|
||||||
if c.Transcript != "" {
|
|
||||||
b.WriteString(c.Transcript)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b.WriteString("\n")
|
|
||||||
case item.Assistant != nil:
|
|
||||||
b.WriteString("assistant: ")
|
|
||||||
// Realtime assistant *audio* turns store the spoken words in
|
|
||||||
// .Transcript (not .Text), so emit both or spoken turns are dropped.
|
|
||||||
for _, c := range item.Assistant.Content {
|
|
||||||
if c.Text != "" {
|
|
||||||
b.WriteString(c.Text)
|
|
||||||
}
|
|
||||||
if c.Transcript != "" {
|
|
||||||
b.WriteString(c.Transcript)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b.WriteString("\n")
|
|
||||||
case item.FunctionCall != nil:
|
|
||||||
b.WriteString(fmt.Sprintf("assistant called tool %s(%s)\n", item.FunctionCall.Name, item.FunctionCall.Arguments))
|
|
||||||
case item.FunctionCallOutput != nil:
|
|
||||||
b.WriteString(fmt.Sprintf("tool result: %s\n", item.FunctionCallOutput.Output))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return strings.TrimSpace(b.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
// buildSummaryMessages builds the chat messages for the summarizer LLM: a system
|
|
||||||
// instruction plus prior memory and the new transcript to fold in. maxTokens is
|
|
||||||
// advisory (fed to the prompt; not hard-enforced in v1).
|
|
||||||
func buildSummaryMessages(priorMemory, transcript string, maxTokens int) schema.Messages {
|
|
||||||
system := fmt.Sprintf("You maintain a running memory of a live voice conversation. "+
|
|
||||||
"Merge the prior memory with the new exchanges into an updated memory. "+
|
|
||||||
"Keep names, decisions, facts, preferences, and open threads. Be concise "+
|
|
||||||
"(under ~%d tokens). Output only the updated memory, with no reasoning or tags.", maxTokens)
|
|
||||||
var user strings.Builder
|
|
||||||
if priorMemory != "" {
|
|
||||||
user.WriteString("Prior memory:\n")
|
|
||||||
user.WriteString(priorMemory)
|
|
||||||
user.WriteString("\n\n")
|
|
||||||
}
|
|
||||||
user.WriteString("New exchanges to fold in:\n")
|
|
||||||
user.WriteString(transcript)
|
|
||||||
return schema.Messages{
|
|
||||||
{Role: string(types.MessageRoleSystem), StringContent: system, Content: system},
|
|
||||||
{Role: string(types.MessageRoleUser), StringContent: user.String(), Content: user.String()},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// clearInputAudio resets the session's pending input audio buffer (the raw
|
|
||||||
// PCM and any buffered Opus frames). Used by the input_audio_buffer.clear
|
|
||||||
// realtime event so a client can discard a partially-captured utterance.
|
|
||||||
func clearInputAudio(s *Session) {
|
|
||||||
s.AudioBufferLock.Lock()
|
|
||||||
s.InputAudioBuffer = nil
|
|
||||||
s.AudioBufferLock.Unlock()
|
|
||||||
s.OpusFramesLock.Lock()
|
|
||||||
s.OpusFrames = nil
|
|
||||||
s.OpusFramesLock.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
// itemID extracts the id from any MessageItemUnion variant ("" if none).
|
|
||||||
func itemID(item *types.MessageItemUnion) string {
|
|
||||||
switch {
|
|
||||||
case item == nil:
|
|
||||||
return ""
|
|
||||||
case item.System != nil:
|
|
||||||
return item.System.ID
|
|
||||||
case item.User != nil:
|
|
||||||
return item.User.ID
|
|
||||||
case item.Assistant != nil:
|
|
||||||
return item.Assistant.ID
|
|
||||||
case item.FunctionCall != nil:
|
|
||||||
return item.FunctionCall.ID
|
|
||||||
case item.FunctionCallOutput != nil:
|
|
||||||
return item.FunctionCallOutput.ID
|
|
||||||
default:
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// deleteItem removes the item with id from items, returning the new slice and
|
|
||||||
// whether it was found.
|
|
||||||
func deleteItem(items []*types.MessageItemUnion, id string) ([]*types.MessageItemUnion, bool) {
|
|
||||||
for i, item := range items {
|
|
||||||
if itemID(item) == id {
|
|
||||||
return append(items[:i:i], items[i+1:]...), true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return items, false
|
|
||||||
}
|
|
||||||
|
|
||||||
// truncateAssistantText clears the text of the assistant item's content part at
|
|
||||||
// contentIndex. Minimal truncate: used to discard an interrupted/barge-in
|
|
||||||
// response tail. Both .Text and .Transcript are cleared because realtime audio
|
|
||||||
// turns store the spoken words in .Transcript (clearing only .Text would no-op).
|
|
||||||
func truncateAssistantText(items []*types.MessageItemUnion, id string, contentIndex int) bool {
|
|
||||||
for _, item := range items {
|
|
||||||
if itemID(item) != id || item.Assistant == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if contentIndex >= 0 && contentIndex < len(item.Assistant.Content) {
|
|
||||||
item.Assistant.Content[contentIndex].Text = ""
|
|
||||||
item.Assistant.Content[contentIndex].Transcript = ""
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// compactionCut returns the index splitting items into overflow (items[:cut],
|
|
||||||
// to be summarized+evicted) and the kept live tail (items[cut:]), keeping the
|
|
||||||
// last `keep` items. It mirrors trimRealtimeItems' pair-safety: the cut is
|
|
||||||
// pulled left so a function_call and its function_call_output are never split
|
|
||||||
// across the boundary (the whole pair lands in the kept tail). Returns 0 when
|
|
||||||
// there is nothing to cut.
|
|
||||||
func compactionCut(items []*types.MessageItemUnion, keep int) int {
|
|
||||||
// keep <= 0 means no live-window cap (the "unlimited history" sentinel, as
|
|
||||||
// in trimRealtimeItems): there is nothing to evict, so cut nothing. This
|
|
||||||
// also avoids indexing items[len(items)] in the pair-safety loop below.
|
|
||||||
if keep <= 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
cut := len(items) - keep
|
|
||||||
if cut <= 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
for cut > 0 && items[cut] != nil && items[cut].FunctionCallOutput != nil {
|
|
||||||
cut--
|
|
||||||
}
|
|
||||||
return cut
|
|
||||||
}
|
|
||||||
|
|
||||||
// resolveCompaction reads the pipeline.compaction block, applying defaults and
|
|
||||||
// the trigger>max_history invariant. maxHistory is the already-resolved live
|
|
||||||
// window size. Returns enabled=false (and zero values) when compaction is off.
|
|
||||||
func resolveCompaction(cfg *config.ModelConfig, maxHistory int) (enabled bool, trigger, maxSummaryTokens int, summaryModel string) {
|
|
||||||
if cfg == nil || cfg.Pipeline.Compaction == nil || !cfg.Pipeline.Compaction.Enabled {
|
|
||||||
return false, 0, 0, ""
|
|
||||||
}
|
|
||||||
c := cfg.Pipeline.Compaction
|
|
||||||
trigger = c.TriggerItems
|
|
||||||
if trigger <= 0 {
|
|
||||||
trigger = maxHistory * 2
|
|
||||||
}
|
|
||||||
if trigger <= maxHistory {
|
|
||||||
trigger = maxHistory + 1
|
|
||||||
}
|
|
||||||
maxSummaryTokens = c.MaxSummaryTokens
|
|
||||||
if maxSummaryTokens <= 0 {
|
|
||||||
maxSummaryTokens = defaultMaxSummaryTokens
|
|
||||||
}
|
|
||||||
return true, trigger, maxSummaryTokens, c.SummaryModel
|
|
||||||
}
|
|
||||||
|
|
||||||
// prefixMatches reports whether items begins with the same ids, in order, as
|
|
||||||
// snapshot — i.e. the overflow we summarized is still at the head (no concurrent
|
|
||||||
// client delete reshuffled it).
|
|
||||||
func prefixMatches(items, snapshot []*types.MessageItemUnion) bool {
|
|
||||||
if len(items) < len(snapshot) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i := range snapshot {
|
|
||||||
if itemID(items[i]) != itemID(snapshot[i]) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// compact folds overflow items into conv.Memory and evicts them. It never holds
|
|
||||||
// conv.Lock across the summarizer call: snapshot under lock, summarize unlocked,
|
|
||||||
// commit under lock (re-validating the head is unchanged). On any error it
|
|
||||||
// leaves the conversation untouched — items are never dropped without a summary.
|
|
||||||
func (s *Session) compact(conv *Conversation, model Model) {
|
|
||||||
if model == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Snapshot.
|
|
||||||
conv.Lock.Lock()
|
|
||||||
if len(conv.Items) <= s.CompactionTrigger {
|
|
||||||
conv.Lock.Unlock()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
cut := compactionCut(conv.Items, s.MaxHistoryItems)
|
|
||||||
if cut <= 0 {
|
|
||||||
conv.Lock.Unlock()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
overflow := append([]*types.MessageItemUnion(nil), conv.Items[:cut]...)
|
|
||||||
prior := conv.Memory
|
|
||||||
conv.Lock.Unlock()
|
|
||||||
|
|
||||||
// Summarize (unlocked).
|
|
||||||
msgs := buildSummaryMessages(prior, renderItemsTranscript(overflow), s.MaxSummaryTokens)
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), compactionTimeout)
|
|
||||||
defer cancel()
|
|
||||||
predFunc, err := model.Predict(ctx, msgs, nil, nil, nil, nil, nil, nil, nil, nil, nil)
|
|
||||||
if err != nil {
|
|
||||||
xlog.Warn("realtime compaction: summarizer predict failed", "error", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
pred, err := predFunc()
|
|
||||||
if err != nil {
|
|
||||||
xlog.Warn("realtime compaction: summarizer inference failed", "error", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Strip any leaked reasoning/thinking spans using the same extractor the
|
|
||||||
// rest of the realtime path uses, rather than a bespoke regex.
|
|
||||||
rcfg := reasoning.Config{}
|
|
||||||
if mc := model.PredictConfig(); mc != nil {
|
|
||||||
rcfg = spokenReasoningConfig(mc.ReasoningConfig)
|
|
||||||
}
|
|
||||||
_, summary := reasoning.ExtractReasoningComplete(pred.Response, "", rcfg)
|
|
||||||
summary = strings.TrimSpace(summary)
|
|
||||||
if summary == "" {
|
|
||||||
xlog.Warn("realtime compaction: empty summary, skipping eviction")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Commit.
|
|
||||||
conv.Lock.Lock()
|
|
||||||
defer conv.Lock.Unlock()
|
|
||||||
if !prefixMatches(conv.Items, overflow) {
|
|
||||||
xlog.Debug("realtime compaction: head changed during summary, skipping")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
conv.Memory = summary
|
|
||||||
conv.Items = conv.Items[len(overflow):]
|
|
||||||
xlog.Debug("realtime compaction: evicted items into memory", "evicted", len(overflow), "remaining", len(conv.Items))
|
|
||||||
}
|
|
||||||
|
|
||||||
// summarizerModel resolves the model used to produce compaction summaries.
|
|
||||||
// Without a configured summary_model (or factory) it reuses the pipeline LLM.
|
|
||||||
func (s *Session) summarizerModel() Model {
|
|
||||||
if s.SummaryModel == "" || s.summarizerFactory == nil {
|
|
||||||
return s.ModelInterface
|
|
||||||
}
|
|
||||||
s.summarizerOnce.Do(func() {
|
|
||||||
m, err := s.summarizerFactory()
|
|
||||||
if err != nil {
|
|
||||||
xlog.Warn("realtime compaction: summary_model load failed, falling back to pipeline LLM", "model", s.SummaryModel, "error", err)
|
|
||||||
m = s.ModelInterface
|
|
||||||
}
|
|
||||||
s.summarizerCached = m
|
|
||||||
})
|
|
||||||
return s.summarizerCached
|
|
||||||
}
|
|
||||||
|
|
||||||
// maybeCompact schedules a background compaction when the live buffer has grown
|
|
||||||
// past the trigger and none is already running. Returns immediately.
|
|
||||||
func (s *Session) maybeCompact(conv *Conversation) {
|
|
||||||
if !s.CompactionEnabled {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
conv.Lock.Lock()
|
|
||||||
over := len(conv.Items) > s.CompactionTrigger
|
|
||||||
conv.Lock.Unlock()
|
|
||||||
if !over {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if !conv.compacting.CompareAndSwap(false, true) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
go func() {
|
|
||||||
defer conv.compacting.Store(false)
|
|
||||||
// Resolve (and, for a configured summary_model, lazily load) the
|
|
||||||
// summarizer only when a compaction actually runs, off the response
|
|
||||||
// path — so the model load never blocks a user turn.
|
|
||||||
model := s.summarizerModel()
|
|
||||||
if model == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
s.compact(conv, model)
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
@@ -1,308 +0,0 @@
|
|||||||
package openai
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
var _ = Describe("resolveCompaction", func() {
|
|
||||||
It("disables when the block is absent", func() {
|
|
||||||
enabled, _, _, _ := resolveCompaction(&config.ModelConfig{}, 6)
|
|
||||||
Expect(enabled).To(BeFalse())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("defaults trigger to 2x max history and tokens to 512", func() {
|
|
||||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true}}}
|
|
||||||
enabled, trigger, maxTok, _ := resolveCompaction(cfg, 6)
|
|
||||||
Expect(enabled).To(BeTrue())
|
|
||||||
Expect(trigger).To(Equal(12))
|
|
||||||
Expect(maxTok).To(Equal(512))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("clamps trigger to max history + 1 when misconfigured", func() {
|
|
||||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{Enabled: true, TriggerItems: 4}}}
|
|
||||||
_, trigger, _, _ := resolveCompaction(cfg, 6)
|
|
||||||
Expect(trigger).To(Equal(7))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("honors explicit values", func() {
|
|
||||||
cfg := &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &config.PipelineCompaction{
|
|
||||||
Enabled: true, TriggerItems: 20, MaxSummaryTokens: 256, SummaryModel: "tiny"}}}
|
|
||||||
enabled, trigger, maxTok, model := resolveCompaction(cfg, 6)
|
|
||||||
Expect(enabled).To(BeTrue())
|
|
||||||
Expect(trigger).To(Equal(20))
|
|
||||||
Expect(maxTok).To(Equal(256))
|
|
||||||
Expect(model).To(Equal("tiny"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("deleteItem", func() {
|
|
||||||
mk := func(ids ...string) []*types.MessageItemUnion {
|
|
||||||
out := make([]*types.MessageItemUnion, len(ids))
|
|
||||||
for i, id := range ids {
|
|
||||||
out[i] = &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
|
||||||
}
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
It("removes the item with the given id", func() {
|
|
||||||
items, ok := deleteItem(mk("a", "b", "c"), "b")
|
|
||||||
Expect(ok).To(BeTrue())
|
|
||||||
Expect(len(items)).To(Equal(2))
|
|
||||||
Expect(itemID(items[0])).To(Equal("a"))
|
|
||||||
Expect(itemID(items[1])).To(Equal("c"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("reports not found for an unknown id", func() {
|
|
||||||
_, ok := deleteItem(mk("a"), "zzz")
|
|
||||||
Expect(ok).To(BeFalse())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("clearInputAudio", func() {
|
|
||||||
It("resets the pending PCM and buffered Opus frames", func() {
|
|
||||||
s := &Session{InputAudioBuffer: []byte{1, 2, 3}, OpusFrames: [][]byte{{9}}}
|
|
||||||
clearInputAudio(s)
|
|
||||||
Expect(s.InputAudioBuffer).To(BeNil())
|
|
||||||
Expect(s.OpusFrames).To(BeNil())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("truncateAssistantText", func() {
|
|
||||||
It("clears the text of the assistant content part at the index", func() {
|
|
||||||
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
|
||||||
ID: "a1",
|
|
||||||
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello world"}},
|
|
||||||
}}}
|
|
||||||
ok := truncateAssistantText(items, "a1", 0)
|
|
||||||
Expect(ok).To(BeTrue())
|
|
||||||
Expect(items[0].Assistant.Content[0].Text).To(Equal(""))
|
|
||||||
})
|
|
||||||
|
|
||||||
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
|
||||||
// .Text, so a barge-in truncate must clear .Transcript too or it would no-op.
|
|
||||||
It("clears the transcript of an assistant audio content part", func() {
|
|
||||||
items := []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
|
|
||||||
ID: "a1",
|
|
||||||
Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "hello world"}},
|
|
||||||
}}}
|
|
||||||
ok := truncateAssistantText(items, "a1", 0)
|
|
||||||
Expect(ok).To(BeTrue())
|
|
||||||
Expect(items[0].Assistant.Content[0].Transcript).To(Equal(""))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("returns false for an unknown id", func() {
|
|
||||||
Expect(truncateAssistantText(nil, "nope", 0)).To(BeFalse())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("compactionCut", func() {
|
|
||||||
user := func(id string) *types.MessageItemUnion {
|
|
||||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
|
||||||
}
|
|
||||||
call := func(id string) *types.MessageItemUnion {
|
|
||||||
return &types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: id}}
|
|
||||||
}
|
|
||||||
out := func(id string) *types.MessageItemUnion {
|
|
||||||
return &types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: id}}
|
|
||||||
}
|
|
||||||
|
|
||||||
It("cuts exactly len-keep when no pairs straddle the boundary", func() {
|
|
||||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3"), user("4")}
|
|
||||||
Expect(compactionCut(items, 2)).To(Equal(2))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("returns 0 when nothing to cut", func() {
|
|
||||||
Expect(compactionCut([]*types.MessageItemUnion{user("1")}, 2)).To(Equal(0))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("returns 0 (cuts nothing) when keep is 0 — the unlimited-window sentinel", func() {
|
|
||||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
|
||||||
Expect(compactionCut(items, 0)).To(Equal(0))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("moves the boundary so a call/output pair is not split", func() {
|
|
||||||
// keep=2 -> naive cut=2, but items[2] is the output of items[1]'s call;
|
|
||||||
// pull the cut right so the whole pair stays in the kept tail.
|
|
||||||
items := []*types.MessageItemUnion{user("1"), call("c"), out("c"), user("4")}
|
|
||||||
Expect(compactionCut(items, 2)).To(Equal(1))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("withMemory", func() {
|
|
||||||
It("inserts a memory system message when memory is non-empty", func() {
|
|
||||||
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
|
||||||
out := withMemory(base, "user is Bob; wants pizza")
|
|
||||||
Expect(len(out)).To(Equal(2))
|
|
||||||
Expect(out[1].Role).To(Equal("system"))
|
|
||||||
Expect(out[1].StringContent).To(ContainSubstring("user is Bob"))
|
|
||||||
Expect(out[1].StringContent).To(ContainSubstring("Summary of earlier conversation"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("is a no-op when memory is empty", func() {
|
|
||||||
base := schema.Messages{{Role: "system", StringContent: "instructions"}}
|
|
||||||
Expect(withMemory(base, "")).To(HaveLen(1))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("renderItemsTranscript", func() {
|
|
||||||
It("renders user and assistant text turns", func() {
|
|
||||||
items := []*types.MessageItemUnion{
|
|
||||||
{User: &types.MessageItemUser{Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: "hi"}}}},
|
|
||||||
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello"}}}},
|
|
||||||
}
|
|
||||||
out := renderItemsTranscript(items)
|
|
||||||
Expect(out).To(ContainSubstring("user: hi"))
|
|
||||||
Expect(out).To(ContainSubstring("assistant: hello"))
|
|
||||||
})
|
|
||||||
|
|
||||||
// Realtime assistant *audio* turns store the spoken words in .Transcript, not
|
|
||||||
// .Text, so the transcript builder must emit .Transcript too or spoken turns
|
|
||||||
// would be dropped from the summary.
|
|
||||||
It("renders an assistant audio turn from its transcript", func() {
|
|
||||||
items := []*types.MessageItemUnion{
|
|
||||||
{Assistant: &types.MessageItemAssistant{Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "spoken words"}}}},
|
|
||||||
}
|
|
||||||
Expect(renderItemsTranscript(items)).To(ContainSubstring("assistant: spoken words"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("buildSummaryMessages", func() {
|
|
||||||
It("includes prior memory and the new transcript", func() {
|
|
||||||
msgs := buildSummaryMessages("prior facts", "user: hi", 512)
|
|
||||||
Expect(len(msgs)).To(Equal(2))
|
|
||||||
Expect(msgs[0].Role).To(Equal("system"))
|
|
||||||
Expect(msgs[1].StringContent).To(ContainSubstring("prior facts"))
|
|
||||||
Expect(msgs[1].StringContent).To(ContainSubstring("user: hi"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("compact", func() {
|
|
||||||
user := func(id, text string) *types.MessageItemUnion {
|
|
||||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id,
|
|
||||||
Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: text}}}}
|
|
||||||
}
|
|
||||||
|
|
||||||
It("summarizes overflow into Memory and evicts it, keeping the live tail", func() {
|
|
||||||
conv := &Conversation{Items: []*types.MessageItemUnion{
|
|
||||||
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
|
||||||
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
|
||||||
}}
|
|
||||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
|
||||||
m := &fakeModel{predictResp: backend.LLMResponse{Response: "ROLLED UP"}}
|
|
||||||
|
|
||||||
s.compact(conv, m)
|
|
||||||
|
|
||||||
Expect(conv.Memory).To(Equal("ROLLED UP"))
|
|
||||||
Expect(len(conv.Items)).To(Equal(4))
|
|
||||||
Expect(itemID(conv.Items[0])).To(Equal("5"))
|
|
||||||
// The summarizer saw the evicted turns.
|
|
||||||
Expect(m.lastMessages[1].StringContent).To(ContainSubstring("a"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("leaves Items and Memory untouched when the summarizer errors", func() {
|
|
||||||
items := []*types.MessageItemUnion{user("1", "a"), user("2", "b"), user("3", "c")}
|
|
||||||
conv := &Conversation{Items: items}
|
|
||||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 2, MaxHistoryItems: 1, MaxSummaryTokens: 512}
|
|
||||||
m := &fakeModel{predictErr: errors.New("boom")}
|
|
||||||
|
|
||||||
s.compact(conv, m)
|
|
||||||
|
|
||||||
Expect(conv.Memory).To(Equal(""))
|
|
||||||
Expect(len(conv.Items)).To(Equal(3))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("strips leaked reasoning tags from the summary via the shared extractor", func() {
|
|
||||||
conv := &Conversation{Items: []*types.MessageItemUnion{
|
|
||||||
user("1", "a"), user("2", "b"), user("3", "c"), user("4", "d"),
|
|
||||||
user("5", "e"), user("6", "f"), user("7", "g"), user("8", "h"),
|
|
||||||
}}
|
|
||||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
|
|
||||||
m := &fakeModel{predictResp: backend.LLMResponse{Response: "<think>planning the summary</think>CLEAN SUMMARY"}}
|
|
||||||
|
|
||||||
s.compact(conv, m)
|
|
||||||
|
|
||||||
Expect(conv.Memory).To(Equal("CLEAN SUMMARY"))
|
|
||||||
Expect(conv.Memory).ToNot(ContainSubstring("planning"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("does nothing when items are at or below the trigger", func() {
|
|
||||||
conv := &Conversation{Items: []*types.MessageItemUnion{user("1", "a")}}
|
|
||||||
s := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4}
|
|
||||||
s.compact(conv, &fakeModel{predictResp: backend.LLMResponse{Response: "x"}})
|
|
||||||
Expect(conv.Memory).To(Equal(""))
|
|
||||||
Expect(len(conv.Items)).To(Equal(1))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("prefixMatches", func() {
|
|
||||||
user := func(id string) *types.MessageItemUnion {
|
|
||||||
return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
|
|
||||||
}
|
|
||||||
|
|
||||||
It("matches when items begins with the snapshot ids in order", func() {
|
|
||||||
items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
|
|
||||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
|
||||||
Expect(prefixMatches(items, snap)).To(BeTrue())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("matches an empty snapshot", func() {
|
|
||||||
Expect(prefixMatches([]*types.MessageItemUnion{user("1")}, nil)).To(BeTrue())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("fails when items is shorter than the snapshot (a concurrent delete shrank the head)", func() {
|
|
||||||
items := []*types.MessageItemUnion{user("1")}
|
|
||||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
|
||||||
Expect(prefixMatches(items, snap)).To(BeFalse())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("fails when the head ids differ (a concurrent delete reordered the head)", func() {
|
|
||||||
items := []*types.MessageItemUnion{user("2"), user("3")}
|
|
||||||
snap := []*types.MessageItemUnion{user("1"), user("2")}
|
|
||||||
Expect(prefixMatches(items, snap)).To(BeFalse())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("summarizerModel", func() {
|
|
||||||
It("returns the pipeline model when no summary_model is set", func() {
|
|
||||||
m := &fakeModel{}
|
|
||||||
s := &Session{ModelInterface: m}
|
|
||||||
Expect(s.summarizerModel()).To(Equal(m))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("uses the factory (once) when summary_model is set", func() {
|
|
||||||
pipeline := &fakeModel{}
|
|
||||||
small := &fakeModel{}
|
|
||||||
calls := 0
|
|
||||||
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
|
||||||
summarizerFactory: func() (Model, error) { calls++; return small, nil }}
|
|
||||||
Expect(s.summarizerModel()).To(Equal(small))
|
|
||||||
Expect(s.summarizerModel()).To(Equal(small))
|
|
||||||
Expect(calls).To(Equal(1))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("falls back to the pipeline model when the factory errors", func() {
|
|
||||||
pipeline := &fakeModel{}
|
|
||||||
s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
|
|
||||||
summarizerFactory: func() (Model, error) { return nil, errors.New("nope") }}
|
|
||||||
Expect(s.summarizerModel()).To(Equal(pipeline))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
var _ = Describe("itemID", func() {
|
|
||||||
It("returns the id for each variant and empty for nil", func() {
|
|
||||||
Expect(itemID(nil)).To(Equal(""))
|
|
||||||
Expect(itemID(&types.MessageItemUnion{User: &types.MessageItemUser{ID: "u1"}})).To(Equal("u1"))
|
|
||||||
Expect(itemID(&types.MessageItemUnion{Assistant: &types.MessageItemAssistant{ID: "a1"}})).To(Equal("a1"))
|
|
||||||
Expect(itemID(&types.MessageItemUnion{System: &types.MessageItemSystem{ID: "s1"}})).To(Equal("s1"))
|
|
||||||
Expect(itemID(&types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: "f1"}})).To(Equal("f1"))
|
|
||||||
Expect(itemID(&types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: "o1"}})).To(Equal("o1"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
|||||||
if pipeline.SoundDetection == "" {
|
if pipeline.SoundDetection == "" {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath)
|
cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to load sound detection config: %w", err)
|
return nil, fmt.Errorf("failed to load sound detection config: %w", err)
|
||||||
}
|
}
|
||||||
@@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
|||||||
}
|
}
|
||||||
|
|
||||||
func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
|
func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
|
||||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
||||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||||
@@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
|
|||||||
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
|
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
||||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||||
@@ -542,30 +542,11 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadPipelineSubModel loads a pipeline sub-model config by name and follows a
|
|
||||||
// single alias hop, so a pipeline that references an alias (e.g. `llm: default`)
|
|
||||||
// gets the alias target's full config (Backend, Model, ...) rather than the
|
|
||||||
// alias stub with an empty Backend. Without this the alias survives unresolved
|
|
||||||
// into model loading and fails downstream — notably in distributed mode with
|
|
||||||
// "backend name is empty". Mirrors the top-level alias resolution in
|
|
||||||
// core/http/middleware/request.go.
|
|
||||||
func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) {
|
|
||||||
cfg, err := cl.LoadModelConfigFileByName(name, modelPath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
resolved, _, err := cl.ResolveAlias(cfg)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return resolved, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// returns and loads either a wrapped model or a model that support audio-to-audio
|
// returns and loads either a wrapped model or a model that support audio-to-audio
|
||||||
func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
|
func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
|
||||||
xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
|
xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
|
||||||
|
|
||||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
||||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||||
@@ -576,7 +557,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
|
// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
|
||||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
||||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||||
@@ -608,7 +589,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
|||||||
xlog.Debug("Loading a wrapped model")
|
xlog.Debug("Loading a wrapped model")
|
||||||
|
|
||||||
// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
|
// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
|
||||||
cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath)
|
cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
||||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||||
@@ -623,7 +604,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
|||||||
applyPipelineReasoning(cfgLLM, *pipeline)
|
applyPipelineReasoning(cfgLLM, *pipeline)
|
||||||
applyPipelineThinking(cfgLLM, *pipeline)
|
applyPipelineThinking(cfgLLM, *pipeline)
|
||||||
|
|
||||||
cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath)
|
cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
||||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
package openai
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
)
|
|
||||||
|
|
||||||
// loadPipelineSubModel must resolve a pipeline sub-model that references an
|
|
||||||
// alias (e.g. `llm: default`) one hop to the alias target's full config — so
|
|
||||||
// the effective backend is the target's backend, not the empty backend of the
|
|
||||||
// alias stub. This mirrors the top-level alias resolution done in
|
|
||||||
// core/http/middleware/request.go, which the realtime pipeline previously
|
|
||||||
// skipped (failing in distributed mode with "backend name is empty").
|
|
||||||
var _ = Describe("loadPipelineSubModel", func() {
|
|
||||||
It("resolves a sub-model alias one hop to the target's config", func() {
|
|
||||||
tmpDir := GinkgoT().TempDir()
|
|
||||||
|
|
||||||
// A real model config with a concrete backend.
|
|
||||||
realLLM := `name: real-llm
|
|
||||||
backend: llama-cpp
|
|
||||||
parameters:
|
|
||||||
model: real-llm.gguf
|
|
||||||
`
|
|
||||||
Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed())
|
|
||||||
|
|
||||||
// An alias pointing at the real model.
|
|
||||||
aliasCfg := `name: default
|
|
||||||
alias: real-llm
|
|
||||||
`
|
|
||||||
Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed())
|
|
||||||
|
|
||||||
cl := config.NewModelConfigLoader(tmpDir)
|
|
||||||
Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed())
|
|
||||||
|
|
||||||
// Resolving the alias must follow the hop to the target's full config.
|
|
||||||
resolved, err := loadPipelineSubModel(cl, "default", tmpDir)
|
|
||||||
Expect(err).NotTo(HaveOccurred())
|
|
||||||
Expect(resolved.IsAlias()).To(BeFalse())
|
|
||||||
Expect(resolved.Backend).To(Equal("llama-cpp"))
|
|
||||||
|
|
||||||
// A non-alias name must load unchanged.
|
|
||||||
direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir)
|
|
||||||
Expect(err).NotTo(HaveOccurred())
|
|
||||||
Expect(direct.Backend).To(Equal("llama-cpp"))
|
|
||||||
Expect(direct.Name).To(Equal("real-llm"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@@ -288,21 +288,6 @@ test.describe('Model Editor - Interactive Tab', () => {
|
|||||||
await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
|
await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
|
||||||
})
|
})
|
||||||
|
|
||||||
test('pattern min_len clamps a directly-typed negative to 0', async ({ page }) => {
|
|
||||||
const searchInput = page.locator('input[placeholder="Search fields to add..."]')
|
|
||||||
await searchInput.fill('Custom Secret Patterns')
|
|
||||||
const dropdown = searchInput.locator('..').locator('..')
|
|
||||||
await dropdown.locator('div', { hasText: 'Custom Secret Patterns' }).first().click()
|
|
||||||
|
|
||||||
await page.locator('button', { hasText: 'Add pattern' }).click()
|
|
||||||
// The number input's min={0} only limits the spinner arrows, not keyboard
|
|
||||||
// entry; the editor must sanitise a typed negative so a meaningless
|
|
||||||
// negative length floor never reaches the saved config.
|
|
||||||
const minLen = page.locator('input[aria-label="Minimum length"]')
|
|
||||||
await minLen.fill('-5')
|
|
||||||
await expect(minLen).toHaveValue('0')
|
|
||||||
})
|
|
||||||
|
|
||||||
// Regression: a map-typed field (entity_actions) present in the loaded YAML
|
// Regression: a map-typed field (entity_actions) present in the loaded YAML
|
||||||
// must render WITH its values. flattenConfig used to recurse into the map,
|
// must render WITH its values. flattenConfig used to recurse into the map,
|
||||||
// scattering it across pii_detection.entity_actions.<GROUP> paths that match
|
// scattering it across pii_detection.entity_actions.<GROUP> paths that match
|
||||||
@@ -344,37 +329,4 @@ test.describe('Model Editor - Interactive Tab', () => {
|
|||||||
await expect(page.getByText(/block —/i).first()).toBeVisible()
|
await expect(page.getByText(/block —/i).first()).toBeVisible()
|
||||||
})
|
})
|
||||||
|
|
||||||
// A map cannot hold two values for one key, so renaming a row to an existing
|
|
||||||
// group must collapse to a single row (Object.fromEntries, last write wins)
|
|
||||||
// rather than rendering two conflicting rows that silently lose one on save.
|
|
||||||
test('entity_actions collapses a duplicate group to a single row', async ({ page }) => {
|
|
||||||
await page.route('**/api/models/edit/ner-model', (route) => {
|
|
||||||
route.fulfill({
|
|
||||||
contentType: 'application/json',
|
|
||||||
body: JSON.stringify({
|
|
||||||
name: 'ner-model',
|
|
||||||
config: [
|
|
||||||
'name: ner-model',
|
|
||||||
'backend: llama-cpp',
|
|
||||||
'pii_detection:',
|
|
||||||
' entity_actions:',
|
|
||||||
' SSN: block',
|
|
||||||
' EMAIL: mask',
|
|
||||||
'',
|
|
||||||
].join('\n'),
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
await page.goto('/app/model-editor/ner-model')
|
|
||||||
|
|
||||||
const groupInputs = page.locator('input[aria-label="Entity group"]')
|
|
||||||
await expect(groupInputs).toHaveCount(2)
|
|
||||||
|
|
||||||
// Rename the EMAIL row to duplicate SSN; the editor collapses to one SSN row.
|
|
||||||
await groupInputs.nth(1).fill('SSN')
|
|
||||||
await expect(groupInputs).toHaveCount(1)
|
|
||||||
await expect(groupInputs.nth(0)).toHaveValue('SSN')
|
|
||||||
})
|
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,34 +0,0 @@
|
|||||||
import { test, expect } from './coverage-fixtures.js'
|
|
||||||
|
|
||||||
const ID = 'n1'
|
|
||||||
async function mockNode(page) {
|
|
||||||
await page.route(`**/api/nodes/${ID}`, r => r.fulfill({ status: 200, contentType: 'application/json',
|
|
||||||
body: JSON.stringify({ id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy', total_vram: 24e9, available_vram: 12e9, max_replicas_per_model: 1, labels: { env: 'prod' } }) }))
|
|
||||||
await page.route(`**/api/nodes/${ID}/models`, r => r.fulfill({ status: 200, contentType: 'application/json',
|
|
||||||
body: JSON.stringify([{ node_id: ID, model_name: 'llama-3.3', state: 'loaded', in_flight: 0, replica_index: 0 }]) }))
|
|
||||||
await page.route(`**/api/nodes/${ID}/backends`, r => r.fulfill({ status: 200, contentType: 'application/json',
|
|
||||||
body: JSON.stringify([{ name: 'llama-cpp', is_system: true, installed_at: '2026-06-01T00:00:00Z' }]) }))
|
|
||||||
}
|
|
||||||
|
|
||||||
test.describe('Node detail page', () => {
|
|
||||||
test('renders sections for a node', async ({ page }) => {
|
|
||||||
await mockNode(page)
|
|
||||||
await page.goto(`/app/nodes/${ID}`)
|
|
||||||
await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
|
|
||||||
await expect(page.getByText('alpha')).toBeVisible()
|
|
||||||
await expect(page.getByText('llama-3.3')).toBeVisible()
|
|
||||||
await expect(page.getByText('llama-cpp')).toBeVisible()
|
|
||||||
await expect(page.getByText('env=prod')).toBeVisible()
|
|
||||||
})
|
|
||||||
|
|
||||||
test('is reachable by clicking a roster panel', async ({ page }) => {
|
|
||||||
await page.route('**/api/nodes', r => r.fulfill({ status: 200, contentType: 'application/json',
|
|
||||||
body: JSON.stringify([{ id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' }]) }))
|
|
||||||
await page.route('**/api/nodes/models', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
|
||||||
await page.route('**/api/nodes/scheduling', r => r.fulfill({ status: 200, contentType: 'application/json', body: '[]' }))
|
|
||||||
await mockNode(page)
|
|
||||||
await page.goto('/app/nodes')
|
|
||||||
await page.locator('.node-panel').filter({ hasText: 'alpha' }).getByText('alpha').click()
|
|
||||||
await expect(page).toHaveURL(new RegExp(`/app/nodes/${ID}$`))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user