mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-30 11:26:32 -04:00
Compare commits
102 Commits
v4.5.0
...
fix/watchd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
347cdcf545 | ||
|
|
0e381897b5 | ||
|
|
b1af37257d | ||
|
|
ebefa6dcca | ||
|
|
605348925d | ||
|
|
686ce10b54 | ||
|
|
2cee318fad | ||
|
|
1a4f68ed4a | ||
|
|
28d7397743 | ||
|
|
5d0c43ec6e | ||
|
|
6ab29ec8b9 | ||
|
|
036f950b1b | ||
|
|
5b7b914b4f | ||
|
|
d1cee4c52a | ||
|
|
baaa0fe94f | ||
|
|
c3b5c7c3fa | ||
|
|
bd1ec8f2c2 | ||
|
|
135debf9af | ||
|
|
e8c18ae28e | ||
|
|
c4d302e1ab | ||
|
|
323b57a4bc | ||
|
|
3d2f639213 | ||
|
|
be1ae9338b | ||
|
|
923c47020d | ||
|
|
b7a1dec773 | ||
|
|
de2ec2f136 | ||
|
|
d3a26f961d | ||
|
|
13b1ae53bc | ||
|
|
e68ca109c5 | ||
|
|
6740e988d2 | ||
|
|
ade9cc9e37 | ||
|
|
471e38e4e7 | ||
|
|
f3d829e2ef | ||
|
|
91885c2c7e | ||
|
|
f1fcafb888 | ||
|
|
fdff114701 | ||
|
|
1154be5eea | ||
|
|
8aba4fdba3 | ||
|
|
d7d7721eae | ||
|
|
c548150f99 | ||
|
|
ec26b86dd4 | ||
|
|
d11b202dd2 | ||
|
|
e95018ef70 | ||
|
|
0258f8af55 | ||
|
|
14b29ebf4e | ||
|
|
f0d0bff232 | ||
|
|
64150ca7ab | ||
|
|
f98b0f1c1e | ||
|
|
2c96c2d08e | ||
|
|
f01a969f7b | ||
|
|
56600eec3e | ||
|
|
c4fa256cdf | ||
|
|
17c1fc74b2 | ||
|
|
068d397acf | ||
|
|
5b3572f8b8 | ||
|
|
6afe127cd4 | ||
|
|
f58dcefed4 | ||
|
|
11b062f8f4 | ||
|
|
114eeaae81 | ||
|
|
d388f874de | ||
|
|
86677495a2 | ||
|
|
253aedff06 | ||
|
|
74f07ecc35 | ||
|
|
ae0da454a7 | ||
|
|
179210b970 | ||
|
|
6c03e46390 | ||
|
|
f2ed63e39a | ||
|
|
286c508ce0 | ||
|
|
d1a9d59917 | ||
|
|
f72046b5b5 | ||
|
|
79783120dd | ||
|
|
4ac67d255d | ||
|
|
3a87d9e48f | ||
|
|
693e3eec05 | ||
|
|
f1e5071321 | ||
|
|
93d6255de3 | ||
|
|
fe4f425fb5 | ||
|
|
fae9f6356f | ||
|
|
066abf82c0 | ||
|
|
a7fec9a49d | ||
|
|
c678530cf0 | ||
|
|
3c63431e46 | ||
|
|
3f647a2764 | ||
|
|
f88981cdce | ||
|
|
0d6de15ae9 | ||
|
|
5c3d48ab50 | ||
|
|
764b0352b9 | ||
|
|
75ba2daba1 | ||
|
|
62b14fd635 | ||
|
|
193d0e6aef | ||
|
|
482314c623 | ||
|
|
e8ae88a2a0 | ||
|
|
e1994579f8 | ||
|
|
e5620989dd | ||
|
|
fc618dcee6 | ||
|
|
e6042080c0 | ||
|
|
0f3b24436d | ||
|
|
4b6f911835 | ||
|
|
a5e28942a6 | ||
|
|
dba9cd7ca4 | ||
|
|
c93190de50 | ||
|
|
4dbf69f889 |
@@ -102,6 +102,24 @@ Multi-arch backends are NOT a single matrix entry with `platforms: 'linux/amd64,
|
||||
|
||||
Entries whose `dockerfile` is `./backend/Dockerfile.{llama-cpp,ik-llama-cpp,turboquant}` must also set a `builder-base-image` field pointing at a prebuilt base from `quay.io/go-skynet/ci-cache:base-grpc-*` (CI builds these via `.github/workflows/base-images.yml`). The mapping is by `(build-type, platforms)` — see existing entries for the pattern. CI uses these prebuilt bases to skip the gRPC compile (~25–35 min cold). Local `make backends/<name>` ignores `builder-base-image` and uses the from-source path inside the Dockerfile, so you don't need quay access for local builds.
|
||||
|
||||
### Cover every OS the project supports (Linux **and** Darwin)
|
||||
|
||||
`.github/backend-matrix.yml` has two matrices, and they are the source of truth for which OS a backend ships on:
|
||||
|
||||
- `include:` — the **Linux** matrix (x86_64 + arm64; CPU and CUDA / ROCm / SYCL / Vulkan).
|
||||
- `includeDarwin:` — the **macOS / Apple Silicon** matrix (arm64; Metal where the engine supports it, otherwise a native arm64 CPU build).
|
||||
|
||||
**A new backend must target every OS it can build for — do not ship Linux-only by default.** A backend that appears only under `include:` is silently unavailable on macOS even when its code would run there. Most C/C++/GGML engines build on Darwin out of the box (ggml defaults `GGML_METAL=ON` on Apple, so a plain build is Metal-enabled), and many Python backends do too (CPU / MPS wheels). If a backend genuinely cannot support an OS (e.g. CUDA-only, no CPU variant), state that in the PR description instead of omitting it silently.
|
||||
|
||||
Wiring a backend into `includeDarwin:` is more than the matrix entry:
|
||||
|
||||
1. **`includeDarwin:` entry** — `tag-suffix: "-metal-darwin-arm64-<backend>"`, `build-type: "metal"`, `lang: "go"` for go+ggml backends; omit `build-type` for the bespoke C++ ones (llama-cpp / ds4 / privacy-filter). Match an existing entry of the same shape.
|
||||
2. **`backend/index.yaml`** — add `metal:` to the backend's `capabilities` map (main and `-development`) and concrete `metal-<backend>` / `metal-<backend>-development` image entries pointing at the `-metal-darwin-arm64-<backend>` images.
|
||||
3. **C/C++ backends only** — add an `inferBackendPathDarwin` case in `scripts/changed-backends.js` returning `backend/cpp/<backend>/` (the generic fallthrough assumes `backend/<lang>/`, which is wrong for a C++ source tree driven with `lang: go`), and give `run.sh` a Darwin branch that exports `DYLD_LIBRARY_PATH` instead of `LD_LIBRARY_PATH`. If the build is bespoke (single `grpc-server` + dylib bundling), model it on `scripts/build/ds4-darwin.sh` and add a `backends/<backend>-darwin` make target plus a gated step in `.github/workflows/backend_build_darwin.yml`.
|
||||
4. **C++ proto gotcha** — if the backend compiles the generated gRPC/protobuf in a separate CMake target (e.g. `hw_grpc_proto`), that target must link `protobuf::libprotobuf` + `gRPC::grpc++` so the Homebrew include dirs propagate; otherwise macOS fails with `google/protobuf/runtime_version.h not found` (Linux hides this because apt headers sit in `/usr/include`).
|
||||
|
||||
The CI path filter only builds a backend on a PR when a file under its directory changes, so a darwin-only YAML edit builds nothing — touch a file under the backend's source directory (`backend/<lang>/<backend>/`, or `backend/cpp/<backend>/` for C/C++ backends; a one-line comment is enough) in the same PR.
|
||||
|
||||
## 3. Add Backend Metadata to `backend/index.yaml`
|
||||
|
||||
**Step 3a: Add Meta Definition**
|
||||
@@ -225,6 +243,7 @@ After adding a new backend, verify:
|
||||
|
||||
- [ ] Backend directory structure is complete with all necessary files
|
||||
- [ ] Build configurations added to `.github/backend-matrix.yml` for all desired platforms (per-arch entries with `platform-tag` for multi-arch; `builder-base-image` for llama-cpp / ik-llama-cpp / turboquant)
|
||||
- [ ] **OS coverage considered**: added to `includeDarwin:` (macOS/Apple Silicon) if the backend can build there — with the `backend/index.yaml` `metal:` capability + `metal-<backend>` image entries, a `run.sh` Darwin/DYLD branch and `inferBackendPathDarwin` case for C++ backends — or the PR explains why an OS is unsupported. Do not ship Linux-only by default.
|
||||
- [ ] Meta definition added to `backend/index.yaml` in the `## metas` section
|
||||
- [ ] Image entries added to `backend/index.yaml` for all build variants (latest + development)
|
||||
- [ ] Tag suffixes match between workflow file and index.yaml
|
||||
|
||||
@@ -17,19 +17,29 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
|
||||
fi
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
if [ -z "${BUILD_TYPE:-}" ]; then
|
||||
# Pure CPU image (BUILD_TYPE empty): one build with ggml CPU_ALL_VARIANTS replaces the
|
||||
# per-microarch binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml
|
||||
# dlopens the best libggml-cpu-*.so at runtime by probing host CPU features.
|
||||
#
|
||||
# arm64: the CPU_ALL_VARIANTS table includes armv9.2 SME variants whose -march=...+sme is
|
||||
# rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so build the arm64
|
||||
# variants with it (the host never *selects* SME unless it has it, but every variant must
|
||||
# still compile).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
make llama-cpp-cpu-all
|
||||
else
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-avx
|
||||
make llama-cpp-avx2
|
||||
make llama-cpp-avx512
|
||||
# GPU build (cublas/hipblas/sycl/vulkan/...): the accelerator does the compute, so a
|
||||
# single fallback CPU build is enough - no per-microarch CPU variants needed. (This also
|
||||
# keeps the heavy GPU backend compile from building the whole CPU variant matrix,
|
||||
# and avoids the gcc-14 apt step on GPU base images such as nvidia l4t.)
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
fi
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
@@ -19,17 +19,21 @@ fi
|
||||
|
||||
cd /LocalAI/backend/cpp/turboquant
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
if [ -z "${BUILD_TYPE:-}" ]; then
|
||||
# Pure CPU image: one ggml CPU_ALL_VARIANTS build replaces the per-microarch binaries.
|
||||
# arm64: the armv9.2 SME variants need gcc-14 (gcc-13 rejects +sme).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
make turboquant-cpu-all
|
||||
else
|
||||
make turboquant-avx
|
||||
make turboquant-avx2
|
||||
make turboquant-avx512
|
||||
# GPU build (cublas/hipblas/sycl/vulkan/...): single fallback CPU build, the accelerator
|
||||
# does the compute. Keeps the GPU compile from also building the CPU variant matrix and
|
||||
# avoids the gcc-14 apt step on GPU base images such as nvidia l4t.
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
fi
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
@@ -7,8 +7,11 @@
|
||||
# Runs only the checks relevant to what's staged:
|
||||
# - Go files -> make lint + make test-coverage-check
|
||||
# - core/http/react-ui -> make test-ui-coverage-check (Playwright e2e + gate)
|
||||
# A commit touching neither is skipped entirely (docs/YAML/etc. can't change
|
||||
# lint findings, Go coverage, or the UI).
|
||||
# - realtime state machines / specs -> make test-realtime-conformance
|
||||
# (respcoord/**, turncoord/**, or formal-verification/** -- a pure .fizz
|
||||
# spec edit must still re-verify the design, detected separately from Go)
|
||||
# A commit touching none of these is skipped entirely (other docs/YAML can't
|
||||
# change lint findings, Go coverage, the UI, or the realtime conformance gate).
|
||||
#
|
||||
# To bypass for a single commit (e.g. a WIP checkpoint): git commit --no-verify
|
||||
set -eu
|
||||
@@ -20,11 +23,13 @@ staged="$(git diff --cached --name-only --diff-filter=ACMRD)"
|
||||
|
||||
go_changed=0
|
||||
ui_changed=0
|
||||
rt_changed=0
|
||||
if echo "$staged" | grep -qE '\.go$'; then go_changed=1; fi
|
||||
if echo "$staged" | grep -qE '^core/http/react-ui/'; then ui_changed=1; fi
|
||||
if echo "$staged" | grep -qE '^(core/http/endpoints/openai/(coordinator|respcoord|turncoord|conncoord|compactcoord|ttscoord)/|formal-verification/)'; then rt_changed=1; fi
|
||||
|
||||
if [ "$go_changed" -eq 0 ] && [ "$ui_changed" -eq 0 ]; then
|
||||
echo "pre-commit: no Go or React UI changes staged — skipping."
|
||||
if [ "$go_changed" -eq 0 ] && [ "$ui_changed" -eq 0 ] && [ "$rt_changed" -eq 0 ]; then
|
||||
echo "pre-commit: no Go, React UI, or realtime-spec changes staged — skipping."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@@ -57,4 +62,11 @@ if [ "$ui_changed" -eq 1 ]; then
|
||||
make test-ui-coverage-check
|
||||
fi
|
||||
|
||||
if [ "$rt_changed" -eq 1 ]; then
|
||||
echo "pre-commit ▶ realtime state-machine conformance (make test-realtime-conformance) —"
|
||||
echo " Go transition/rapid tests under -race + FizzBee model check of the"
|
||||
echo " authoritative specs. Fail-closed: needs FizzBee (make install-fizzbee)."
|
||||
make test-realtime-conformance
|
||||
fi
|
||||
|
||||
echo "pre-commit ✓ all relevant checks passed"
|
||||
|
||||
377
.github/backend-matrix.yml
vendored
377
.github/backend-matrix.yml
vendored
@@ -2,6 +2,28 @@
|
||||
# Matrix data for backend container image builds.
|
||||
# Consumed by scripts/changed-backends.js for both backend.yml and backend_pr.yml.
|
||||
# This file is NOT a workflow — it has no top-level 'on:' or 'jobs:'.
|
||||
#
|
||||
# OS / platform coverage — READ THIS WHEN ADDING A BACKEND
|
||||
# --------------------------------------------------------
|
||||
# This file is the source of truth for which OS each backend is built and
|
||||
# published for. A backend ships ONLY for the matrices it appears in:
|
||||
# - Linux -> the `include:` matrix below (x86_64 + arm64; CPU and
|
||||
# CUDA / ROCm / SYCL / Vulkan variants).
|
||||
# - macOS -> the `includeDarwin:` matrix (Apple Silicon / arm64; Metal where
|
||||
# the engine supports it, otherwise a native arm64 CPU build).
|
||||
#
|
||||
# New backends must target EVERY OS they can build for, not just Linux. A backend
|
||||
# listed only under `include:` is silently unavailable on macOS even when its code
|
||||
# would run there. Most C/C++/GGML engines build on Darwin (ggml defaults
|
||||
# GGML_METAL=ON on Apple, so a plain build is Metal-enabled), and many Python
|
||||
# backends do too (CPU / MPS). If a backend genuinely cannot support an OS, say so
|
||||
# in its PR description rather than silently omitting it.
|
||||
#
|
||||
# Adding a backend to `includeDarwin:` is more than one line — see the darwin
|
||||
# checklist in .agents/adding-backends.md (includeDarwin entry, the index.yaml
|
||||
# `metal:` capability + `metal-<backend>` image entries, a `run.sh` Darwin/DYLD
|
||||
# branch for C/C++ backends, and the inferBackendPathDarwin case in
|
||||
# scripts/changed-backends.js so the path filter actually builds it).
|
||||
|
||||
# Linux matrix (consumed by backend-jobs).
|
||||
include:
|
||||
@@ -3723,6 +3745,302 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# voice-detect
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-voice-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-voice-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-voice-detect'
|
||||
base-image: "ubuntu:24.04"
|
||||
ubuntu-version: '2404'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-voice-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-voice-detect'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f32-voice-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f16'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f16-voice-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-voice-detect'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-voice-detect'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-rocm-hipblas-voice-detect'
|
||||
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||
runs-on: 'ubuntu-latest'
|
||||
skip-drivers: 'false'
|
||||
backend: "voice-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# face-detect
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-face-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-face-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-face-detect'
|
||||
base-image: "ubuntu:24.04"
|
||||
ubuntu-version: '2404'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-face-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-face-detect'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f32-face-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f16'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f16-face-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-face-detect'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-face-detect'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-face-detect'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-rocm-hipblas-face-detect'
|
||||
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
|
||||
runs-on: 'ubuntu-latest'
|
||||
skip-drivers: 'false'
|
||||
backend: "face-detect"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# acestep-cpp
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
@@ -4906,6 +5224,14 @@ includeDarwin:
|
||||
tag-suffix: "-metal-darwin-arm64-ced"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "voice-detect"
|
||||
tag-suffix: "-metal-darwin-arm64-voice-detect"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "face-detect"
|
||||
tag-suffix: "-metal-darwin-arm64-face-detect"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "acestep-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
|
||||
build-type: "metal"
|
||||
@@ -4922,6 +5248,37 @@ includeDarwin:
|
||||
tag-suffix: "-metal-darwin-arm64-vibevoice-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
# Vision/utility C++/ggml backends (go+cgo). Their Makefiles already carry a
|
||||
# Darwin/Metal path (GGML_METAL=ON when build-type=metal); this just builds and
|
||||
# publishes the metal image so Apple Silicon can install them.
|
||||
- backend: "depth-anything-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-depth-anything-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "locate-anything-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-locate-anything-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "rfdetr-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-rfdetr-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "sam3-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-sam3-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
# privacy-filter (PII/NER) is a C++/ggml backend built by a bespoke darwin
|
||||
# script (make backends/privacy-filter-darwin); ggml defaults Metal ON on Apple
|
||||
# so the build is Metal-enabled. lang=go drives runner/toolchain selection only.
|
||||
- backend: "privacy-filter"
|
||||
tag-suffix: "-metal-darwin-arm64-privacy-filter"
|
||||
lang: "go"
|
||||
# LocalVQE has no Metal path; on Apple Silicon it builds CPU-only (GGML_METAL
|
||||
# OFF) but is still a native arm64 image. Uses the darwin/metal build profile.
|
||||
- backend: "localvqe"
|
||||
tag-suffix: "-metal-darwin-arm64-localvqe"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "voxtral"
|
||||
tag-suffix: "-metal-darwin-arm64-voxtral"
|
||||
build-type: "metal"
|
||||
@@ -4938,9 +5295,6 @@ includeDarwin:
|
||||
- backend: "qwen-tts"
|
||||
tag-suffix: "-metal-darwin-arm64-qwen-tts"
|
||||
build-type: "mps"
|
||||
- backend: "fish-speech"
|
||||
tag-suffix: "-metal-darwin-arm64-fish-speech"
|
||||
build-type: "mps"
|
||||
- backend: "voxcpm"
|
||||
tag-suffix: "-metal-darwin-arm64-voxcpm"
|
||||
build-type: "mps"
|
||||
@@ -4974,6 +5328,19 @@ includeDarwin:
|
||||
- backend: "kitten-tts"
|
||||
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
||||
build-type: "mps"
|
||||
# vLLM on Apple Silicon via vllm-metal (MLX). The install is custom
|
||||
# (backend/python/vllm/install.sh has a darwin branch); lang stays python so
|
||||
# backend_build_darwin.yml drives it through build-darwin-python-backend ->
|
||||
# scripts/build/python-darwin.sh, which runs the backend's install.sh.
|
||||
- backend: "vllm"
|
||||
tag-suffix: "-metal-darwin-arm64-vllm"
|
||||
build-type: "mps"
|
||||
- backend: "trl"
|
||||
tag-suffix: "-metal-darwin-arm64-trl"
|
||||
build-type: "mps"
|
||||
- backend: "liquid-audio"
|
||||
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
||||
build-type: "mps"
|
||||
- backend: "piper"
|
||||
tag-suffix: "-metal-darwin-arm64-piper"
|
||||
build-type: "metal"
|
||||
@@ -4990,6 +5357,10 @@ includeDarwin:
|
||||
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "supertonic"
|
||||
tag-suffix: "-metal-darwin-arm64-supertonic"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "local-store"
|
||||
tag-suffix: "-metal-darwin-arm64-local-store"
|
||||
build-type: "metal"
|
||||
|
||||
55
.github/bump_vllm_metal.sh
vendored
Executable file
55
.github/bump_vllm_metal.sh
vendored
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/bin/bash
|
||||
# Bump the single vllm-metal pin (VLLM_METAL_VERSION) in the vLLM backend's
|
||||
# darwin (Apple Silicon) install path. The macOS/Metal build
|
||||
# (backend/python/vllm/install.sh, Darwin branch) installs vllm-metal, which is
|
||||
# version-locked to a specific vLLM source release. install.sh derives that vLLM
|
||||
# version at build time from vllm-metal's own installer (`vllm_v=`) at the pinned
|
||||
# tag, so there is only ONE value to bump here -- mirroring bump_vllm_wheel.sh,
|
||||
# which bumps the Linux cu130 wheel pin.
|
||||
#
|
||||
# This deliberately tracks vllm-project/vllm-metal, NOT vllm-project/vllm: the
|
||||
# darwin build can only use the exact vLLM version vllm-metal supports, so it may
|
||||
# lag the Linux pin (requirements-cublas13-after.txt) until vllm-metal catches up.
|
||||
set -xe
|
||||
REPO=$1 # vllm-project/vllm-metal
|
||||
FILE=$2 # backend/python/vllm/install.sh
|
||||
VAR=$3 # VLLM_METAL_VERSION (used for the workflow's output file names)
|
||||
|
||||
if [ -z "$FILE" ] || [ -z "$REPO" ] || [ -z "$VAR" ]; then
|
||||
echo "usage: $0 <repo> <install-file> <var-name>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# vllm-metal ships frequent dev releases, all flagged as non-prerelease, so
|
||||
# /releases/latest returns the newest one (with its cp312 wheel asset).
|
||||
LATEST_TAG=$(curl -sS -H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/$REPO/releases/latest" \
|
||||
| python3 -c "import json,sys; print(json.load(sys.stdin)['tag_name'])")
|
||||
|
||||
# The coupled vLLM source version lives in vllm-metal's installer at that tag.
|
||||
NEW_VLLM_VERSION=$(curl -fsSL \
|
||||
"https://raw.githubusercontent.com/$REPO/$LATEST_TAG/install.sh" \
|
||||
| grep -oE 'vllm_v="[0-9]+\.[0-9]+\.[0-9]+"' | head -1 | cut -d'"' -f2)
|
||||
|
||||
if [ -z "$LATEST_TAG" ] || [ -z "$NEW_VLLM_VERSION" ]; then
|
||||
echo "Could not resolve vllm-metal tag ($LATEST_TAG) or its vllm_v ($NEW_VLLM_VERSION)." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set +e
|
||||
CURRENT_TAG=$(grep -oE 'VLLM_METAL_VERSION="[^"]*"' "$FILE" | head -1 | cut -d'"' -f2)
|
||||
set -e
|
||||
|
||||
# Rewrite the single pin. install.sh derives VLLM_VERSION from this tag at build
|
||||
# time, so there is nothing else to touch. peter-evans/create-pull-request opens
|
||||
# no PR on a clean tree, so a no-op rewrite (already current) is safe.
|
||||
sed -i "$FILE" \
|
||||
-e "s|VLLM_METAL_VERSION=\"[^\"]*\"|VLLM_METAL_VERSION=\"$LATEST_TAG\"|"
|
||||
|
||||
if [ -z "$CURRENT_TAG" ]; then
|
||||
echo "Could not find VLLM_METAL_VERSION=\"...\" in $FILE." >&2
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "vllm-metal ${CURRENT_TAG} -> ${LATEST_TAG} (builds vLLM ${NEW_VLLM_VERSION}): https://github.com/$REPO/releases/tag/${LATEST_TAG}" >> "${VAR}_message.txt"
|
||||
echo "${LATEST_TAG}" >> "${VAR}_commit.txt"
|
||||
34
.github/workflows/backend_build_darwin.yml
vendored
34
.github/workflows/backend_build_darwin.yml
vendored
@@ -82,7 +82,7 @@ jobs:
|
||||
# as the Linux registry cache.
|
||||
- name: Restore Homebrew cache
|
||||
id: brew-cache
|
||||
uses: actions/cache/restore@v4
|
||||
uses: actions/cache/restore@v6
|
||||
with:
|
||||
path: |
|
||||
~/Library/Caches/Homebrew/downloads
|
||||
@@ -99,6 +99,7 @@ jobs:
|
||||
/opt/homebrew/Cellar/xxhash
|
||||
/opt/homebrew/Cellar/zstd
|
||||
/opt/homebrew/Cellar/nlohmann-json
|
||||
/opt/homebrew/Cellar/opus
|
||||
key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
|
||||
|
||||
- name: Dependencies
|
||||
@@ -113,7 +114,12 @@ jobs:
|
||||
# nlohmann-json is header-only and required by the ds4 backend
|
||||
# (dsml_renderer.cpp includes <nlohmann/json.hpp>); on Linux it comes
|
||||
# from the apt-installed nlohmann-json3-dev in the build image.
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json
|
||||
# opus + pkg-config are required by the opus go backend: its
|
||||
# Makefile/package.sh call `pkg-config --cflags/--libs opus` to build
|
||||
# libopusshim.dylib and to locate libopus.dylib for bundling. brew's
|
||||
# pkg-config defaults its search path to the Homebrew prefix so the
|
||||
# opus.pc is found.
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json opus pkg-config
|
||||
# Force-reinstall ccache so brew re-validates its full runtime-dep
|
||||
# closure on every run. This is the durable fix: when the upstream
|
||||
# ccache formula gains a new transitive dep (as it has multiple times
|
||||
@@ -132,11 +138,11 @@ jobs:
|
||||
# and decides "already installed" without re-linking, so on a cache-
|
||||
# hit run the formulas aren't on PATH. Force-link them; --overwrite
|
||||
# tolerates pre-existing symlinks from earlier installs.
|
||||
brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json 2>/dev/null || true
|
||||
brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json opus pkg-config 2>/dev/null || true
|
||||
|
||||
- name: Save Homebrew cache
|
||||
if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
|
||||
uses: actions/cache/save@v4
|
||||
uses: actions/cache/save@v6
|
||||
with:
|
||||
path: |
|
||||
~/Library/Caches/Homebrew/downloads
|
||||
@@ -153,6 +159,7 @@ jobs:
|
||||
/opt/homebrew/Cellar/xxhash
|
||||
/opt/homebrew/Cellar/zstd
|
||||
/opt/homebrew/Cellar/nlohmann-json
|
||||
/opt/homebrew/Cellar/opus
|
||||
key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
|
||||
|
||||
# ---- ccache for llama.cpp CMake builds ----
|
||||
@@ -171,7 +178,7 @@ jobs:
|
||||
- name: Restore ccache
|
||||
if: inputs.backend == 'llama-cpp'
|
||||
id: ccache-cache
|
||||
uses: actions/cache/restore@v4
|
||||
uses: actions/cache/restore@v6
|
||||
with:
|
||||
path: ~/Library/Caches/ccache
|
||||
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
|
||||
@@ -204,7 +211,7 @@ jobs:
|
||||
- name: Restore Python wheel cache
|
||||
if: inputs.lang == 'python'
|
||||
id: pyenv-cache
|
||||
uses: actions/cache/restore@v4
|
||||
uses: actions/cache/restore@v6
|
||||
with:
|
||||
path: |
|
||||
~/Library/Caches/pip
|
||||
@@ -228,8 +235,17 @@ jobs:
|
||||
run: |
|
||||
make backends/ds4-darwin
|
||||
|
||||
# privacy-filter is a C++/ggml backend like ds4 - a single grpc-server with
|
||||
# otool dylib bundling - so it gets its own bespoke darwin script rather than
|
||||
# the generic build-darwin-go-backend path.
|
||||
- name: Build privacy-filter backend (Darwin Metal)
|
||||
if: inputs.backend == 'privacy-filter'
|
||||
run: |
|
||||
make protogen-go
|
||||
make backends/privacy-filter-darwin
|
||||
|
||||
- name: Build ${{ inputs.backend }}-darwin
|
||||
if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4'
|
||||
if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4' && inputs.backend != 'privacy-filter'
|
||||
run: |
|
||||
make protogen-go
|
||||
BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
|
||||
@@ -240,14 +256,14 @@ jobs:
|
||||
|
||||
- name: Save ccache
|
||||
if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
|
||||
uses: actions/cache/save@v4
|
||||
uses: actions/cache/save@v6
|
||||
with:
|
||||
path: ~/Library/Caches/ccache
|
||||
key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
|
||||
|
||||
- name: Save Python wheel cache
|
||||
if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
|
||||
uses: actions/cache/save@v4
|
||||
uses: actions/cache/save@v6
|
||||
with:
|
||||
path: |
|
||||
~/Library/Caches/pip
|
||||
|
||||
44
.github/workflows/bump_deps.yaml
vendored
44
.github/workflows/bump_deps.yaml
vendored
@@ -46,6 +46,14 @@ jobs:
|
||||
variable: "CED_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/go/ced/Makefile"
|
||||
- repository: "mudler/voice-detect.cpp"
|
||||
variable: "VOICEDETECT_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/go/voice-detect/Makefile"
|
||||
- repository: "mudler/face-detect.cpp"
|
||||
variable: "FACEDETECT_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/go/face-detect/Makefile"
|
||||
- repository: "mudler/depth-anything.cpp"
|
||||
variable: "DEPTHANYTHING_VERSION"
|
||||
branch: "master"
|
||||
@@ -154,3 +162,39 @@ jobs:
|
||||
branch: "update/VLLM_VERSION"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
signoff: true
|
||||
|
||||
bump-vllm-metal:
|
||||
# The darwin (Apple Silicon) vLLM build installs vllm-metal, which is locked
|
||||
# to a specific vLLM source release. install.sh pins only VLLM_METAL_VERSION
|
||||
# (the wheel release); VLLM_VERSION (the vLLM it builds against) is derived from
|
||||
# it at build time. This job tracks vllm-project/vllm-metal and rewrites that one pin. Separate from
|
||||
# bump-vllm-wheel because darwin follows vllm-metal, not vllm/vllm latest.
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- name: Bump vllm-metal pin 🔧
|
||||
id: bump
|
||||
run: |
|
||||
bash .github/bump_vllm_metal.sh vllm-project/vllm-metal backend/python/vllm/install.sh VLLM_METAL_VERSION
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
cat "VLLM_METAL_VERSION_message.txt"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo 'commit<<EOF'
|
||||
cat "VLLM_METAL_VERSION_commit.txt"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
rm -rfv VLLM_METAL_VERSION_message.txt VLLM_METAL_VERSION_commit.txt
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v8
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update vllm-project/vllm-metal (darwin)'
|
||||
title: 'chore: :arrow_up: Update vllm-metal (darwin) to `${{ steps.bump.outputs.commit }}`'
|
||||
branch: "update/VLLM_METAL_VERSION"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
signoff: true
|
||||
|
||||
69
.github/workflows/realtime-conformance.yml
vendored
Normal file
69
.github/workflows/realtime-conformance.yml
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
---
|
||||
name: 'realtime-conformance'
|
||||
|
||||
# Verifies the realtime state-machine implementations conform to their formal
|
||||
# designs (docs/design/realtime-state-machines.md, formal-verification/). BOTH
|
||||
# layers are enforced and the gate is fail-closed: the Go conformance layer
|
||||
# (respcoord + turncoord transition/rapid tests under -race) AND the FizzBee model check of
|
||||
# the authoritative specs. FizzBee is pinned + checksum-verified
|
||||
# (formal-verification/fizzbee.sha256), so a failed install fails the job rather
|
||||
# than silently skipping verification.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'core/http/endpoints/openai/coordinator/**'
|
||||
- 'core/http/endpoints/openai/respcoord/**'
|
||||
- 'core/http/endpoints/openai/turncoord/**'
|
||||
- 'core/http/endpoints/openai/conncoord/**'
|
||||
- 'core/http/endpoints/openai/compactcoord/**'
|
||||
- 'core/http/endpoints/openai/ttscoord/**'
|
||||
- 'formal-verification/**'
|
||||
- 'scripts/realtime-conformance.sh'
|
||||
- 'scripts/install-fizzbee.sh'
|
||||
- '.github/workflows/realtime-conformance.yml'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'core/http/endpoints/openai/coordinator/**'
|
||||
- 'core/http/endpoints/openai/respcoord/**'
|
||||
- 'core/http/endpoints/openai/turncoord/**'
|
||||
- 'core/http/endpoints/openai/conncoord/**'
|
||||
- 'core/http/endpoints/openai/compactcoord/**'
|
||||
- 'core/http/endpoints/openai/ttscoord/**'
|
||||
- 'formal-verification/**'
|
||||
- 'scripts/realtime-conformance.sh'
|
||||
|
||||
concurrency:
|
||||
group: realtime-conformance-${{ github.event.pull_request.number || github.sha }}-${{ github.repository }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
conformance:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.26.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ matrix.go-version }}
|
||||
cache: false
|
||||
- name: Cache FizzBee
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: .tools/fizzbee
|
||||
key: fizzbee-v0.5.2-${{ runner.os }}-${{ hashFiles('formal-verification/fizzbee.sha256') }}
|
||||
- name: Install FizzBee (pinned, checksum-verified)
|
||||
# No `|| true`: a failed/forged download must fail the job, not silently
|
||||
# drop the design verification. install-fizzbee.sh is a no-op if the
|
||||
# cached binary is already present and valid.
|
||||
run: ./scripts/install-fizzbee.sh
|
||||
- name: Run conformance gate (fail-closed)
|
||||
# No skip env: both the Go conformance and the FizzBee model check are
|
||||
# required. The gate auto-detects .tools/fizzbee/fizz.
|
||||
run: make test-realtime-conformance
|
||||
21
.github/workflows/release.yaml
vendored
21
.github/workflows/release.yaml
vendored
@@ -24,6 +24,11 @@ jobs:
|
||||
args: release --clean
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
MACOS_SIGN_P12: ${{ secrets.MACOS_CERTIFICATE }}
|
||||
MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||
launcher-build-darwin:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
@@ -35,9 +40,19 @@ jobs:
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.23
|
||||
- name: Build launcher for macOS ARM64
|
||||
run: |
|
||||
make build-launcher-darwin
|
||||
- name: Import signing certificate
|
||||
env:
|
||||
MACOS_CERTIFICATE: ${{ secrets.MACOS_CERTIFICATE }}
|
||||
MACOS_CERTIFICATE_PWD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||
MACOS_CI_KEYCHAIN_PWD: ${{ secrets.MACOS_CI_KEYCHAIN_PWD }}
|
||||
run: bash contrib/macos/sign-and-notarize.sh import-cert
|
||||
- name: Build, sign and notarize the DMG
|
||||
env:
|
||||
MACOS_SIGN_IDENTITY: ${{ secrets.MACOS_SIGN_IDENTITY }}
|
||||
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||
run: make release-launcher-darwin
|
||||
- name: Upload DMG to Release
|
||||
uses: softprops/action-gh-release@v3
|
||||
with:
|
||||
|
||||
6
.github/workflows/test-extra.yml
vendored
6
.github/workflows/test-extra.yml
vendored
@@ -1008,7 +1008,11 @@ jobs:
|
||||
# image + working dir.
|
||||
tests-vibevoice-cpp-grpc-transcription:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
# Skip on release tag pushes: the ASR Q4_K model is ~10 GB and cannot be
|
||||
# pulled from HF within the inner `go test -timeout 30m` budget on a CI
|
||||
# runner, so every tag build hung and timed out. Still runs on PRs/branch
|
||||
# pushes that touch vibevoice-cpp so regressions are caught off the release path.
|
||||
if: (needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true') && !startsWith(github.ref, 'refs/tags/')
|
||||
runs-on: bigger-runner
|
||||
timeout-minutes: 150
|
||||
steps:
|
||||
|
||||
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@@ -121,3 +121,19 @@ jobs:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
|
||||
# Fast standalone unit tests for the backends' pure C++ helpers - currently the
|
||||
# llama-cpp message reconstruction (backend/cpp/llama-cpp/message_content.h),
|
||||
# which guards the OpenAI chat content normalization (mudler/LocalAI#10524,
|
||||
# #7324, #7528). The runner discovers every *_test.cpp under backend/cpp/, so
|
||||
# new pure-C++ unit tests are picked up with no CI changes. These need only the
|
||||
# C++ stdlib + nlohmann/json, so they run on every PR without the full
|
||||
# llama.cpp + gRPC backend build. (The same suite is also wired as an opt-in
|
||||
# CMake/ctest target, -DLLAMA_GRPC_BUILD_TESTS=ON, for in-backend-build runs.)
|
||||
tests-backend-cpp:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
- name: Run backend C++ unit tests
|
||||
run: make test-backend-cpp
|
||||
|
||||
12
.gitignore
vendored
12
.gitignore
vendored
@@ -94,3 +94,15 @@ core/http/react-ui/test-results/
|
||||
|
||||
# SDD / brainstorm scratch (agent-driven development)
|
||||
.superpowers/
|
||||
|
||||
# Local Apple signing material (never commit)
|
||||
.certs/
|
||||
|
||||
# Pinned dev tools (e.g. FizzBee for the realtime-conformance gate)
|
||||
.tools/
|
||||
|
||||
# FizzBee model-check artifacts: the parser emits <spec>.json next to each
|
||||
# .fizz and the checker writes run dirs under out/. Both are regenerated by
|
||||
# the realtime-conformance gate; only the .fizz sources are authoritative.
|
||||
formal-verification/*.json
|
||||
formal-verification/out/
|
||||
|
||||
@@ -9,7 +9,8 @@ source:
|
||||
enabled: true
|
||||
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
||||
builds:
|
||||
- main: ./cmd/local-ai
|
||||
- id: local-ai
|
||||
main: ./cmd/local-ai
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
ldflags:
|
||||
@@ -35,3 +36,19 @@ snapshot:
|
||||
version_template: "{{ .Tag }}-next"
|
||||
changelog:
|
||||
use: github-native
|
||||
# Sign + notarize the macOS server binary via the quill backend (runs on Linux,
|
||||
# no macOS runner needed). Disabled automatically when MACOS_SIGN_P12 is unset
|
||||
# (forks / PRs), so those builds stay unsigned and green.
|
||||
notarize:
|
||||
macos:
|
||||
- enabled: '{{ isEnvSet "MACOS_SIGN_P12" }}'
|
||||
ids:
|
||||
- local-ai
|
||||
sign:
|
||||
certificate: "{{.Env.MACOS_SIGN_P12}}"
|
||||
password: "{{.Env.MACOS_SIGN_PASSWORD}}"
|
||||
notarize:
|
||||
issuer_id: "{{.Env.MACOS_NOTARY_ISSUER_ID}}"
|
||||
key_id: "{{.Env.MACOS_NOTARY_KEY_ID}}"
|
||||
key: "{{.Env.MACOS_NOTARY_KEY}}"
|
||||
wait: true
|
||||
|
||||
@@ -43,4 +43,5 @@ LocalAI follows the Linux kernel project's [guidelines for AI coding assistants]
|
||||
- **New API endpoints**: LocalAI advertises its capability surface in several independent places — swagger `@Tags`, `/api/instructions` registry, auth `RouteFeatureRegistry`, React UI `capabilities.js`, docs. Read [.agents/api-endpoints-and-auth.md](.agents/api-endpoints-and-auth.md) and follow its checklist — missing any surface means clients, admins, and the UI won't know the endpoint exists.
|
||||
- **Admin endpoints → MCP tool**: every admin endpoint that an admin would manage conversationally (install/list/edit/toggle/upgrade) MUST also be exposed as an MCP tool in `pkg/mcp/localaitools/`. The LocalAI Assistant chat modality and the standalone `local-ai mcp-server` consume that package; drift between REST and MCP is a real risk. Read [.agents/localai-assistant-mcp.md](.agents/localai-assistant-mcp.md) — the `TestToolHTTPRouteMappingComplete` test fails until you wire the new tool and update the route map.
|
||||
- **Build**: Inspect `Makefile` and `.github/workflows/` — ask the user before running long builds
|
||||
- **Backend OS coverage**: a new backend must target every OS it can build for, not just Linux. `.github/backend-matrix.yml` has two matrices — `include:` (Linux) and `includeDarwin:` (macOS / Apple Silicon). Most C/C++/GGML and many Python backends build on Darwin too — wire the `includeDarwin` entry + `backend/index.yaml` `metal:` entries, or say in the PR why an OS is unsupported. See the darwin checklist in [.agents/adding-backends.md](.agents/adding-backends.md).
|
||||
- **UI**: The active UI is the React app in `core/http/react-ui/`. The older Alpine.js/HTML UI in `core/http/static/` is pending deprecation — all new UI work goes in the React UI
|
||||
|
||||
64
Makefile
64
Makefile
@@ -1,5 +1,5 @@
|
||||
# Disable parallel execution for backend builds
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic backends/depth-anything-cpp backends/privacy-filter
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic backends/depth-anything-cpp backends/privacy-filter backends/privacy-filter-darwin
|
||||
|
||||
GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
@@ -103,7 +103,7 @@ COVERAGE_E2E_LABELS?=!real-models
|
||||
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
||||
|
||||
|
||||
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
||||
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-backend-cpp test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
||||
|
||||
all: help
|
||||
|
||||
@@ -201,6 +201,13 @@ test: prepare-test
|
||||
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
|
||||
## Compiles and runs the standalone C++ unit tests for the backends (pure
|
||||
## helpers that depend only on the stdlib + nlohmann/json, no full backend
|
||||
## build). Discovers every *_test.cpp under backend/cpp/ - see
|
||||
## backend/cpp/run-unit-tests.sh. Set NLOHMANN_INCLUDE to skip the header fetch.
|
||||
test-backend-cpp:
|
||||
bash backend/cpp/run-unit-tests.sh
|
||||
|
||||
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
||||
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
||||
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
||||
@@ -398,6 +405,18 @@ test-realtime: build-mock-backend
|
||||
@echo 'Running realtime e2e tests (mock backend)'
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime && !real-models" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
|
||||
|
||||
# Verify the realtime state-machine implementations conform to their formal
|
||||
# designs (Go transition/rapid tests under -race + FizzBee model check of the
|
||||
# authoritative specs). See docs/design/realtime-state-machines.md (Part 6) and
|
||||
# docs/design/specs/README.md.
|
||||
test-realtime-conformance:
|
||||
GOCMD=$(GOCMD) ./scripts/realtime-conformance.sh
|
||||
|
||||
# Install the pinned, checksum-verified FizzBee model checker (into .tools/,
|
||||
# gitignored) used by test-realtime-conformance. Idempotent; no-op if present.
|
||||
install-fizzbee:
|
||||
./scripts/install-fizzbee.sh
|
||||
|
||||
# Container-based real-model realtime testing. Build env vars / pipeline
|
||||
# definition kept here so test-realtime-models-docker can drive a fully wired
|
||||
# pipeline (VAD + STT + LLM + TTS) from inside a containerised runner.
|
||||
@@ -1020,7 +1039,7 @@ test-extra-backend-whisper-transcription: docker-build-whisper
|
||||
## is reachable.
|
||||
test-extra-backend-parakeet-cpp-transcription: docker-build-parakeet-cpp
|
||||
BACKEND_IMAGE=local-ai-backend:parakeet-cpp \
|
||||
BACKEND_TEST_MODEL_URL=https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/tdt_ctc-110m-f16.gguf \
|
||||
BACKEND_TEST_MODEL_URL=https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/realtime_eou_120m-v1-f16.gguf \
|
||||
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
||||
BACKEND_TEST_CAPS=health,load,transcription \
|
||||
$(MAKE) test-extra-backend
|
||||
@@ -1129,6 +1148,10 @@ backends/ds4-darwin: build
|
||||
bash ./scripts/build/ds4-darwin.sh
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/ds4.tar)"
|
||||
|
||||
backends/privacy-filter-darwin: build
|
||||
bash ./scripts/build/privacy-filter-darwin.sh
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/privacy-filter.tar)"
|
||||
|
||||
build-darwin-python-backend: build
|
||||
bash ./scripts/build/python-darwin.sh
|
||||
|
||||
@@ -1449,13 +1472,32 @@ docs: docs/static/gallery.html
|
||||
########################################################
|
||||
|
||||
## fyne cross-platform build
|
||||
build-launcher-darwin: build-launcher
|
||||
go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
|
||||
--appName "LocalAI" \
|
||||
--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
|
||||
--bundleIdentifier "com.localai.launcher" \
|
||||
--iconPath "core/http/static/logo.png" \
|
||||
--outputDir "dist/"
|
||||
# Build LocalAI.app from the launcher via fyne (metadata read from cmd/launcher/FyneApp.toml).
|
||||
# Signing happens via contrib/macos/sign-and-notarize.sh, which is a no-op when the signing
|
||||
# secrets are unset, so unsigned local/fork builds keep working.
|
||||
build-launcher-darwin:
|
||||
rm -rf dist/LocalAI.app cmd/launcher/LocalAI.app
|
||||
mkdir -p dist
|
||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os darwin -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)
|
||||
mv cmd/launcher/LocalAI.app dist/LocalAI.app
|
||||
bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.app
|
||||
|
||||
# Wrap the (signed) app into a drag-to-Applications DMG via hdiutil, then sign the DMG.
|
||||
dmg-launcher-darwin: build-launcher-darwin
|
||||
rm -rf dist/dmg dist/LocalAI.dmg
|
||||
mkdir -p dist/dmg
|
||||
cp -R dist/LocalAI.app dist/dmg/LocalAI.app
|
||||
ln -s /Applications dist/dmg/Applications
|
||||
hdiutil create -volname "LocalAI" -srcfolder dist/dmg -ov -format UDZO dist/LocalAI.dmg
|
||||
bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.dmg
|
||||
|
||||
# Submit the DMG to Apple notarization and staple the ticket (no-op without notary secrets).
|
||||
notarize-launcher-darwin: dmg-launcher-darwin
|
||||
bash contrib/macos/sign-and-notarize.sh notarize dist/LocalAI.dmg
|
||||
|
||||
# Single entrypoint for CI: build -> sign app -> dmg -> sign dmg -> notarize -> staple.
|
||||
release-launcher-darwin: notarize-launcher-darwin
|
||||
@echo "dist/LocalAI.dmg is ready"
|
||||
|
||||
build-launcher-linux:
|
||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv LocalAI.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
||||
|
||||
@@ -177,6 +177,7 @@ For more details, see the [Getting Started guide](https://localai.io/basics/gett
|
||||
|
||||
## Latest News
|
||||
|
||||
- **June 2026**: New native biometric backends from the LocalAI team: [voice-detect.cpp](https://github.com/mudler/voice-detect.cpp) for speaker recognition and voice analysis (ECAPA-TDNN, WeSpeaker, ERes2Net, CAM++, wav2vec2 age/gender/emotion) and [face-detect.cpp](https://github.com/mudler/face-detect.cpp) for face detection, recognition, demographics and anti-spoofing (SCRFD/ArcFace, YuNet/SFace). Both are from-scratch C++/ggml engines with no Python or onnxruntime at inference, self-contained GGUF weights, bit-exact parity with the reference, and GPU cuDNN parity, replacing the heavier Python `insightface` and `speaker-recognition` backends ([PR #10441](https://github.com/mudler/LocalAI/pull/10441)).
|
||||
- **June 2026**: New [realtime voice assistant demo](https://github.com/localai-org/localai-realtime-demo) (a tiny Go client for the Realtime API with a full talk-back voice loop and tool calling), plus [streaming of the realtime LLM / TTS / transcription pipeline stages](https://github.com/mudler/LocalAI/pull/10176) and [configurable WebRTC ICE candidates](https://github.com/mudler/LocalAI/pull/10231).
|
||||
- **June 2026**: Big speech push: the [parakeet.cpp](https://github.com/mudler/parakeet.cpp) ASR engine gains [NeMo-faithful segment timestamps](https://github.com/mudler/LocalAI/pull/10207), a [multilingual streaming Nemotron-3.5 model](https://github.com/mudler/LocalAI/pull/10199), [dynamic batching for concurrent transcription](https://github.com/mudler/LocalAI/pull/10112) and [CUDA graphs](https://github.com/mudler/LocalAI/pull/10273); the new [CrispASR backend](https://github.com/mudler/LocalAI/pull/10099) adds multi-architecture ASR + TTS, and [60 Piper TTS voices across 42 languages](https://github.com/mudler/LocalAI/pull/10296) land in the gallery (plus [per-request TTS instructions and params](https://github.com/mudler/LocalAI/pull/10172)).
|
||||
- **June 2026**: New backends and models: [locate-anything.cpp](https://github.com/mudler/LocalAI/pull/10264) for open-vocabulary object detection via ggml, [Ideogram4 image generation](https://github.com/mudler/LocalAI/pull/10201) in stablediffusion-ggml, [llama.cpp video input](https://github.com/mudler/LocalAI/pull/10216), and the [Gemma 4 QAT family with MTP speculative-decoding pairs](https://github.com/mudler/LocalAI/pull/10215). Plus an [interactive CLI chat mode](https://github.com/mudler/LocalAI/pull/10226) and [RAG source citations in agent responses](https://github.com/mudler/LocalAI/pull/10228).
|
||||
|
||||
@@ -137,7 +137,7 @@ RUN <<EOT bash
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||
if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
|
||||
apt-get install -y --no-install-recommends \
|
||||
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} libcudnn9-dev-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
|
||||
fi
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -18,6 +18,18 @@ service Backend {
|
||||
rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
|
||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||
rpc AudioTranscriptionStream(TranscriptRequest) returns (stream TranscriptStreamResponse) {}
|
||||
// AudioTranscriptionLive is the bidirectional live-microphone ASR RPC. The
|
||||
// first message MUST carry a Config; subsequent messages carry Audio frames
|
||||
// (mono float PCM at config.sample_rate, 16 kHz default). After a
|
||||
// successful open the backend replies with a single ready ack
|
||||
// (TranscriptLiveResponse{ready:true}); backends or models without
|
||||
// cache-aware streaming support return UNIMPLEMENTED instead. Newly
|
||||
// finalized text streams back as deltas; eou=true marks the model's
|
||||
// end-of-utterance token. One stream spans many utterances (the decoder
|
||||
// resets itself after each EOU). Closing the send side finalizes: the
|
||||
// backend flushes the decoder tail and emits a terminal message carrying
|
||||
// final_result. A second Config mid-stream resets the decode session.
|
||||
rpc AudioTranscriptionLive(stream TranscriptLiveRequest) returns (stream TranscriptLiveResponse) {}
|
||||
rpc TTS(TTSRequest) returns (Result) {}
|
||||
rpc TTSStream(TTSRequest) returns (stream Reply) {}
|
||||
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
||||
@@ -479,6 +491,10 @@ message TranscriptResult {
|
||||
string text = 2;
|
||||
string language = 3;
|
||||
float duration = 4;
|
||||
// True when the decode ended on the model's end-of-utterance special token
|
||||
// (<EOU>/<EOB>, emitted by cache-aware streaming models such as
|
||||
// parakeet_realtime_eou_120m-v1). The marker itself is stripped from text.
|
||||
bool eou = 5;
|
||||
}
|
||||
|
||||
message TranscriptStreamResponse {
|
||||
@@ -486,6 +502,34 @@ message TranscriptStreamResponse {
|
||||
TranscriptResult final_result = 2;
|
||||
}
|
||||
|
||||
// === AudioTranscriptionLive messages =====================================
|
||||
|
||||
message TranscriptLiveRequest {
|
||||
oneof payload {
|
||||
TranscriptLiveConfig config = 1;
|
||||
TranscriptLiveAudio audio = 2;
|
||||
}
|
||||
}
|
||||
|
||||
message TranscriptLiveConfig {
|
||||
string language = 1; // "" => model default
|
||||
int32 sample_rate = 2; // 0 => 16000; backends may reject others
|
||||
map<string, string> params = 3; // backend-specific tuning
|
||||
}
|
||||
|
||||
message TranscriptLiveAudio {
|
||||
repeated float pcm = 1; // mono PCM in [-1,1] at config.sample_rate
|
||||
}
|
||||
|
||||
message TranscriptLiveResponse {
|
||||
bool ready = 1; // open ack: sent once, before any delta
|
||||
string delta = 2; // newly-finalized text since previous response
|
||||
bool eou = 3; // <EOU> fired during this feed (the user yielded the turn)
|
||||
repeated TranscriptWord words = 4; // words finalized by this feed (stream-relative ns)
|
||||
TranscriptResult final_result = 5; // terminal message only, after the send side closes
|
||||
bool eob = 6; // <EOB> fired: a backchannel ("uh-huh") ended — NOT a turn boundary
|
||||
}
|
||||
|
||||
message TranscriptWord {
|
||||
int64 start = 1;
|
||||
int64 end = 2;
|
||||
|
||||
@@ -1,15 +1,6 @@
|
||||
## Clip/LLaVA library for multimodal support — built locally from copied sources
|
||||
set(TARGET myclip)
|
||||
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
|
||||
install(TARGETS ${TARGET} LIBRARY)
|
||||
target_include_directories(myclip PUBLIC .)
|
||||
target_include_directories(myclip PUBLIC ../..)
|
||||
target_include_directories(myclip PUBLIC ../../common)
|
||||
target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if (NOT MSVC)
|
||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual)
|
||||
endif()
|
||||
## Multimodal support is provided by the in-tree `mtmd` library target
|
||||
## (examples/mtmd/), which the grpc-server links and includes below. clip/llava
|
||||
## were pruned upstream; the high-level mtmd_* / mtmd_helper_* API is used instead.
|
||||
|
||||
set(TARGET grpc-server)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
@@ -67,12 +58,16 @@ add_library(hw_grpc_proto
|
||||
${hw_proto_hdrs} )
|
||||
|
||||
add_executable(${TARGET} grpc-server.cpp json.hpp)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
# mtmd public headers (mtmd.h / mtmd-helper.h) live in examples/mtmd/.
|
||||
# Linking the mtmd target also propagates this include dir, but we add it
|
||||
# explicitly for clarity.
|
||||
target_include_directories(${TARGET} PRIVATE ../mtmd)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
absl::flags_parse
|
||||
gRPC::${_REFLECTION}
|
||||
gRPC::${_GRPC_GRPCPP}
|
||||
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
endif()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
|
||||
IK_LLAMA_VERSION?=f74a6fb87b315b2c3154166e075360e15021a61d
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <getopt.h>
|
||||
#include "clip.h"
|
||||
#include "llava.h"
|
||||
#include "mtmd.h"
|
||||
#include "mtmd-helper.h"
|
||||
#include "log.h"
|
||||
#include "common.h"
|
||||
#include "json.hpp"
|
||||
@@ -45,7 +45,9 @@ using backend::HealthMessage;
|
||||
|
||||
///// LLAMA.CPP server code below
|
||||
|
||||
using json = nlohmann::json;
|
||||
// Match mtmd.h and ik_llama's server/common headers, which all use
|
||||
// nlohmann::ordered_json; a plain nlohmann::json alias collides at global scope.
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
struct server_params
|
||||
{
|
||||
@@ -219,6 +221,11 @@ struct llama_client_slot
|
||||
|
||||
// multimodal
|
||||
std::vector<slot_image> images;
|
||||
// Full prompt with mtmd media markers (mtmd_default_marker()) substituted in
|
||||
// place of the legacy [img-N] tags, covering the text up to and including the
|
||||
// last image. The text after the last image is kept in params.input_suffix and
|
||||
// decoded through the normal token path so the sampling loop is unchanged.
|
||||
std::string mtmd_prompt;
|
||||
|
||||
// stats
|
||||
size_t sent_count = 0;
|
||||
@@ -252,14 +259,14 @@ struct llama_client_slot
|
||||
|
||||
for (slot_image & img : images)
|
||||
{
|
||||
free(img.image_embedding);
|
||||
if (img.img_data) {
|
||||
clip_image_u8_free(img.img_data);
|
||||
if (img.bitmap) {
|
||||
mtmd_bitmap_free(img.bitmap);
|
||||
img.bitmap = nullptr;
|
||||
}
|
||||
img.prefix_prompt = "";
|
||||
}
|
||||
|
||||
images.clear();
|
||||
mtmd_prompt = "";
|
||||
}
|
||||
|
||||
bool has_budget(gpt_params &global_params) {
|
||||
@@ -396,46 +403,13 @@ struct llama_metrics {
|
||||
}
|
||||
};
|
||||
|
||||
struct llava_embd_batch {
|
||||
std::vector<llama_pos> pos;
|
||||
std::vector<int32_t> n_seq_id;
|
||||
std::vector<llama_seq_id> seq_id_0;
|
||||
std::vector<llama_seq_id *> seq_ids;
|
||||
std::vector<int8_t> logits;
|
||||
llama_batch batch;
|
||||
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
|
||||
pos .resize(n_tokens);
|
||||
n_seq_id.resize(n_tokens);
|
||||
seq_ids .resize(n_tokens + 1);
|
||||
logits .resize(n_tokens);
|
||||
seq_id_0.resize(1);
|
||||
seq_id_0[0] = seq_id;
|
||||
seq_ids [n_tokens] = nullptr;
|
||||
batch = {
|
||||
/*n_tokens =*/ n_tokens,
|
||||
/*tokens =*/ nullptr,
|
||||
/*embd =*/ embd,
|
||||
/*pos =*/ pos.data(),
|
||||
/*n_seq_id =*/ n_seq_id.data(),
|
||||
/*seq_id =*/ seq_ids.data(),
|
||||
/*logits =*/ logits.data(),
|
||||
};
|
||||
for (int i = 0; i < n_tokens; i++) {
|
||||
batch.pos [i] = pos_0 + i;
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
llama_context *ctx = nullptr;
|
||||
const llama_vocab * vocab = nullptr;
|
||||
|
||||
clip_ctx *clp_ctx = nullptr;
|
||||
mtmd_context *mctx = nullptr;
|
||||
|
||||
gpt_params params;
|
||||
|
||||
@@ -491,11 +465,6 @@ struct llama_server_context
|
||||
if (!params.mmproj.path.empty()) {
|
||||
multimodal = true;
|
||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||
clp_ctx = clip_model_load(params.mmproj.path.c_str(), /*verbosity=*/ 1);
|
||||
if(clp_ctx == nullptr) {
|
||||
LOG_ERR("unable to load clip model: %s", params.mmproj.path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params.n_ctx < 2048) { // request larger context for the image embedding
|
||||
params.n_ctx = 2048;
|
||||
@@ -512,10 +481,24 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
if (multimodal) {
|
||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||
const int n_embd_llm = llama_model_n_embd(model);
|
||||
if (n_embd_clip != n_embd_llm) {
|
||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||
// mtmd_init_from_file requires the already-loaded text model, so it must
|
||||
// run AFTER llama_init_from_gpt_params. It validates the projector
|
||||
// against the model internally and returns nullptr on dim mismatch, so
|
||||
// the explicit clip_n_mmproj_embd check is no longer needed.
|
||||
mtmd_context_params mparams = mtmd_context_params_default();
|
||||
mparams.use_gpu = params.mmproj_use_gpu;
|
||||
mparams.print_timings = false;
|
||||
mparams.n_threads = params.n_threads_mtmd != -1 ? params.n_threads_mtmd
|
||||
: params.n_threads_batch != -1 ? params.n_threads_batch
|
||||
: params.n_threads;
|
||||
mparams.verbosity = GGML_LOG_LEVEL_INFO;
|
||||
mparams.flash_attn_type = params.flash_attn ? LLAMA_FLASH_ATTN_TYPE_ENABLED
|
||||
: LLAMA_FLASH_ATTN_TYPE_DISABLED;
|
||||
mparams.image_min_tokens = params.image_min_tokens;
|
||||
mparams.image_max_tokens = params.image_max_tokens;
|
||||
mctx = mtmd_init_from_file(params.mmproj.path.c_str(), model, mparams);
|
||||
if (mctx == nullptr) {
|
||||
LOG_ERR("unable to load multimodal projector: %s", params.mmproj.path.c_str());
|
||||
llama_free(ctx);
|
||||
llama_free_model(model);
|
||||
return false;
|
||||
@@ -865,8 +848,8 @@ struct llama_server_context
|
||||
|
||||
slot_image img_sl;
|
||||
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
|
||||
img_sl.img_data = clip_image_u8_init();
|
||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
||||
img_sl.bitmap = mtmd_helper_bitmap_init_from_buf(mctx, image_buffer.data(), image_buffer.size());
|
||||
if (img_sl.bitmap == nullptr)
|
||||
{
|
||||
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
||||
__func__,
|
||||
@@ -879,50 +862,74 @@ struct llama_server_context
|
||||
{"slot_id", slot->id},
|
||||
{"img_sl_id", img_sl.id}
|
||||
});
|
||||
img_sl.request_encode_image = true;
|
||||
slot->images.push_back(img_sl);
|
||||
}
|
||||
// process prompt
|
||||
// example: system prompt [img-102] user [img-103] describe [img-134] -> [{id: 102, prefix: 'system prompt '}, {id: 103, prefix: ' user '}, {id: 134, prefix: ' describe '}]
|
||||
// Translate the legacy [img-N] tags into mtmd media markers, in
|
||||
// order, and collect the matching bitmaps in marker order so they
|
||||
// line up with the markers passed to mtmd_tokenize(). The text after
|
||||
// the last image stays in input_suffix and is decoded through the
|
||||
// normal token path, so the sampling loop is unchanged.
|
||||
// example: system prompt [img-102] user [img-103] describe [img-134]
|
||||
if (slot->images.size() > 0 && !slot->prompt.is_array())
|
||||
{
|
||||
const std::string marker = mtmd_default_marker();
|
||||
std::string prompt = slot->prompt.get<std::string>();
|
||||
size_t pos = 0, begin_prefix = 0;
|
||||
std::string built_prompt;
|
||||
std::vector<slot_image> ordered;
|
||||
size_t pos = 0, copy_from = 0;
|
||||
std::string pattern = "[img-";
|
||||
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
||||
size_t end_prefix = pos;
|
||||
pos += pattern.length();
|
||||
size_t end_pos = prompt.find(']', pos);
|
||||
if (end_pos != std::string::npos)
|
||||
{
|
||||
std::string image_id = prompt.substr(pos, end_pos - pos);
|
||||
try
|
||||
{
|
||||
int img_id = std::stoi(image_id);
|
||||
bool found = false;
|
||||
for (slot_image &img : slot->images)
|
||||
{
|
||||
if (img.id == img_id) {
|
||||
found = true;
|
||||
img.prefix_prompt = prompt.substr(begin_prefix, end_prefix - begin_prefix);
|
||||
begin_prefix = end_pos + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||
slot->images.clear();
|
||||
return false;
|
||||
}
|
||||
} catch (const std::invalid_argument& e) {
|
||||
LOG("Invalid image number id in prompt\n");
|
||||
slot->images.clear();
|
||||
return false;
|
||||
|
||||
auto free_images = [&]() {
|
||||
for (slot_image &img : slot->images) {
|
||||
if (img.bitmap) {
|
||||
mtmd_bitmap_free(img.bitmap);
|
||||
img.bitmap = nullptr;
|
||||
}
|
||||
}
|
||||
slot->images.clear();
|
||||
};
|
||||
|
||||
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
||||
size_t tag_begin = pos;
|
||||
pos += pattern.length();
|
||||
size_t end_pos = prompt.find(']', pos);
|
||||
if (end_pos == std::string::npos) {
|
||||
break;
|
||||
}
|
||||
std::string image_id = prompt.substr(pos, end_pos - pos);
|
||||
try
|
||||
{
|
||||
int img_id = std::stoi(image_id);
|
||||
bool found = false;
|
||||
for (slot_image &img : slot->images)
|
||||
{
|
||||
if (img.id == img_id) {
|
||||
found = true;
|
||||
// text before this tag, then the media marker
|
||||
built_prompt += prompt.substr(copy_from, tag_begin - copy_from);
|
||||
built_prompt += marker;
|
||||
copy_from = end_pos + 1;
|
||||
ordered.push_back(img);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||
free_images();
|
||||
return false;
|
||||
}
|
||||
} catch (const std::invalid_argument& e) {
|
||||
LOG("Invalid image number id in prompt\n");
|
||||
free_images();
|
||||
return false;
|
||||
}
|
||||
pos = end_pos + 1;
|
||||
}
|
||||
// bitmaps are consumed in marker order by mtmd_tokenize()
|
||||
slot->images = ordered;
|
||||
slot->mtmd_prompt = built_prompt;
|
||||
slot->prompt = "";
|
||||
slot->params.input_suffix = prompt.substr(begin_prefix);
|
||||
slot->params.input_suffix = prompt.substr(copy_from);
|
||||
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
|
||||
}
|
||||
}
|
||||
@@ -1176,21 +1183,10 @@ struct llama_server_context
|
||||
|
||||
bool process_images(llama_client_slot &slot) const
|
||||
{
|
||||
for (slot_image &img : slot.images)
|
||||
{
|
||||
if (!img.request_encode_image)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||
LOG("Error processing the given image");
|
||||
return false;
|
||||
}
|
||||
|
||||
img.request_encode_image = false;
|
||||
}
|
||||
|
||||
// With the mtmd pipeline, image encoding is no longer eager: the bitmaps
|
||||
// are tokenized and encoded together with the surrounding text inside
|
||||
// ingest_images() via mtmd_tokenize() + mtmd_helper_eval_chunks(). This
|
||||
// just reports whether the slot carries any images to process.
|
||||
return slot.images.size() > 0;
|
||||
}
|
||||
|
||||
@@ -1435,69 +1431,70 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
// for multiple images processing
|
||||
// Tokenize the multimodal prompt (text interleaved with media markers) together
|
||||
// with the slot's bitmaps, then decode the resulting chunks into the llama
|
||||
// context via the high-level mtmd helper. The helper runs llama_decode() on the
|
||||
// text chunks and mtmd_encode() + llama_decode() on the image chunks, handling
|
||||
// batching and any pre/post decode setup (e.g. non-causal attention for gemma3).
|
||||
// Advances slot.n_past by the number of positions consumed, then leaves the
|
||||
// post-image suffix tokens in `batch` so the normal decode + sampling loop
|
||||
// produces the first generated token.
|
||||
bool ingest_images(llama_client_slot &slot, int n_batch)
|
||||
{
|
||||
int image_idx = 0;
|
||||
|
||||
while (image_idx < (int) slot.images.size())
|
||||
if (mctx == nullptr)
|
||||
{
|
||||
slot_image &img = slot.images[image_idx];
|
||||
LOG("%s : multimodal context is not initialized\n", __func__);
|
||||
return false;
|
||||
}
|
||||
|
||||
// process prefix prompt
|
||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
|
||||
{
|
||||
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
|
||||
llama_batch batch_view = {
|
||||
n_tokens,
|
||||
batch.token + i,
|
||||
nullptr,
|
||||
batch.pos + i,
|
||||
batch.n_seq_id + i,
|
||||
batch.seq_id + i,
|
||||
batch.logits + i,
|
||||
};
|
||||
if (llama_decode(ctx, batch_view))
|
||||
{
|
||||
LOG("%s : failed to eval\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// bitmaps stay owned by slot.images (freed on reset()); pass non-owning ptrs
|
||||
std::vector<const mtmd_bitmap *> bitmaps;
|
||||
bitmaps.reserve(slot.images.size());
|
||||
for (const slot_image &img : slot.images)
|
||||
{
|
||||
bitmaps.push_back(img.bitmap);
|
||||
}
|
||||
|
||||
// process image with llm
|
||||
for (int i = 0; i < img.image_tokens; i += n_batch)
|
||||
{
|
||||
int n_eval = img.image_tokens - i;
|
||||
if (n_eval > n_batch)
|
||||
{
|
||||
n_eval = n_batch;
|
||||
}
|
||||
mtmd_input_text inp_txt;
|
||||
inp_txt.text = slot.mtmd_prompt.c_str();
|
||||
inp_txt.add_special = add_bos_token;
|
||||
inp_txt.parse_special = true;
|
||||
|
||||
const int n_embd = llama_model_n_embd(model);
|
||||
float * embd = img.image_embedding + i * n_embd;
|
||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
||||
if (llama_decode(ctx, llava_batch.batch))
|
||||
{
|
||||
LOG("%s : failed to eval image\n", __func__);
|
||||
return false;
|
||||
}
|
||||
slot.n_past += n_eval;
|
||||
}
|
||||
image_idx++;
|
||||
mtmd::input_chunks chunks(mtmd_input_chunks_init());
|
||||
int32_t res = mtmd_tokenize(mctx,
|
||||
chunks.ptr.get(),
|
||||
&inp_txt,
|
||||
bitmaps.data(),
|
||||
bitmaps.size());
|
||||
if (res != 0)
|
||||
{
|
||||
LOG("%s : failed to tokenize multimodal prompt, res = %d\n", __func__, res);
|
||||
return false;
|
||||
}
|
||||
|
||||
common_batch_clear(batch);
|
||||
const llama_pos start_pos = (llama_pos) system_tokens.size() + slot.n_past;
|
||||
llama_pos new_n_past = start_pos;
|
||||
if (mtmd_helper_eval_chunks(mctx,
|
||||
ctx,
|
||||
chunks.ptr.get(),
|
||||
start_pos,
|
||||
slot.id,
|
||||
n_batch,
|
||||
/*logits_last=*/ false,
|
||||
&new_n_past) != 0)
|
||||
{
|
||||
LOG("%s : failed to eval multimodal chunks\n", __func__);
|
||||
return false;
|
||||
}
|
||||
slot.n_past += (int32_t) (new_n_past - start_pos);
|
||||
|
||||
// append prefix of next image
|
||||
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
|
||||
slot.params.input_suffix : // no more images, then process suffix prompt
|
||||
(json)(slot.images[image_idx].prefix_prompt);
|
||||
|
||||
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
||||
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
||||
{
|
||||
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
||||
slot.n_past += 1;
|
||||
}
|
||||
// queue the post-image suffix text for the normal decode + sampling path
|
||||
common_batch_clear(batch);
|
||||
std::vector<llama_token> suffix_tokens = tokenize(slot.params.input_suffix, false);
|
||||
for (llama_token tok : suffix_tokens)
|
||||
{
|
||||
common_batch_add(batch, tok, system_tokens.size() + slot.n_past, { slot.id }, false);
|
||||
slot.n_past += 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -1884,8 +1881,11 @@ struct llama_server_context
|
||||
|
||||
const bool has_images = process_images(slot);
|
||||
|
||||
// process the prefix of first image
|
||||
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
|
||||
// For the multimodal path the whole pre-image / inter-image text is
|
||||
// tokenized and decoded inside ingest_images() via mtmd, so no prefix
|
||||
// tokens are queued here; the post-image suffix is appended by
|
||||
// ingest_images() for the normal decode + sampling loop.
|
||||
std::vector<llama_token> prefix_tokens = has_images ? std::vector<llama_token>() : prompt_tokens;
|
||||
|
||||
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
|
||||
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
--- a/examples/llava/clip.cpp
|
||||
+++ b/examples/llava/clip.cpp
|
||||
@@ -2494,7 +2494,7 @@
|
||||
}
|
||||
new_data = work.data();
|
||||
|
||||
- new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
|
||||
+ new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr, nullptr);
|
||||
} else {
|
||||
new_type = cur->type;
|
||||
new_data = cur->data;
|
||||
@@ -17,28 +17,9 @@ cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -r utils.hpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/examples/grpc-server/
|
||||
|
||||
## Copy clip/llava files for multimodal support (built as myclip library)
|
||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
||||
# Prepend llama.h include to llava.h
|
||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
||||
# Copy clip-impl.h if it exists
|
||||
if [ -f llama.cpp/examples/llava/clip-impl.h ]; then
|
||||
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
|
||||
fi
|
||||
# Copy stb_image.h
|
||||
if [ -f llama.cpp/vendor/stb/stb_image.h ]; then
|
||||
cp -rfv llama.cpp/vendor/stb/stb_image.h llama.cpp/examples/grpc-server/stb_image.h
|
||||
elif [ -f llama.cpp/common/stb_image.h ]; then
|
||||
cp -rfv llama.cpp/common/stb_image.h llama.cpp/examples/grpc-server/stb_image.h
|
||||
fi
|
||||
|
||||
## Fix API compatibility in llava.cpp (llama_n_embd -> llama_model_n_embd)
|
||||
if [ -f llama.cpp/examples/grpc-server/llava.cpp ]; then
|
||||
sed -i 's/llama_n_embd(/llama_model_n_embd(/g' llama.cpp/examples/grpc-server/llava.cpp
|
||||
fi
|
||||
## Multimodal support is provided by the `mtmd` library target (examples/mtmd/),
|
||||
## which the grpc-server links and includes directly. No source copy is needed:
|
||||
## clip/llava were pruned upstream and the high-level mtmd_* API is used instead.
|
||||
|
||||
set +e
|
||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -13,28 +13,28 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
# ik_llama.cpp requires AVX2 — default to avx2 binary
|
||||
BINARY=ik-llama-cpp-avx2
|
||||
|
||||
if [ -e $CURDIR/ik-llama-cpp-fallback ] && ! grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
if [ -e "$CURDIR"/ik-llama-cpp-fallback ] && ! grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 NOT found, using fallback"
|
||||
BINARY=ik-llama-cpp-fallback
|
||||
fi
|
||||
|
||||
# Extend ld library path with the dir where this script is located/lib
|
||||
if [ "$(uname)" == "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
#export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
#export DYLD_FALLBACK_LIBRARY_PATH="$CURDIR"/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
|
||||
fi
|
||||
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/$BINARY "$@"
|
||||
exec "$CURDIR"/$BINARY "$@"
|
||||
|
||||
# We should never reach this point, however just in case we do, run fallback
|
||||
exec $CURDIR/ik-llama-cpp-fallback "$@"
|
||||
exec "$CURDIR"/ik-llama-cpp-fallback "$@"
|
||||
|
||||
@@ -11,9 +11,12 @@
|
||||
|
||||
#include "json.hpp"
|
||||
|
||||
#include "clip.h"
|
||||
#include "mtmd.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
// mtmd.h and ik_llama's entire server/common stack (chat.h, server-common.h,
|
||||
// server-task.h, ...) declare `using json = nlohmann::ordered_json`, so match it
|
||||
// here: a plain `nlohmann::json` alias collides with mtmd.h's at global scope.
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
extern bool server_verbose;
|
||||
|
||||
@@ -111,13 +114,12 @@ struct slot_image
|
||||
{
|
||||
int32_t id;
|
||||
|
||||
bool request_encode_image = false;
|
||||
float * image_embedding = nullptr;
|
||||
int32_t image_tokens = 0;
|
||||
|
||||
clip_image_u8 * img_data;
|
||||
|
||||
std::string prefix_prompt; // before of this image
|
||||
// mtmd bitmap (image/audio) decoded from the request buffer. Owned by the
|
||||
// slot; freed via mtmd_bitmap_free() on reset. The high-level mtmd pipeline
|
||||
// (mtmd_tokenize + mtmd_helper_eval_chunks) consumes these directly, so the
|
||||
// legacy eager-encode fields (embedding/tokens) and per-image prefix prompt
|
||||
// are no longer needed.
|
||||
mtmd_bitmap * bitmap = nullptr;
|
||||
};
|
||||
|
||||
// completion token output with probabilities
|
||||
|
||||
@@ -50,8 +50,13 @@ add_custom_command(
|
||||
"${hw_proto}"
|
||||
DEPENDS "${hw_proto}")
|
||||
|
||||
# hw_grpc_proto
|
||||
add_library(hw_grpc_proto
|
||||
# hw_grpc_proto: force STATIC. Under the CPU_ALL_VARIANTS build BUILD_SHARED_LIBS=ON
|
||||
# (ggml/llama become shared), which would otherwise make this glue library a DSO. As a
|
||||
# DSO it references the hidden-visibility symbols in the static libprotobuf.a, which the
|
||||
# linker cannot satisfy ("hidden symbol ... in libprotobuf.a is referenced by DSO").
|
||||
# Keeping it STATIC links protobuf/gRPC directly into the grpc-server executable while
|
||||
# only ggml/llama stay shared. No effect on the static variants (already BUILD_SHARED_LIBS=OFF).
|
||||
add_library(hw_grpc_proto STATIC
|
||||
${hw_grpc_srcs}
|
||||
${hw_grpc_hdrs}
|
||||
${hw_proto_srcs}
|
||||
@@ -82,3 +87,18 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
endif()
|
||||
|
||||
# Unit test for the message-content normalization helper (message_content.h).
|
||||
# Off by default so the normal backend build is untouched; enable with
|
||||
# -DLLAMA_GRPC_BUILD_TESTS=ON and run via ctest. It reuses llama.cpp's vendored
|
||||
# <nlohmann/json.hpp> (propagated by the common helpers library) so it has no
|
||||
# extra dependency beyond what the backend already builds against.
|
||||
option(LLAMA_GRPC_BUILD_TESTS "Build grpc-server unit tests" OFF)
|
||||
if(LLAMA_GRPC_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_executable(message_content_test message_content_test.cpp message_content.h)
|
||||
target_include_directories(message_content_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET})
|
||||
target_compile_features(message_content_test PRIVATE cxx_std_17)
|
||||
add_test(NAME message_content_test COMMAND message_content_test)
|
||||
endif()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=73618f27a801c0b8614ceaf3547d3c2a99baae14
|
||||
LLAMA_VERSION?=6f4f53f2b7da54fcdbbecaaa734337c337ad6176
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
@@ -10,8 +10,16 @@ TARGET?=--target grpc-server
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
|
||||
ARCH?=$(shell uname -m)
|
||||
|
||||
# Disable shared libs: we link against static gRPC, and shared and static libraries can't be mixed
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
# Shared libs default to OFF: we link static gRPC and the avx/avx2/avx512/fallback
|
||||
# variants are fully static. The CPU_ALL_VARIANTS build flips SHARED_LIBS=ON (ggml/llama
|
||||
# become shared so the dynamic CPU backends work; gRPC stays static via its imported
|
||||
# targets). SHARED_LIBS is a make variable, not an appended -D, so it survives the
|
||||
# recursive sub-make into the VARIANT build dir (which re-parses this Makefile) instead
|
||||
# of being re-clobbered by a second -DBUILD_SHARED_LIBS=OFF. EXTRA_CMAKE_ARGS is the hook
|
||||
# the CPU_ALL_VARIANTS target uses to inject -DGGML_BACKEND_DL/-DGGML_CPU_ALL_VARIANTS.
|
||||
SHARED_LIBS?=OFF
|
||||
EXTRA_CMAKE_ARGS?=
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=$(SHARED_LIBS) -DLLAMA_CURL=OFF $(EXTRA_CMAKE_ARGS)
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
ifeq ($(NATIVE),false)
|
||||
@@ -120,15 +128,39 @@ llama-cpp-fallback: llama.cpp
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
# Single-build CPU backend using ggml's CPU_ALL_VARIANTS. Produces ONE grpc-server
|
||||
# plus a set of dlopen-able libggml-cpu-*.so (sandybridge/haswell/skylakex/...) that
|
||||
# ggml's backend registry selects from at runtime by probing host CPU features.
|
||||
# Replaces the avx/avx2/avx512/fallback multi-binary build on x86.
|
||||
#
|
||||
# CPU_ALL_VARIANTS requires GGML_BACKEND_DL, which requires BUILD_SHARED_LIBS=ON, so we
|
||||
# pass SHARED_LIBS=ON and the DL flags as make variables (NOT pre-expanded into the
|
||||
# CMAKE_ARGS env string): command-line make variables propagate through every recursive
|
||||
# sub-make, so the deepest VARIANT-dir build computes BUILD_SHARED_LIBS=ON consistently.
|
||||
# Only ggml/llama go shared - gRPC is found via its static imported targets, so the
|
||||
# grpc-server binary keeps static gRPC and only dynamically links ggml.
|
||||
#
|
||||
# TARGET adds "ggml": the per-microarch backends are runtime-dlopened, not link deps of
|
||||
# grpc-server, so they only build because each is an add_dependencies() of the ggml target.
|
||||
llama-cpp-cpu-all: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:cpu-all-variants${RESET})
|
||||
$(MAKE) SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" VARIANT="llama-cpp-cpu-all-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/grpc-server llama-cpp-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target ggml-rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
||||
|
||||
llama-cpp-rpc-server: llama-cpp-grpc
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/ggml-rpc-server llama-cpp-rpc-server
|
||||
|
||||
llama.cpp:
|
||||
mkdir -p llama.cpp
|
||||
|
||||
@@ -30,6 +30,19 @@
|
||||
#define LOCALAI_HAS_SERVER_SCHEMA 1
|
||||
#include "server-schema.cpp"
|
||||
#endif
|
||||
// server-stream.cpp exists only in llama.cpp after the upstream refactor that
|
||||
// added the SSE stream-resumption layer (stream_session/stream_pipe_producer).
|
||||
// server-context.cpp calls into it (spipe->cleanup(), stream_aware_should_stop,
|
||||
// stream_session_attach_pipe), so its definitions must be part of this
|
||||
// translation unit or the link fails with "undefined reference to
|
||||
// stream_pipe_producer::cleanup()". The file is self-contained (its only
|
||||
// external symbols come from server-common, already pulled in above) and the
|
||||
// http route-handler factories it also defines are unused here but harmless.
|
||||
// __has_include keeps the source compatible with older pins/forks that predate
|
||||
// the split.
|
||||
#if __has_include("server-stream.cpp")
|
||||
#include "server-stream.cpp"
|
||||
#endif
|
||||
#include "server-context.cpp"
|
||||
|
||||
// LocalAI
|
||||
@@ -37,7 +50,9 @@
|
||||
#include "backend.pb.h"
|
||||
#include "backend.grpc.pb.h"
|
||||
#include "common.h"
|
||||
#include "arg.h"
|
||||
#include "chat-auto-parser.h"
|
||||
#include "message_content.h"
|
||||
#include <getopt.h>
|
||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||
#include <grpcpp/grpcpp.h>
|
||||
@@ -592,6 +607,10 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
params.checkpoint_min_step = 256;
|
||||
#endif
|
||||
|
||||
// Raw upstream llama-server flags collected from any option entry that
|
||||
// starts with '-'. Applied once after the loop via common_params_parse.
|
||||
std::vector<std::string> extra_argv;
|
||||
|
||||
// Decode options. Options are in the form optname:optval, or just optname for booleans.
|
||||
for (int i = 0; i < request->options_size(); i++) {
|
||||
std::string opt = request->options(i);
|
||||
@@ -1080,6 +1099,31 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
// --- main model MoE on CPU (upstream --cpu-moe / --n-cpu-moe) ---
|
||||
} else if (!strcmp(optname, "cpu_moe")) {
|
||||
// Bool-style flag: keep all MoE expert weights on CPU.
|
||||
const bool enable = (optval == NULL) ||
|
||||
optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
|
||||
optval_str == "on" || optval_str == "enabled";
|
||||
if (enable) {
|
||||
params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
|
||||
}
|
||||
} else if (!strcmp(optname, "n_cpu_moe")) {
|
||||
if (optval != NULL) {
|
||||
try {
|
||||
int n = std::stoi(optval_str);
|
||||
if (n < 0) n = 0;
|
||||
// Keep override-name storage alive for the lifetime of the
|
||||
// params struct (mirrors upstream arg.cpp's function-local static).
|
||||
static std::list<std::string> buft_overrides_main;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
buft_overrides_main.push_back(llm_ffn_exps_block_regex(i));
|
||||
params.tensor_buft_overrides.push_back(
|
||||
{buft_overrides_main.back().c_str(), ggml_backend_cpu_buffer_type()});
|
||||
}
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
// --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
|
||||
} else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
|
||||
// Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
|
||||
@@ -1111,6 +1155,30 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
else { cur.push_back(c); }
|
||||
}
|
||||
if (!cur.empty()) flush(cur);
|
||||
|
||||
// --- generic passthrough: any entry starting with '-' is a raw
|
||||
// upstream llama-server flag, forwarded verbatim to the parser. ---
|
||||
} else if (optname[0] == '-') {
|
||||
std::string flag = optname;
|
||||
// These flags make upstream's parser exit() (printing usage /
|
||||
// completion), which would kill the backend process. Skip them.
|
||||
if (flag == "-h" || flag == "--help" || flag == "--usage" ||
|
||||
flag == "--version" || flag == "--license" ||
|
||||
flag == "--list-devices" || flag == "-cl" ||
|
||||
flag == "--cache-list" ||
|
||||
flag.rfind("--completion", 0) == 0) {
|
||||
fprintf(stderr,
|
||||
"[llama-cpp] ignoring passthrough flag that would exit: %s\n",
|
||||
flag.c_str());
|
||||
} else {
|
||||
extra_argv.push_back(flag);
|
||||
// Preserve the whole value after the first ':' so embedded
|
||||
// colons (e.g. host:port) survive strtok's truncation of optval.
|
||||
auto colon = opt.find(':');
|
||||
if (colon != std::string::npos) {
|
||||
extra_argv.push_back(opt.substr(colon + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1146,27 +1214,6 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.kv_overrides.empty()) {
|
||||
params.kv_overrides.emplace_back();
|
||||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
// tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
|
||||
// Real entries are pushed during option parsing; here we pad/terminate so the
|
||||
// model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
|
||||
// and so llama_params_fit has the placeholder slots it requires.
|
||||
{
|
||||
const size_t ntbo = llama_max_tensor_buft_overrides();
|
||||
while (params.tensor_buft_overrides.size() < ntbo) {
|
||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
// Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
|
||||
// the main-model handling above.
|
||||
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
||||
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
|
||||
// TODO: Add yarn
|
||||
|
||||
if (!request->tensorsplit().empty()) {
|
||||
@@ -1259,6 +1306,69 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
params.sampling.grammar_triggers.push_back(std::move(trigger));
|
||||
}
|
||||
}
|
||||
|
||||
// Apply any raw upstream flags last so an explicit passthrough flag wins
|
||||
// over the LocalAI-resolved field it maps to (e.g. --ctx-size beats
|
||||
// context_size). This is the same parser llama-server itself uses.
|
||||
if (!extra_argv.empty()) {
|
||||
// common_params_parser_init resets a few fields for the SERVER example
|
||||
// (n_parallel -> -1, use_color). Snapshot n_parallel so an unrelated
|
||||
// passthrough flag can't silently clobber LocalAI's resolved value.
|
||||
const int saved_n_parallel = params.n_parallel;
|
||||
|
||||
std::vector<char *> argv;
|
||||
std::string prog = "llama-server";
|
||||
argv.push_back(prog.data());
|
||||
for (auto & a : extra_argv) {
|
||||
argv.push_back(a.data());
|
||||
}
|
||||
|
||||
// ctx_arg.params is a reference, so this overlays the given flags onto
|
||||
// `params` in place. Returns false on a recoverable parse error (and
|
||||
// self-restores params); may exit() on a hard error, exactly as
|
||||
// passing the same bad flag to llama-server would.
|
||||
if (!common_params_parse((int)argv.size(), argv.data(), params,
|
||||
LLAMA_EXAMPLE_SERVER)) {
|
||||
fprintf(stderr,
|
||||
"[llama-cpp] failed to parse passthrough options; ignoring them\n");
|
||||
}
|
||||
|
||||
// Restore n_parallel unless a passthrough flag explicitly set it
|
||||
// (parser_init's reset sentinel for SERVER is -1).
|
||||
if (params.n_parallel == -1) {
|
||||
params.n_parallel = saved_n_parallel;
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate/pad the override vectors only after BOTH the named-option loop
|
||||
// and the generic passthrough (common_params_parse above) have pushed their
|
||||
// real entries, so back() is the null sentinel the model loader asserts on.
|
||||
// Running these before the passthrough let a passthrough flag (--cpu-moe,
|
||||
// --override-tensor, --override-kv, ...) append a real entry after the
|
||||
// sentinel: a GGML_ASSERT crash for tensor_buft_overrides, a silent drop for
|
||||
// kv_overrides. Double-termination is harmless (the while is a no-op if the
|
||||
// passthrough parse already padded; an extra trailing null is ignored).
|
||||
|
||||
if (!params.kv_overrides.empty()) {
|
||||
params.kv_overrides.emplace_back();
|
||||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
// tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
|
||||
// Real entries are pushed during option parsing; here we pad/terminate so the
|
||||
// model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
|
||||
// and so llama_params_fit has the placeholder slots it requires.
|
||||
{
|
||||
const size_t ntbo = llama_max_tensor_buft_overrides();
|
||||
while (params.tensor_buft_overrides.size() < ntbo) {
|
||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
// Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
|
||||
// the main-model handling above.
|
||||
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
||||
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1520,242 +1630,20 @@ public:
|
||||
|
||||
for (int i = 0; i < request->messages_size(); i++) {
|
||||
const auto& msg = request->messages(i);
|
||||
json msg_json;
|
||||
msg_json["role"] = msg.role();
|
||||
|
||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
||||
|
||||
// Handle content - can be string, null, or array
|
||||
// For multimodal content, we'll embed images/audio from separate fields
|
||||
if (!msg.content().empty()) {
|
||||
// Try to parse content as JSON to see if it's already an array
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
// Handle null values - convert to empty string to avoid template errors
|
||||
if (content_val.is_null()) {
|
||||
content_val = "";
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
content_val = msg.content();
|
||||
}
|
||||
|
||||
// If content is an object (e.g., from tool call failures), convert to string
|
||||
if (content_val.is_object()) {
|
||||
content_val = content_val.dump();
|
||||
}
|
||||
|
||||
// If content is a string and this is the last user message with images/audio, combine them
|
||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
||||
json content_array = json::array();
|
||||
// Add text first
|
||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
||||
// Add images
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
// Add audios
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Use content as-is (already array or not last user message)
|
||||
// Ensure null values are converted to empty string
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
}
|
||||
}
|
||||
} else if (is_last_user_msg && has_images_or_audio) {
|
||||
// If no content but this is the last user message with images/audio, create content array
|
||||
json content_array = json::array();
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else if (msg.role() == "tool") {
|
||||
// Tool role messages must have content field set, even if empty
|
||||
// Jinja templates expect content to be a string, not null or object
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
||||
if (msg.content().empty()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
// Content exists, parse and ensure it's a string
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
|
||||
i, content_val.is_null() ? "null" :
|
||||
content_val.is_object() ? "object" :
|
||||
content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" : "other");
|
||||
// Handle null values - Jinja templates expect content to be a string, not null
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
|
||||
} else if (content_val.is_object()) {
|
||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
|
||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
||||
} else if (content_val.is_string()) {
|
||||
msg_json["content"] = content_val.get<std::string>();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
|
||||
} else {
|
||||
// For arrays or other types, convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
|
||||
i, content_val.is_array() ? "array" : "other type");
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
msg_json["content"] = msg.content();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
||||
// Jinja templates expect content to be present, default to empty string if not set
|
||||
if (!msg_json.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
|
||||
i, msg.role().c_str());
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
llama_grpc::ReconstructedMessageInput rin;
|
||||
rin.role = msg.role();
|
||||
rin.content = msg.content();
|
||||
rin.name = msg.name();
|
||||
rin.tool_call_id = msg.tool_call_id();
|
||||
rin.reasoning_content = msg.reasoning_content();
|
||||
rin.tool_calls = msg.tool_calls();
|
||||
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||
if (rin.is_last_user_msg) {
|
||||
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||
}
|
||||
|
||||
// Add optional fields for OpenAI-compatible message format
|
||||
if (!msg.name().empty()) {
|
||||
msg_json["name"] = msg.name();
|
||||
}
|
||||
if (!msg.tool_call_id().empty()) {
|
||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
||||
}
|
||||
if (!msg.reasoning_content().empty()) {
|
||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
||||
}
|
||||
if (!msg.tool_calls().empty()) {
|
||||
// Parse tool_calls JSON string and add to message
|
||||
try {
|
||||
json tool_calls = json::parse(msg.tool_calls());
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
||||
// set content to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
// Log each tool call with name and arguments
|
||||
if (tool_calls.is_array()) {
|
||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
||||
const auto& tc = tool_calls[tc_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_args = "{}";
|
||||
if (tc.contains("function")) {
|
||||
const auto& func = tc["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("arguments")) {
|
||||
tool_args = func["arguments"].is_string() ?
|
||||
func["arguments"].get<std::string>() :
|
||||
func["arguments"].dump();
|
||||
}
|
||||
} else if (tc.contains("name")) {
|
||||
tool_name = tc["name"].get<std::string>();
|
||||
if (tc.contains("arguments")) {
|
||||
tool_args = tc["arguments"].is_string() ?
|
||||
tc["arguments"].get<std::string>() :
|
||||
tc["arguments"].dump();
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
||||
}
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log final content state before adding to array
|
||||
if (msg_json.contains("content")) {
|
||||
if (msg_json["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
||||
i, msg_json["content"].is_string() ? "string" :
|
||||
msg_json["content"].is_array() ? "array" :
|
||||
msg_json["content"].is_object() ? "object" : "other",
|
||||
msg_json["content"].is_null() ? 0 : 1);
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
||||
}
|
||||
|
||||
messages_json.push_back(msg_json);
|
||||
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||
}
|
||||
|
||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||
@@ -1976,36 +1864,7 @@ public:
|
||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||
auto& msg = body_json["messages"][idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content")) {
|
||||
if (msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Fix null content
|
||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
||||
// Tool messages must have string content, not array
|
||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
||||
// If content is object or other non-string type, convert to string for templates
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
||||
if (msg["content"].is_object()) {
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else {
|
||||
msg["content"] = "";
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Add missing content
|
||||
}
|
||||
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2337,264 +2196,20 @@ public:
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
||||
for (int i = 0; i < request->messages_size(); i++) {
|
||||
const auto& msg = request->messages(i);
|
||||
json msg_json;
|
||||
msg_json["role"] = msg.role();
|
||||
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
|
||||
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
|
||||
if (!msg.content().empty()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
llama_grpc::ReconstructedMessageInput rin;
|
||||
rin.role = msg.role();
|
||||
rin.content = msg.content();
|
||||
rin.name = msg.name();
|
||||
rin.tool_call_id = msg.tool_call_id();
|
||||
rin.reasoning_content = msg.reasoning_content();
|
||||
rin.tool_calls = msg.tool_calls();
|
||||
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||
if (rin.is_last_user_msg) {
|
||||
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||
}
|
||||
|
||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
||||
|
||||
// Handle content - can be string, null, or array
|
||||
// For multimodal content, we'll embed images/audio from separate fields
|
||||
if (!msg.content().empty()) {
|
||||
// Try to parse content as JSON to see if it's already an array
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
// Handle null values - convert to empty string to avoid template errors
|
||||
if (content_val.is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
|
||||
content_val = "";
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
content_val = msg.content();
|
||||
}
|
||||
|
||||
// If content is an object (e.g., from tool call failures), convert to string
|
||||
if (content_val.is_object()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
|
||||
content_val = content_val.dump();
|
||||
}
|
||||
|
||||
// If content is a string and this is the last user message with images/audio, combine them
|
||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
||||
json content_array = json::array();
|
||||
// Add text first
|
||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
||||
// Add images
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
// Add audios
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Use content as-is (already array or not last user message)
|
||||
// Ensure null values are converted to empty string
|
||||
if (content_val.is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
|
||||
i, content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" :
|
||||
content_val.is_object() ? "object" : "other");
|
||||
}
|
||||
}
|
||||
} else if (is_last_user_msg && has_images_or_audio) {
|
||||
// If no content but this is the last user message with images/audio, create content array
|
||||
json content_array = json::array();
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
|
||||
} else if (!msg.tool_calls().empty()) {
|
||||
// Tool call messages may have null content, but templates expect string
|
||||
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
|
||||
msg_json["content"] = " ";
|
||||
} else if (msg.role() == "tool") {
|
||||
// Tool role messages must have content field set, even if empty
|
||||
// Jinja templates expect content to be a string, not null or object
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
||||
if (msg.content().empty()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
// Content exists, parse and ensure it's a string
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
|
||||
i, content_val.is_null() ? "null" :
|
||||
content_val.is_object() ? "object" :
|
||||
content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" : "other");
|
||||
// Handle null values - Jinja templates expect content to be a string, not null
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
|
||||
} else if (content_val.is_object()) {
|
||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
|
||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
||||
} else if (content_val.is_string()) {
|
||||
msg_json["content"] = content_val.get<std::string>();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
|
||||
} else {
|
||||
// For arrays or other types, convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
|
||||
i, content_val.is_array() ? "array" : "other type");
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
msg_json["content"] = msg.content();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
||||
// Jinja templates expect content to be present, default to empty string if not set
|
||||
if (!msg_json.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
|
||||
i, msg.role().c_str());
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional fields for OpenAI-compatible message format
|
||||
if (!msg.name().empty()) {
|
||||
msg_json["name"] = msg.name();
|
||||
}
|
||||
if (!msg.tool_call_id().empty()) {
|
||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
||||
}
|
||||
if (!msg.reasoning_content().empty()) {
|
||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
||||
}
|
||||
if (!msg.tool_calls().empty()) {
|
||||
// Parse tool_calls JSON string and add to message
|
||||
try {
|
||||
json tool_calls = json::parse(msg.tool_calls());
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
||||
// set content to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
// Log each tool call with name and arguments
|
||||
if (tool_calls.is_array()) {
|
||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
||||
const auto& tc = tool_calls[tc_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_args = "{}";
|
||||
if (tc.contains("function")) {
|
||||
const auto& func = tc["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("arguments")) {
|
||||
tool_args = func["arguments"].is_string() ?
|
||||
func["arguments"].get<std::string>() :
|
||||
func["arguments"].dump();
|
||||
}
|
||||
} else if (tc.contains("name")) {
|
||||
tool_name = tc["name"].get<std::string>();
|
||||
if (tc.contains("arguments")) {
|
||||
tool_args = tc["arguments"].is_string() ?
|
||||
tc["arguments"].get<std::string>() :
|
||||
tc["arguments"].dump();
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
||||
}
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log final content state before adding to array
|
||||
if (msg_json.contains("content")) {
|
||||
if (msg_json["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
||||
i, msg_json["content"].is_string() ? "string" :
|
||||
msg_json["content"].is_array() ? "array" :
|
||||
msg_json["content"].is_object() ? "object" : "other",
|
||||
msg_json["content"].is_null() ? 0 : 1);
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
||||
}
|
||||
|
||||
messages_json.push_back(msg_json);
|
||||
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||
}
|
||||
|
||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||
@@ -2815,36 +2430,7 @@ public:
|
||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||
auto& msg = body_json["messages"][idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content")) {
|
||||
if (msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Fix null content
|
||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
||||
// Tool messages must have string content, not array
|
||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
||||
// If content is object or other non-string type, convert to string for templates
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
||||
if (msg["content"].is_object()) {
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else {
|
||||
msg["content"] = "";
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Add missing content
|
||||
}
|
||||
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
192
backend/cpp/llama-cpp/message_content.h
Normal file
192
backend/cpp/llama-cpp/message_content.h
Normal file
@@ -0,0 +1,192 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace llama_grpc {
|
||||
|
||||
// Normalizes a proto message's content string into the JSON value used when
|
||||
// reconstructing OpenAI-format messages for the tokenizer (jinja) template.
|
||||
//
|
||||
// Shared by the streaming (PredictStream) and non-streaming (Predict) message
|
||||
// reconstruction paths so the two cannot drift.
|
||||
//
|
||||
// LocalAI's Go layer (schema.Messages.ToProto) always sends content as a plain
|
||||
// text string; multimodal media travels in separate proto fields, never inside
|
||||
// content. So user/system/developer content is *only ever* opaque text and must
|
||||
// NOT be JSON-sniffed: a prompt that merely looks like JSON (e.g. an ingredient
|
||||
// list ["1/4 cup sugar", ...]) would otherwise be reinterpreted as structured
|
||||
// content parts and rejected by oaicompat_chat_params_parse with
|
||||
// "unsupported content[].type" (https://github.com/mudler/LocalAI/issues/10524).
|
||||
// (developer is OpenAI's modern system alias - same "human-authored text" nature.)
|
||||
//
|
||||
// For assistant/tool messages we still collapse a literal JSON null/object
|
||||
// (tool-call bookkeeping) to a string, but we never turn a plain string into an
|
||||
// array/scalar. The array defense is therefore role-independent (arrays/scalars
|
||||
// fall through for every role); the role gate only governs the null/object case.
|
||||
inline nlohmann::ordered_json normalize_message_content(const std::string& role,
|
||||
const std::string& content) {
|
||||
nlohmann::ordered_json content_val = content;
|
||||
if (role != "user" && role != "system" && role != "developer") {
|
||||
try {
|
||||
nlohmann::ordered_json parsed = nlohmann::ordered_json::parse(content);
|
||||
if (parsed.is_null()) {
|
||||
content_val = "";
|
||||
} else if (parsed.is_object()) {
|
||||
content_val = parsed.dump();
|
||||
}
|
||||
// arrays / scalars: keep the original plain-text string as-is
|
||||
} catch (const nlohmann::ordered_json::parse_error&) {
|
||||
// Not JSON, already the plain string
|
||||
}
|
||||
}
|
||||
return content_val;
|
||||
}
|
||||
|
||||
// Final safety pass applied to each reconstructed OpenAI message right before it
|
||||
// is handed to oaicompat_chat_params_parse (jinja templating). Jinja templates
|
||||
// assume content is a string: a literal null breaks slicing such as
|
||||
// message.content[:N] (#7324), and a tool message with array content is rejected
|
||||
// (#7528). A multimodal user message legitimately carries a typed-part array
|
||||
// ({type:text}, {type:image_url}, ...), which must be left intact. Shared by the
|
||||
// streaming and non-streaming paths so this invariant cannot drift between them.
|
||||
inline void normalize_template_message(nlohmann::ordered_json& msg) {
|
||||
if (!msg.contains("content")) {
|
||||
msg["content"] = ""; // templates expect the field to exist
|
||||
return;
|
||||
}
|
||||
nlohmann::ordered_json& content = msg["content"];
|
||||
const std::string role = (msg.contains("role") && msg["role"].is_string())
|
||||
? msg["role"].get<std::string>()
|
||||
: std::string();
|
||||
if (content.is_null()) {
|
||||
content = ""; // #7324: null would crash content[:N] slicing
|
||||
} else if (role == "tool" && content.is_array()) {
|
||||
content = content.dump(); // #7528: tool messages must have string content
|
||||
} else if (!content.is_string() && !content.is_array()) {
|
||||
if (content.is_object()) {
|
||||
content = content.dump(); // tool-call bookkeeping object -> string
|
||||
} else {
|
||||
content = ""; // other scalar (number/bool) -> empty
|
||||
}
|
||||
}
|
||||
// string, or a non-tool (multimodal) typed-part array: leave untouched
|
||||
}
|
||||
|
||||
// Plain-type snapshot of a single proto message. Keeping it free of protobuf
// types lets the reconstruction logic be shared and unit-tested standalone. Both
// the streaming and non-streaming predict paths fill this in from proto::Message
// plus the request's media.
struct ReconstructedMessageInput {
    // Message role.
    std::string role;
    // proto.Message.content (always a plain string).
    std::string content;
    // Optional fields; an empty string means "not set".
    std::string name;
    std::string tool_call_id;
    std::string reasoning_content;
    // tool_calls as a JSON string, or empty.
    std::string tool_calls;
    // When true, the request media below is attached to this message.
    bool is_last_user_msg{false};
    // base64 (jpeg)
    std::vector<std::string> images;
    // base64 (wav)
    std::vector<std::string> audios;
    // base64
    std::vector<std::string> videos;
};
|
||||
|
||||
// Appends the request's media as OpenAI typed content parts. Imperative (not
|
||||
// brace-init) to avoid nlohmann's object-vs-array initializer-list ambiguity.
|
||||
inline void append_media_parts(nlohmann::ordered_json& content_array,
|
||||
const std::vector<std::string>& images,
|
||||
const std::vector<std::string>& audios,
|
||||
const std::vector<std::string>& videos) {
|
||||
for (const auto& img : images) {
|
||||
nlohmann::ordered_json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
nlohmann::ordered_json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + img;
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
for (const auto& aud : audios) {
|
||||
nlohmann::ordered_json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
nlohmann::ordered_json input_audio;
|
||||
input_audio["data"] = aud;
|
||||
input_audio["format"] = "wav"; // default; could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
for (const auto& vid : videos) {
|
||||
nlohmann::ordered_json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
nlohmann::ordered_json input_video;
|
||||
input_video["data"] = vid;
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
|
||||
// Reconstructs a single OpenAI-format message (the object fed to
|
||||
// oaicompat_chat_params_parse) from a proto message. Shared by PredictStream and
|
||||
// Predict so the content/multimodal/tool_calls handling cannot drift between the
|
||||
// two stream modes (it previously lived as two ~150-line copies with a redundant
|
||||
// Predict-only tool_calls->" " branch). Guarantees content is always a string or
|
||||
// a typed-part array, never null/missing.
|
||||
inline nlohmann::ordered_json build_reconstructed_message(const ReconstructedMessageInput& in) {
|
||||
nlohmann::ordered_json msg_json;
|
||||
msg_json["role"] = in.role;
|
||||
const bool has_media = !in.images.empty() || !in.audios.empty() || !in.videos.empty();
|
||||
|
||||
if (!in.content.empty()) {
|
||||
nlohmann::ordered_json content_val = normalize_message_content(in.role, in.content);
|
||||
if (content_val.is_string() && in.is_last_user_msg && has_media) {
|
||||
// Last user message + media: build a typed-part array (text first).
|
||||
nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
|
||||
nlohmann::ordered_json text_part;
|
||||
text_part["type"] = "text";
|
||||
text_part["text"] = content_val.get<std::string>();
|
||||
content_array.push_back(text_part);
|
||||
append_media_parts(content_array, in.images, in.audios, in.videos);
|
||||
msg_json["content"] = content_array;
|
||||
} else if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
}
|
||||
} else if (in.is_last_user_msg && has_media) {
|
||||
// No text but media on the last user message: media-only typed array.
|
||||
nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
|
||||
append_media_parts(content_array, in.images, in.audios, in.videos);
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Empty content (any role, incl. tool/assistant): templates need a string.
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
|
||||
if (!in.name.empty()) {
|
||||
msg_json["name"] = in.name;
|
||||
}
|
||||
if (!in.tool_call_id.empty()) {
|
||||
msg_json["tool_call_id"] = in.tool_call_id;
|
||||
}
|
||||
if (!in.reasoning_content.empty()) {
|
||||
msg_json["reasoning_content"] = in.reasoning_content;
|
||||
}
|
||||
if (!in.tool_calls.empty()) {
|
||||
try {
|
||||
nlohmann::ordered_json tool_calls = nlohmann::ordered_json::parse(in.tool_calls);
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
// tool_calls + empty/blank content: use " " not "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat turns "" into null, which breaks
|
||||
// templates that slice message.content[:tool_start_length] (#7324).
|
||||
if (!msg_json.contains("content") ||
|
||||
(msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
} catch (const nlohmann::ordered_json::parse_error&) {
|
||||
// Malformed tool_calls JSON: leave content as-is (prior behavior).
|
||||
}
|
||||
}
|
||||
|
||||
return msg_json;
|
||||
}
|
||||
|
||||
} // namespace llama_grpc
|
||||
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
@@ -0,0 +1,234 @@
|
||||
// Unit tests for the shared message-reconstruction helpers (message_content.h).
|
||||
//
|
||||
// Build & run standalone (nlohmann/json single header on the include path):
|
||||
// g++ -std=c++17 -I<dir-with-nlohmann> message_content_test.cpp -o t && ./t
|
||||
// or via CMake: -DLLAMA_GRPC_BUILD_TESTS=ON then ctest.
|
||||
//
|
||||
// Regression coverage for:
|
||||
// #10524 - a user/system prompt that is itself a JSON-array string must stay
|
||||
// plain text, never be reinterpreted as OpenAI structured parts.
|
||||
// #7324 - assistant/tool null content -> "" (templates slice content[:N]);
|
||||
// assistant+tool_calls+empty content -> " " (not "", which becomes null).
|
||||
// #7528 - tool message array content must reach the template as a string.
|
||||
// multimodal - last user message text + media -> typed-part array, media kept.
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "message_content.h"
|
||||
|
||||
using nlohmann::ordered_json;
|
||||
using llama_grpc::normalize_message_content;
|
||||
using llama_grpc::normalize_template_message;
|
||||
using llama_grpc::build_reconstructed_message;
|
||||
using llama_grpc::ReconstructedMessageInput;
|
||||
|
||||
// Number of failed expectations recorded so far; main() turns it into the exit code.
static int failures = 0;

// Records one expectation. When ok is false, prints "FAIL <name>" (followed by
// ": <detail>" if detail is non-empty) to stderr and bumps the failure counter.
// A passing expectation prints nothing.
static void check(bool ok, const std::string& name, const std::string& detail = "") {
    if (ok) {
        return;
    }
    std::cerr << "FAIL " << name;
    if (!detail.empty()) {
        std::cerr << ": " << detail;
    }
    std::cerr << "\n";
    ++failures;
}
|
||||
|
||||
// ---- normalize_message_content -------------------------------------------
|
||||
|
||||
static void expect_norm_string(const char* name, const std::string& role,
|
||||
const std::string& content, const std::string& want) {
|
||||
auto got = normalize_message_content(role, content);
|
||||
if (!got.is_string()) {
|
||||
check(false, name, "expected a JSON string, got " +
|
||||
std::string(got.is_array() ? "array" : got.is_object() ? "object" : "other") +
|
||||
" (" + got.dump() + ")");
|
||||
return;
|
||||
}
|
||||
check(got.get<std::string>() == want, name, "expected \"" + want + "\", got \"" + got.get<std::string>() + "\"");
|
||||
}
|
||||
|
||||
static void test_normalize() {
|
||||
const std::string ingredients = R"(["1/4 cup brown sugar, packed","1 pound ground beef"])";
|
||||
|
||||
// #10524 - JSON-array text must stay a string. Role-INDEPENDENT array defense.
|
||||
for (const char* role : {"user", "system", "developer", "function", "assistant", "tool"}) {
|
||||
expect_norm_string((std::string("json_array_stays_text:") + role).c_str(), role, ingredients, ingredients);
|
||||
}
|
||||
|
||||
// #10524 - user/system/developer JSON-object text stays verbatim (NOT re-dumped).
|
||||
expect_norm_string("user_json_object_verbatim", "user", R"({"a":1})", R"({"a":1})");
|
||||
expect_norm_string("system_json_object_verbatim", "system", R"({"a":1})", R"({"a":1})");
|
||||
expect_norm_string("developer_json_object_verbatim", "developer", R"({"a":1})", R"({"a":1})");
|
||||
|
||||
// Plain text unchanged for all roles.
|
||||
expect_norm_string("user_plain_text", "user", "hello world", "hello world");
|
||||
expect_norm_string("assistant_non_json_text_kept", "assistant", "hi [unclosed", "hi [unclosed");
|
||||
|
||||
// #7324 boundary - user/system/developer literal "null" preserved (never parsed).
|
||||
expect_norm_string("user_literal_null_stays", "user", "null", "null");
|
||||
expect_norm_string("system_literal_null_stays", "system", "null", "null");
|
||||
expect_norm_string("developer_literal_null_stays", "developer", "null", "null");
|
||||
|
||||
// #7324 - assistant/tool literal null collapses to empty string.
|
||||
expect_norm_string("assistant_null_to_empty", "assistant", "null", "");
|
||||
expect_norm_string("tool_null_to_empty", "tool", "null", "");
|
||||
|
||||
// #7324/#7528 - assistant/tool object bookkeeping stringified (stays a string).
|
||||
check(normalize_message_content("assistant", R"({"tool":"x"})").is_string(), "assistant_object_stringified");
|
||||
check(normalize_message_content("tool", R"({"error":"boom"})").is_string(), "tool_object_stringified");
|
||||
|
||||
// #10524-family - a bare scalar that parses as a JSON number stays the string.
|
||||
expect_norm_string("assistant_scalar_number_stays_string", "assistant", "42", "42");
|
||||
|
||||
// baseline - empty content stays empty.
|
||||
expect_norm_string("user_empty_stays_empty", "user", "", "");
|
||||
}
|
||||
|
||||
// ---- normalize_template_message (BEFORE TEMPLATE sanitizer) ---------------
|
||||
|
||||
static void test_template_sanitizer() {
|
||||
// #7528 - a tool message with an ACTUAL array becomes a string.
|
||||
{
|
||||
ordered_json msg = {{"role", "tool"}, {"content", ordered_json::array({{{"type", "text"}, {"text", "r"}}})}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_string(), "before_template_tool_array_to_string", "got " + msg["content"].dump());
|
||||
}
|
||||
// #7324 - null content -> "" for any role.
|
||||
{
|
||||
ordered_json msg = {{"role", "assistant"}, {"content", nullptr}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_string() && msg["content"] == "", "before_template_null_to_empty");
|
||||
}
|
||||
// object content -> dumped string (would otherwise throw at the template).
|
||||
{
|
||||
ordered_json msg = {{"role", "assistant"}, {"content", {{"x", 1}}}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_string(), "before_template_object_to_string", "got " + msg["content"].dump());
|
||||
}
|
||||
// missing content field -> "".
|
||||
{
|
||||
ordered_json msg = {{"role", "user"}};
|
||||
normalize_template_message(msg);
|
||||
check(msg.contains("content") && msg["content"] == "", "before_template_missing_to_empty");
|
||||
}
|
||||
// multimodal: a well-typed user array must be left UNTOUCHED (role!=tool).
|
||||
{
|
||||
ordered_json parts = ordered_json::array();
|
||||
parts.push_back({{"type", "text"}, {"text", "x"}});
|
||||
ordered_json img; img["type"] = "image_url"; img["image_url"] = {{"url", "data:..."}};
|
||||
parts.push_back(img);
|
||||
ordered_json msg = {{"role", "user"}, {"content", parts}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_array() && msg["content"].size() == 2, "before_template_user_typed_array_preserved",
|
||||
"got " + msg["content"].dump());
|
||||
}
|
||||
// a plain string is left untouched.
|
||||
{
|
||||
ordered_json msg = {{"role", "user"}, {"content", "hello"}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"] == "hello", "before_template_string_untouched");
|
||||
}
|
||||
}
|
||||
|
||||
// ---- build_reconstructed_message ----------------------------------------
|
||||
|
||||
static void test_reconstruction() {
|
||||
const std::string ingredients = R"(["1/4 cup brown sugar","1 pound ground beef"])";
|
||||
|
||||
// #10524 end-state - user JSON-array text, no media -> string content.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "user"; in.content = ingredients;
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == ingredients, "recon_user_json_array_string",
|
||||
"got " + m["content"].dump());
|
||||
}
|
||||
// multimodal - user text + one image on last user msg -> typed array, image kept.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "user"; in.content = ingredients; in.is_last_user_msg = true;
|
||||
in.images.push_back("BASE64IMG");
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_array() && m["content"].size() == 2, "recon_multimodal_text_plus_image",
|
||||
"got " + m["content"].dump());
|
||||
check(m["content"][0]["type"] == "text" && m["content"][0]["text"] == ingredients, "recon_multimodal_text_first");
|
||||
check(m["content"][1]["type"] == "image_url", "recon_multimodal_image_kept");
|
||||
}
|
||||
// multimodal media-only - empty text + image on last user msg.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "user"; in.content = ""; in.is_last_user_msg = true;
|
||||
in.images.push_back("BASE64IMG");
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_array() && m["content"].size() == 1 && m["content"][0]["type"] == "image_url",
|
||||
"recon_media_only", "got " + m["content"].dump());
|
||||
}
|
||||
// #7528 - tool array-string content stays a string.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "tool"; in.content = R"(["a","b"])"; in.tool_call_id = "call_1";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == R"(["a","b"])", "recon_tool_array_string",
|
||||
"got " + m["content"].dump());
|
||||
check(m["tool_call_id"] == "call_1", "recon_tool_call_id_set");
|
||||
}
|
||||
// tool empty content -> "".
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "tool"; in.content = "";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == "", "recon_tool_empty_to_string");
|
||||
}
|
||||
// #7324 - assistant + tool_calls + empty content -> " " (single space, not "").
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "";
|
||||
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == " ", "recon_toolcalls_empty_content_space",
|
||||
"got " + m["content"].dump());
|
||||
check(m["tool_calls"].is_array() && m["tool_calls"].size() == 1, "recon_toolcalls_parsed");
|
||||
}
|
||||
// assistant + tool_calls + real content keeps the content.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "I'll call f";
|
||||
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"] == "I'll call f", "recon_toolcalls_with_content_kept");
|
||||
}
|
||||
// assistant null content -> "".
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "null";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"] == "", "recon_assistant_null_to_empty");
|
||||
}
|
||||
// malformed tool_calls JSON must not throw; content preserved.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "hi"; in.tool_calls = "{not json";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"] == "hi" && !m.contains("tool_calls"), "recon_malformed_toolcalls_safe");
|
||||
}
|
||||
// optional fields: name + reasoning carried through.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "tool"; in.content = "result"; in.name = "get_weather"; in.reasoning_content = "thinking";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["name"] == "get_weather" && m["reasoning_content"] == "thinking", "recon_optional_fields");
|
||||
}
|
||||
}
|
||||
|
||||
// Runs every test group, then reports the outcome: exit code 0 with an OK line
// on stdout when nothing failed, otherwise the failure count on stderr and
// exit code 1.
int main() {
    test_normalize();
    test_template_sanitizer();
    test_reconstruction();

    if (failures != 0) {
        std::cerr << failures << " test(s) failed\n";
        return 1;
    }

    std::cout << "OK: all message_content tests passed\n";
    return 0;
}
|
||||
@@ -14,6 +14,22 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends produced by the CPU_ALL_VARIANTS build (libggml-base.so,
|
||||
# libggml.so, libllama.so and the per-microarch libggml-cpu-*.so), all into package/lib.
|
||||
#
|
||||
# Two distinct resolution mechanisms both land here:
|
||||
# - NEEDED deps (libggml-base/libggml/libllama): resolved by the dynamic linker via the
|
||||
# LD_LIBRARY_PATH=$CURDIR/lib that run.sh exports.
|
||||
# - The per-microarch libggml-cpu-*.so are NOT linked; ggml *discovers* them at runtime by
|
||||
# scanning the executable's own directory (readlink /proc/self/exe). run.sh launches via
|
||||
# the bundled $CURDIR/lib/ld.so, so /proc/self/exe -> .../lib/ld.so and ggml scans lib/.
|
||||
# That is why the variants must sit in lib/ (next to ld.so), not just on the link path.
|
||||
# No-op on builds (arm64/darwin) that don't produce the all-variants set.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -18,6 +18,10 @@ done
|
||||
|
||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||
# Shared message-reconstruction helpers (included by grpc-server.cpp) and their
|
||||
# unit test (compiled only when -DLLAMA_GRPC_BUILD_TESTS=ON).
|
||||
cp -r message_content.h llama.cpp/tools/grpc-server/
|
||||
cp -r message_content_test.cpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,55 +12,41 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=llama-cpp-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx ]; then
|
||||
BINARY=llama-cpp-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx2 ]; then
|
||||
BINARY=llama-cpp-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx512 ]; then
|
||||
BINARY=llama-cpp-avx512
|
||||
fi
|
||||
# CPU images (x86, arm64, darwin) ship a single llama-cpp-cpu-all built with ggml
|
||||
# CPU_ALL_VARIANTS: ggml's backend registry dlopens the best libggml-cpu-*.so for this
|
||||
# host, so no shell-side AVX probing. GPU images (cublas/sycl/vulkan/hipblas) ship only
|
||||
# llama-cpp-fallback (the accelerator does the compute), so fall back to it when absent.
|
||||
if [ -e "$CURDIR"/llama-cpp-cpu-all ]; then
|
||||
BINARY=llama-cpp-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
if [ -e $CURDIR/llama-cpp-grpc ]; then
|
||||
if [ -e "$CURDIR"/llama-cpp-grpc ]; then
|
||||
BINARY=llama-cpp-grpc
|
||||
fi
|
||||
fi
|
||||
|
||||
# Extend ld library path with the dir where this script is located/lib
|
||||
if [ "$(uname)" == "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
#export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
#export DYLD_FALLBACK_LIBRARY_PATH="$CURDIR"/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
# Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files)
|
||||
if [ -d "$CURDIR/lib/rocblas/library" ]; then
|
||||
export ROCBLAS_TENSILE_LIBPATH=$CURDIR/lib/rocblas/library
|
||||
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
|
||||
fi
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
|
||||
fi
|
||||
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/$BINARY "$@"
|
||||
exec "$CURDIR"/$BINARY "$@"
|
||||
|
||||
# We should never reach this point, however just in case we do, run fallback
|
||||
exec $CURDIR/llama-cpp-fallback "$@"
|
||||
exec "$CURDIR"/llama-cpp-fallback "$@"
|
||||
@@ -51,6 +51,14 @@ add_library(hw_grpc_proto STATIC
|
||||
${HW_GRPC_SRCS} ${HW_GRPC_HDRS}
|
||||
${HW_PROTO_SRCS} ${HW_PROTO_HDRS})
|
||||
target_include_directories(hw_grpc_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||
# The generated proto/grpc sources include protobuf and grpc++ headers, so this
|
||||
# library must see their include dirs. Linking the imported targets propagates
|
||||
# them. On Linux the apt headers live in /usr/include (default search path) so
|
||||
# this was a no-op; on macOS the Homebrew headers are under /opt/homebrew and
|
||||
# would otherwise be missed (runtime_version.h not found).
|
||||
target_link_libraries(hw_grpc_proto PUBLIC
|
||||
protobuf::libprotobuf
|
||||
gRPC::grpc++)
|
||||
|
||||
# Build only the pf static lib (+ ggml) from the engine tree — no CLI/bench/tests.
|
||||
# PF_VULKAN is honored when passed on the cmake command line (it lands in the
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
# Local development: point at a working checkout instead of cloning, e.g.
|
||||
# make PRIVACY_FILTER_SRC=$HOME/c/privacy-filter.cpp grpc-server
|
||||
|
||||
PRIVACY_FILTER_VERSION?=98f52c5ef2250f207cc6b9a6aef05393a120cb7c
|
||||
PRIVACY_FILTER_VERSION?=595f59630c69d361b5196f2aba2c71c873d0c13c
|
||||
PRIVACY_FILTER_REPO?=https://github.com/localai-org/privacy-filter.cpp
|
||||
PRIVACY_FILTER_SRC?=
|
||||
|
||||
|
||||
@@ -2,7 +2,13 @@
|
||||
# Entry point for the privacy-filter backend image / BACKEND_BINARY mode.
|
||||
set -e
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
|
||||
# macOS has no bundled ld.so; the darwin package ships only dylibs under lib/,
|
||||
# resolved via DYLD_LIBRARY_PATH (the ld.so branch below is skipped there).
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR/lib:$DYLD_LIBRARY_PATH"
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
|
||||
fi
|
||||
if [ -f "$CURDIR/lib/ld.so" ]; then
|
||||
exec "$CURDIR/lib/ld.so" "$CURDIR/grpc-server" "$@"
|
||||
fi
|
||||
|
||||
71
backend/cpp/run-unit-tests.sh
Executable file
71
backend/cpp/run-unit-tests.sh
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
#
# Discovers and runs every standalone C++ unit test under backend/cpp/.
#
# A "standalone" unit test is a *_test.cpp that depends only on the C++ standard
# library and nlohmann/json (single header) - i.e. it exercises pure helpers and
# does not need the full llama.cpp + gRPC backend build. Tests that DO need the
# backend build use the CMake/ctest path (e.g. -DLLAMA_GRPC_BUILD_TESTS=ON)
# instead and are skipped here.
#
# This keeps CI generic: adding a new pure-C++ unit test file named *_test.cpp in
# an active backend source dir is picked up automatically, with no CI edits.
#
# Env:
#   NLOHMANN_INCLUDE  include dir that contains nlohmann/json.hpp. If unset, the
#                     nlohmann/json single header is fetched to a temp dir.
#   CXX               compiler (default: g++).
#   JSON_VERSION      nlohmann/json tag to fetch when NLOHMANN_INCLUDE is unset
#                     (default: v3.11.3).
#
# Exit status: 0 when every discovered test compiled and passed (or none were
# found), 1 when the header fetch, a compile, or a test run failed.
#
# Note: -e is intentionally not set; compile/test failures are accumulated in
# $fail so that every test file is attempted before the script exits.
set -uo pipefail

ROOT="$(cd "$(dirname "$0")" && pwd)"
CXX="${CXX:-g++}"
JSON_VERSION="${JSON_VERSION:-v3.11.3}"

# One scratch directory holds everything this run creates (the fetched header
# and the compiled test binaries). It is removed on exit so repeated runs do not
# leave temp directories behind. A user-supplied NLOHMANN_INCLUDE lives outside
# of it and is never touched.
WORK_DIR="$(mktemp -d)" || {
    echo "ERROR: failed to create temporary work directory" >&2
    exit 1
}
trap 'rm -rf "$WORK_DIR"' EXIT

JSON_INC="${NLOHMANN_INCLUDE:-}"
if [ -z "$JSON_INC" ]; then
    JSON_INC="$WORK_DIR/json-include"
    mkdir -p "$JSON_INC/nlohmann"
    echo "Fetching nlohmann/json ${JSON_VERSION} single header..."
    if ! curl -L -sf \
        "https://raw.githubusercontent.com/nlohmann/json/${JSON_VERSION}/single_include/nlohmann/json.hpp" \
        -o "$JSON_INC/nlohmann/json.hpp"; then
        echo "ERROR: failed to fetch nlohmann/json header" >&2
        exit 1
    fi
fi

# Active source dirs only - exclude per-variant build copies, dev snapshots and
# the vendored upstream llama.cpp tree.
mapfile -t tests < <(find "$ROOT" -name '*_test.cpp' \
    -not -path '*/llama.cpp/*' \
    -not -path '*-build/*' \
    -not -path '*-dev/*' \
    -not -path '*fallback*' | sort)

if [ "${#tests[@]}" -eq 0 ]; then
    echo "No standalone C++ unit tests found under $ROOT"
    exit 0
fi

fail=0
index=0
for test_src in "${tests[@]}"; do
    name="$(basename "$test_src" .cpp)"
    # Each test gets its own output dir so two tests that share a basename in
    # different source dirs cannot clobber each other's binary.
    index=$((index + 1))
    bin_dir="$WORK_DIR/bin-$index"
    mkdir -p "$bin_dir"
    bin="$bin_dir/$name"
    echo "==> $test_src"
    if ! "$CXX" -std=c++17 -Wall -Wextra \
        -I"$JSON_INC" -I"$(dirname "$test_src")" \
        "$test_src" -o "$bin"; then
        echo "COMPILE FAILED: $test_src" >&2
        fail=1
        continue
    fi
    if ! "$bin"; then
        echo "TEST FAILED: $test_src" >&2
        fail=1
    fi
done

echo "Ran ${#tests[@]} standalone C++ unit test file(s)"
exit "$fail"
|
||||
@@ -65,6 +65,29 @@ turboquant-avx:
|
||||
turboquant-fallback:
|
||||
$(call turboquant-build,fallback,-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server)
|
||||
|
||||
# Single-build CPU backend via ggml CPU_ALL_VARIANTS (mirrors llama-cpp-cpu-all).
|
||||
# turboquant reuses backend/cpp/llama-cpp's CMakeLists.txt (hw_grpc_proto STATIC) and
|
||||
# Makefile (SHARED_LIBS make-var + EXTRA_CMAKE_ARGS), so this passes the same overrides
|
||||
# through to the copied build: SHARED_LIBS=ON, the DL flags, and --target ggml (which
|
||||
# pulls in the per-microarch libggml-cpu-*.so via ggml's add_dependencies). The .so set
|
||||
# is collected for package.sh to bundle into package/lib.
|
||||
turboquant-cpu-all:
|
||||
rm -rf $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
cp -rf $(LLAMA_CPP_DIR) $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build purge
|
||||
bash $(CURRENT_MAKEFILE_DIR)/patch-grpc-server.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server.cpp
|
||||
$(info $(GREEN)I turboquant build info:cpu-all-variants$(RESET))
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build llama.cpp
|
||||
bash $(CURRENT_MAKEFILE_DIR)/apply-patches.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp $(PATCHES_DIR)
|
||||
SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" \
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server turboquant-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
turboquant-grpc:
|
||||
$(call turboquant-build,grpc,-DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server --target rpc-server)
|
||||
|
||||
|
||||
@@ -14,6 +14,15 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/turboquant-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends from the CPU_ALL_VARIANTS build into package/lib. ggml
|
||||
# discovers the per-microarch libggml-cpu-*.so by scanning the executable directory, which
|
||||
# (via the bundled lib/ld.so that run.sh launches through) resolves to lib/. See the
|
||||
# matching comment in backend/cpp/llama-cpp/package.sh. No-op on the fallback/ROCm builds.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,54 +12,39 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=turboquant-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/turboquant-avx ]; then
|
||||
BINARY=turboquant-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/turboquant-avx2 ]; then
|
||||
BINARY=turboquant-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/turboquant-avx512 ]; then
|
||||
BINARY=turboquant-avx512
|
||||
fi
|
||||
# x86/arm64 ship a single turboquant-cpu-all built with ggml CPU_ALL_VARIANTS: ggml's
|
||||
# backend registry dlopens the best libggml-cpu-*.so for this host, so no shell-side
|
||||
# probing. ROCm ships only turboquant-fallback, so fall back to it when cpu-all is absent.
|
||||
if [ -e "$CURDIR"/turboquant-cpu-all ]; then
|
||||
BINARY=turboquant-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
if [ -e $CURDIR/turboquant-grpc ]; then
|
||||
if [ -e "$CURDIR"/turboquant-grpc ]; then
|
||||
BINARY=turboquant-grpc
|
||||
fi
|
||||
fi
|
||||
|
||||
# Extend ld library path with the dir where this script is located/lib
|
||||
if [ "$(uname)" == "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
# Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files)
|
||||
if [ -d "$CURDIR/lib/rocblas/library" ]; then
|
||||
export ROCBLAS_TENSILE_LIBPATH=$CURDIR/lib/rocblas/library
|
||||
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
|
||||
fi
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
|
||||
fi
|
||||
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/$BINARY "$@"
|
||||
exec "$CURDIR"/$BINARY "$@"
|
||||
|
||||
# We should never reach this point, however just in case we do, run fallback
|
||||
exec $CURDIR/turboquant-fallback "$@"
|
||||
exec "$CURDIR"/turboquant-fallback "$@"
|
||||
|
||||
@@ -117,7 +117,8 @@ libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgoacestepcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: acestep-cpp
|
||||
@echo "Running acestep-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -22,7 +23,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("ACESTEP_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgoacestepcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgoacestepcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgoacestepcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -13,6 +13,7 @@ mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/acestep-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoacestepcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,19 +12,29 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single library variant (Metal or Accelerate). The goacestepcpp
|
||||
# target is built as a CMake MODULE; on Apple, CMake names MODULE libraries
|
||||
# .so and only SHARED libraries .dylib, so prefer .dylib and fall
|
||||
# back to .so.
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.dylib"
|
||||
if [ ! -e "$LIBRARY" ]; then
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||
fi
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
|
||||
if [ -e "$CURDIR"/libgoacestepcpp-avx.so ]; then
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-avx.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/libgoacestepcpp-avx2.so ]; then
|
||||
if [ -e "$CURDIR"/libgoacestepcpp-avx2.so ]; then
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-avx2.so"
|
||||
fi
|
||||
fi
|
||||
@@ -32,21 +42,22 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/libgoacestepcpp-avx512.so ]; then
|
||||
if [ -e "$CURDIR"/libgoacestepcpp-avx512.so ]; then
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ACESTEP_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/acestep-cpp "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/acestep-cpp "$@"
|
||||
fi
|
||||
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/acestep-cpp "$@"
|
||||
exec "$CURDIR"/acestep-cpp "$@"
|
||||
|
||||
@@ -57,6 +57,7 @@ libced.so: sources/ced.cpp
|
||||
cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
|
||||
cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
|
||||
cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
|
||||
cp -fv sources/ced.cpp/build-shared/libced.dylib ./ 2>/dev/null || true
|
||||
cp -fv sources/ced.cpp/include/ced_capi.h ./
|
||||
|
||||
ced-grpc: libced.so main.go goced.go
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ type libFunc struct {
|
||||
func main() {
|
||||
libName := os.Getenv("CED_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libced.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "libced.dylib"
|
||||
} else {
|
||||
libName = "libced.so"
|
||||
}
|
||||
}
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
|
||||
@@ -15,10 +15,12 @@ mkdir -p "$CURDIR/package/lib"
|
||||
cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
|
||||
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
||||
|
||||
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || {
|
||||
echo "ERROR: libced.so not found in $CURDIR, run 'make' first" >&2
|
||||
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
cp -avf "$CURDIR"/libced.dylib "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
if ! ls "$CURDIR"/package/lib/libced.* >/dev/null 2>&1; then
|
||||
echo "ERROR: libced shared library not found in $CURDIR, run 'make' first" >&2
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||
|
||||
@@ -3,7 +3,12 @@ set -e
|
||||
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${DYLD_LIBRARY_PATH:-}"
|
||||
export CED_LIBRARY="$CURDIR/lib/libced.dylib"
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${LD_LIBRARY_PATH:-}"
|
||||
fi
|
||||
|
||||
# If a self-contained ld.so was packaged, route through it so the packaged
|
||||
# libc / libstdc++ are used instead of the host's (matches the sibling backends).
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
exec $CURDIR/cloud-proxy "$@"
|
||||
exec "$CURDIR"/cloud-proxy "$@"
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (pinned upstream commit)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=63b57289255267edf66e43e33bc3911e04a2e92d
|
||||
CRISPASR_VERSION?=3b93758f9725d400eca82976f895e4cec3f31260
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -75,7 +75,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
|
||||
else
|
||||
VARIANT_TARGETS = libgocrispasr-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgocrispasr-fallback.dylib
|
||||
endif
|
||||
|
||||
crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
|
||||
@@ -87,7 +88,7 @@ package: crispasr
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgocrispasr*.so package sources/CrispASR crispasr
|
||||
rm -rf libgocrispasr*.so libgocrispasr*.dylib package sources/CrispASR crispasr
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -118,13 +119,21 @@ libgocrispasr-fallback.so: sources/CrispASR
|
||||
SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
||||
rm -rfv build*
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgocrispasr-fallback.dylib: sources/CrispASR
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I crispasr build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgocrispasr-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
||||
rm -rfv build*
|
||||
|
||||
libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgocrispasr.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: crispasr
|
||||
CGO_ENABLED=0 $(GOCMD) test -v ./...
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("CRISPASR_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgocrispasr-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgocrispasr-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgocrispasr-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/crispasr $CURDIR/package/
|
||||
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgocrispasr-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,19 +12,23 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgocrispasr-avx.so ]; then
|
||||
if [ -e "$CURDIR"/libgocrispasr-avx.so ]; then
|
||||
LIBRARY="$CURDIR/libgocrispasr-avx.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/libgocrispasr-avx2.so ]; then
|
||||
if [ -e "$CURDIR"/libgocrispasr-avx2.so ]; then
|
||||
LIBRARY="$CURDIR/libgocrispasr-avx2.so"
|
||||
fi
|
||||
fi
|
||||
@@ -32,26 +36,27 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/libgocrispasr-avx512.so ]; then
|
||||
if [ -e "$CURDIR"/libgocrispasr-avx512.so ]; then
|
||||
LIBRARY="$CURDIR/libgocrispasr-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export CRISPASR_LIBRARY=$LIBRARY
|
||||
|
||||
# Point piper's espeak-ng phonemizer at the bundled voice data. The variable
|
||||
# names the directory CONTAINING espeak-ng-data (package.sh drops it next to
|
||||
# this script). Harmless when espeak-ng wasn't bundled.
|
||||
export CRISPASR_ESPEAK_DATA_PATH=$CURDIR
|
||||
export CRISPASR_ESPEAK_DATA_PATH="$CURDIR"
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/crispasr "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/crispasr "$@"
|
||||
fi
|
||||
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/crispasr "$@"
|
||||
exec "$CURDIR"/crispasr "$@"
|
||||
|
||||
@@ -40,6 +40,8 @@ else ifeq ($(BUILD_TYPE),hipblas)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DDA_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
|
||||
# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
@@ -77,7 +79,7 @@ ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libdepthanythingcpp-fallback.so
|
||||
VARIANT_TARGETS = libdepthanythingcpp-fallback.dylib
|
||||
endif
|
||||
|
||||
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
|
||||
@@ -89,7 +91,7 @@ package: depth-anything-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
|
||||
rm -rf libdepthanythingcpp*.so libdepthanythingcpp*.dylib depth-anything-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -116,11 +118,19 @@ libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
libdepthanythingcpp-fallback.dylib: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
else
|
||||
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
endif
|
||||
|
||||
libdepthanythingcpp-custom: CMakeLists.txt
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
@@ -128,7 +138,8 @@ libdepthanythingcpp-custom: CMakeLists.txt
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libdepthanything.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: depth-anything-cpp package
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libdepthanythingcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libdepthanythingcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libdepthanythingcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libdepthanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,19 +12,23 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
|
||||
if [ -e "$CURDIR"/libdepthanythingcpp-avx.so ]; then
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-avx.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/libdepthanythingcpp-avx2.so ]; then
|
||||
if [ -e "$CURDIR"/libdepthanythingcpp-avx2.so ]; then
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-avx2.so"
|
||||
fi
|
||||
fi
|
||||
@@ -32,21 +36,22 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/libdepthanythingcpp-avx512.so ]; then
|
||||
if [ -e "$CURDIR"/libdepthanythingcpp-avx512.so ]; then
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export DEPTHANYTHING_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/depth-anything-cpp "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/depth-anything-cpp "$@"
|
||||
fi
|
||||
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/depth-anything-cpp "$@"
|
||||
exec "$CURDIR"/depth-anything-cpp "$@"
|
||||
|
||||
18
backend/go/face-detect/.gitignore
vendored
Normal file
18
backend/go/face-detect/.gitignore
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
# Fetched upstream sources
|
||||
sources/
|
||||
|
||||
# CMake build directories
|
||||
build*/
|
||||
|
||||
# build artifacts staged in-tree by the Makefile (cp from sources/) or
|
||||
# symlinked for local dev; the real sources live in face-detect.cpp upstream.
|
||||
*.so
|
||||
*.so.*
|
||||
facedetect_capi.h
|
||||
compile_commands.json
|
||||
|
||||
# Compiled backend binary
|
||||
face-detect-grpc
|
||||
|
||||
# Packaging output
|
||||
package/
|
||||
110
backend/go/face-detect/Makefile
Normal file
110
backend/go/face-detect/Makefile
Normal file
@@ -0,0 +1,110 @@
|
||||
# face-detect backend Makefile.
|
||||
#
|
||||
# The upstream pin lives below in the FACEDETECT_VERSION variable, on its own
|
||||
# line so automated version bumps can find and update it (this matches the
|
||||
# voice-detect / parakeet.cpp / whisper.cpp convention).
|
||||
#
|
||||
# Local dev shortcut: if you already have an out-of-tree face-detect.cpp build,
|
||||
# symlink the .so + header into this directory and skip the clone/cmake steps:
|
||||
#
|
||||
# ln -sf /path/to/face-detect.cpp/build-shared/libfacedetect.so .
|
||||
# ln -sf /path/to/face-detect.cpp/include/facedetect_capi.h .
|
||||
# go build -o face-detect-grpc .
|
||||
#
|
||||
# The default target below does the proper clone-at-pin + cmake build so CI does
|
||||
# not need a side-checkout.
|
||||
|
||||
FACEDETECT_VERSION?=e22260d5d5490b37b021b7f795079f386d553afd
|
||||
FACEDETECT_REPO?=https://github.com/mudler/face-detect.cpp
|
||||
|
||||
GOCMD?=go
|
||||
GO_TAGS?=
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
|
||||
|
||||
BUILD_TYPE?=
|
||||
NATIVE?=false
|
||||
|
||||
# Resolve the target arch. The backend matrix / Docker build pass TARGETARCH
|
||||
# (amd64|arm64); fall back to uname -m (aarch64|x86_64) for a local build.
|
||||
RECON_ARCH?=$(or $(TARGETARCH),$(shell uname -m))
|
||||
|
||||
# Build ggml + the vendored libjpeg-turbo statically into libfacedetect.so (PIC)
|
||||
# so the shared lib is self-contained: dlopen needs no libggml*.so alongside it,
|
||||
# only system libs (libstdc++/libgomp/libc) the runtime image already provides.
|
||||
# The vendored jpeg symbols are hidden via -Wl,--exclude-libs,ALL on the C++
|
||||
# side, so only the facedetect_capi_* surface is exported.
|
||||
CMAKE_ARGS?=-DCMAKE_BUILD_TYPE=Release -DFACEDETECT_SHARED=ON -DFACEDETECT_BUILD_CLI=OFF -DFACEDETECT_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
endif
|
||||
|
||||
# face-detect.cpp gates its GGML backends behind FACEDETECT_GGML_* options and
|
||||
# does set(GGML_CUDA ${FACEDETECT_GGML_CUDA} CACHE BOOL "" FORCE), so a bare
|
||||
# -DGGML_CUDA=ON is overwritten back to OFF. Forward the FACEDETECT_GGML_*
|
||||
# options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDA=ON
|
||||
# Opt-in cuDNN implicit-GEMM conv path (kills im2col on GPU, SCRFD 2.3x
|
||||
# vs torch-cuDNN parity). Only the arm64 + CUDA 13 image (GB10/Jetson/L4T)
|
||||
# ships libcudnn9 + the -dev headers, so gate cuDNN to that variant.
|
||||
# x86 CUDA images carry no cuDNN -> enabling it there is a link failure.
|
||||
ifeq ($(CUDA_MAJOR_VERSION),13)
|
||||
ifneq (,$(filter arm64 aarch64,$(RECON_ARCH)))
|
||||
CMAKE_ARGS+=-DFACEDETECT_GGML_CUDNN=ON
|
||||
endif
|
||||
endif
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DFACEDETECT_GGML_HIP=ON
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DFACEDETECT_GGML_VULKAN=ON
|
||||
else ifeq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DFACEDETECT_GGML_METAL=ON
|
||||
endif
|
||||
|
||||
.PHONY: face-detect-grpc package build clean purge test all
|
||||
|
||||
all: face-detect-grpc
|
||||
|
||||
# Clone the upstream face-detect.cpp source at the pinned commit. The directory
# itself is the make target, so make only re-clones when it is missing. After a
# FACEDETECT_VERSION bump, run 'make purge && make' to refetch.
#
# The git steps run in a subshell and the directory is removed when any of them
# fails: otherwise a half-initialised checkout (for example after a network
# error during fetch) would satisfy the target on the next run and the build
# would continue against an empty or incomplete source tree.
sources/face-detect.cpp:
	mkdir -p sources/face-detect.cpp
	( cd sources/face-detect.cpp && \
	git init -q && \
	git remote add origin $(FACEDETECT_REPO) && \
	git fetch --depth 1 origin $(FACEDETECT_VERSION) && \
	git checkout FETCH_HEAD && \
	git submodule update --init --recursive --depth 1 --single-branch ) || \
	{ rm -rf sources/face-detect.cpp; exit 1; }
|
||||
|
||||
# Build the shared lib + header out-of-tree, then stage them next to the Go
|
||||
# sources so purego.Dlopen("libfacedetect.so") and the cgo-less build both pick
|
||||
# them up.
|
||||
libfacedetect.so: sources/face-detect.cpp
|
||||
cmake -B sources/face-detect.cpp/build-shared -S sources/face-detect.cpp $(CMAKE_ARGS)
|
||||
cmake --build sources/face-detect.cpp/build-shared --config Release -j$(JOBS) --target facedetect
|
||||
cp -fv sources/face-detect.cpp/build-shared/libfacedetect.so* ./ 2>/dev/null || true
|
||||
cp -fv sources/face-detect.cpp/include/facedetect_capi.h ./
|
||||
|
||||
face-detect-grpc: libfacedetect.so main.go gofacedetect.go options.go
|
||||
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o face-detect-grpc .
|
||||
|
||||
package: face-detect-grpc
|
||||
bash package.sh
|
||||
|
||||
build: package
|
||||
|
||||
# Test target. The embed/detect/verify/analyze smoke specs are gated on
|
||||
# FACEDETECT_BACKEND_TEST_MODEL + FACEDETECT_BACKEND_TEST_IMAGE; without them the
|
||||
# heavy specs auto-skip and only the pure-Go parsing specs run.
|
||||
test:
|
||||
LD_LIBRARY_PATH=$(CURDIR):$$LD_LIBRARY_PATH $(GOCMD) test ./... -count=1
|
||||
|
||||
clean: purge
|
||||
rm -rf libfacedetect.so* facedetect_capi.h package face-detect-grpc
|
||||
|
||||
purge:
|
||||
rm -rf sources/face-detect.cpp
|
||||
431
backend/go/face-detect/gofacedetect.go
Normal file
431
backend/go/face-detect/gofacedetect.go
Normal file
@@ -0,0 +1,431 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// Function variables bound at startup (via purego) to the flat C entry points
// exported by libfacedetect.so and declared in facedetect_capi.h.
//
// Wherever the C side hands back an opaque context or a malloc'd char*/float*
// buffer, the Go signature uses uintptr rather than string/[]float32: that
// keeps the raw pointer available so it can be passed to the matching capi
// free function. Letting purego convert the return value would copy the data
// and drop the original pointer, leaking the C-owned buffer on every call.
var (
	// Library / context lifecycle.
	CppAbiVersion func() int32
	CppLoad       func(ggufPath string) uintptr
	CppFree       func(ctx uintptr)
	CppLastError  func(ctx uintptr) string

	// Releasers for buffers returned by the calls below.
	CppFreeString func(s uintptr)
	CppFreeVec    func(v uintptr)

	// Inference entry points. outVec/outDim/outDistance/outVerified are
	// out-parameters written by the C side.
	CppEmbedPath   func(ctx uintptr, imagePath string, outVec, outDim unsafe.Pointer) int32
	CppEmbedRGB    func(ctx uintptr, rgb []byte, width, height int32, outVec, outDim unsafe.Pointer) int32
	CppDetectJSON  func(ctx uintptr, imagePath string) uintptr
	CppVerifyPaths func(ctx uintptr, a, b string, threshold float32, antiSpoof int32, outDistance, outVerified unsafe.Pointer) int32
	CppAnalyzeJSON func(ctx uintptr, imagePath string) uintptr
)
|
||||
|
||||
// FaceDetect implements the face-recognition (biometric) subset of the Backend
|
||||
// gRPC service over libfacedetect.so. The C side keeps a single loaded model
|
||||
// pack plus a per-ctx last-error buffer and is not reentrant, so
|
||||
// base.SingleThread serializes every call.
|
||||
type FaceDetect struct {
|
||||
base.SingleThread
|
||||
opts loadOptions
|
||||
ctxPtr uintptr
|
||||
}
|
||||
|
||||
func (f *FaceDetect) Load(opts *pb.ModelOptions) error {
|
||||
model := opts.ModelFile
|
||||
if model == "" {
|
||||
model = opts.ModelPath
|
||||
}
|
||||
if !filepath.IsAbs(model) && opts.ModelPath != "" {
|
||||
model = filepath.Join(opts.ModelPath, model)
|
||||
}
|
||||
if model == "" {
|
||||
return errors.New("face-detect: ModelFile is required")
|
||||
}
|
||||
|
||||
f.opts = parseOptions(opts.Options)
|
||||
if f.opts.modelName == "" {
|
||||
f.opts.modelName = filepath.Base(model)
|
||||
}
|
||||
|
||||
// Propagate LocalAI's per-model thread budget to the engine. LocalAI spawns
|
||||
// one backend process per model and serves requests concurrently, so the
|
||||
// engine's own min(hardware_concurrency, 8) default can oversubscribe cores.
|
||||
// FACEDETECT_THREADS is read by the engine at backend construction, so it
|
||||
// must be set before the capi load. A non-positive Threads means "unset":
|
||||
// leave the env alone so the engine keeps its sane default.
|
||||
threads := opts.Threads
|
||||
if threads > 0 {
|
||||
if err := os.Setenv("FACEDETECT_THREADS", strconv.Itoa(int(threads))); err != nil {
|
||||
return fmt.Errorf("face-detect: set FACEDETECT_THREADS: %w", err)
|
||||
}
|
||||
xlog.Info("face-detect: applying LocalAI thread budget", "threads", threads)
|
||||
}
|
||||
|
||||
xlog.Info("face-detect: loading model", "model", model,
|
||||
"verify_threshold", f.opts.verifyThreshold, "abi", CppAbiVersion())
|
||||
|
||||
ctx := CppLoad(model)
|
||||
if ctx == 0 {
|
||||
// The last-error buffer lives on the ctx that was never returned, so
|
||||
// surface the path the operator tried to load instead.
|
||||
return fmt.Errorf("face-detect: facedetect_capi_load failed for %q", model)
|
||||
}
|
||||
f.ctxPtr = ctx
|
||||
return nil
|
||||
}
|
||||
|
||||
// Embeddings returns the L2-normalized ArcFace embedding of the primary face in
|
||||
// the supplied image. Mirroring the Python face backend, the image is read from
|
||||
// Images[0] as a base64 payload; materializeImage decodes it to a temp file so
|
||||
// the path-based C-API can run its own decode (cv2.imread parity). The gRPC
|
||||
// server wraps the returned slice in an EmbeddingResult.
|
||||
func (f *FaceDetect) Embeddings(req *pb.PredictOptions) ([]float32, error) {
|
||||
if f.ctxPtr == 0 {
|
||||
return nil, errors.New("face-detect: model not loaded")
|
||||
}
|
||||
if len(req.Images) == 0 || req.Images[0] == "" {
|
||||
return nil, errors.New("face-detect: Embedding requires Images[0] to be a base64 image")
|
||||
}
|
||||
|
||||
path, cleanup, err := materializeImage(req.Images[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
return f.embedPath(path)
|
||||
}
|
||||
|
||||
func (f *FaceDetect) embedPath(path string) ([]float32, error) {
|
||||
var vec uintptr
|
||||
var dim int32
|
||||
rc := CppEmbedPath(f.ctxPtr, path, unsafe.Pointer(&vec), unsafe.Pointer(&dim))
|
||||
if rc != 0 || vec == 0 || dim <= 0 {
|
||||
return nil, f.lastErr("embed", path)
|
||||
}
|
||||
defer CppFreeVec(vec)
|
||||
// Copy out of the C-owned malloc'd buffer before freeing it. The
|
||||
// uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||
// a C heap pointer from Go-managed memory; safe here, the GC neither tracks
|
||||
// nor moves this buffer and we copy immediately.
|
||||
src := unsafe.Slice((*float32)(unsafe.Pointer(vec)), int(dim)) //nolint:govet // C-owned malloc'd vector, copied out before free
|
||||
out := make([]float32, int(dim))
|
||||
copy(out, src)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Detect runs SCRFD over the image and returns one Detection per face. The
|
||||
// C-API emits a box as [x1,y1,x2,y2] in pixels; the proto carries x/y plus
|
||||
// width/height, so the corners are converted. The 5 facial landmarks the engine
|
||||
// also returns are dropped: the Detection message has no field for them.
|
||||
func (f *FaceDetect) Detect(req *pb.DetectOptions) (pb.DetectResponse, error) {
|
||||
if f.ctxPtr == 0 {
|
||||
return pb.DetectResponse{}, errors.New("face-detect: model not loaded")
|
||||
}
|
||||
if req.Src == "" {
|
||||
return pb.DetectResponse{}, errors.New("face-detect: src image is required")
|
||||
}
|
||||
|
||||
path, cleanup, err := materializeImage(req.Src)
|
||||
if err != nil {
|
||||
return pb.DetectResponse{}, err
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
faces, err := f.detectFaces(path)
|
||||
if err != nil {
|
||||
return pb.DetectResponse{}, err
|
||||
}
|
||||
|
||||
dets := make([]*pb.Detection, 0, len(faces))
|
||||
for _, fc := range faces {
|
||||
if req.Threshold > 0 && fc.Score < req.Threshold {
|
||||
continue
|
||||
}
|
||||
x, y, w, h := fc.xywh()
|
||||
dets = append(dets, &pb.Detection{
|
||||
X: x,
|
||||
Y: y,
|
||||
Width: w,
|
||||
Height: h,
|
||||
Confidence: fc.Score,
|
||||
ClassName: "face",
|
||||
})
|
||||
}
|
||||
return pb.DetectResponse{Detections: dets}, nil
|
||||
}
|
||||
|
||||
// FaceVerify embeds the primary face in each image and reports whether they are
|
||||
// the same identity by cosine distance against a threshold. A request threshold
|
||||
// <= 0 falls back to the model-configured default (verify_threshold option,
|
||||
// 0.35 if unset). When anti_spoofing is set, the C-API applies a MiniFASNet
|
||||
// veto internally (verified forced false on a spoof); the per-image liveness
|
||||
// scores are not exposed by the verify entry point, so img*_is_real /
|
||||
// img*_antispoof_score stay at their zero values.
|
||||
func (f *FaceDetect) FaceVerify(req *pb.FaceVerifyRequest) (pb.FaceVerifyResponse, error) {
|
||||
if f.ctxPtr == 0 {
|
||||
return pb.FaceVerifyResponse{}, errors.New("face-detect: model not loaded")
|
||||
}
|
||||
if req.Img1 == "" || req.Img2 == "" {
|
||||
return pb.FaceVerifyResponse{}, errors.New("face-detect: img1 and img2 are required")
|
||||
}
|
||||
|
||||
path1, cleanup1, err := materializeImage(req.Img1)
|
||||
if err != nil {
|
||||
return pb.FaceVerifyResponse{}, err
|
||||
}
|
||||
defer cleanup1()
|
||||
path2, cleanup2, err := materializeImage(req.Img2)
|
||||
if err != nil {
|
||||
return pb.FaceVerifyResponse{}, err
|
||||
}
|
||||
defer cleanup2()
|
||||
|
||||
threshold := req.Threshold
|
||||
if threshold <= 0 {
|
||||
threshold = f.opts.verifyThreshold
|
||||
}
|
||||
|
||||
antiSpoof := int32(0)
|
||||
if req.AntiSpoofing {
|
||||
antiSpoof = 1
|
||||
}
|
||||
|
||||
started := time.Now()
|
||||
var distance float32
|
||||
var verified int32
|
||||
rc := CppVerifyPaths(f.ctxPtr, path1, path2, threshold, antiSpoof,
|
||||
unsafe.Pointer(&distance), unsafe.Pointer(&verified))
|
||||
if rc != 0 {
|
||||
return pb.FaceVerifyResponse{}, f.lastErr("verify", req.Img1[:min(8, len(req.Img1))]+"...")
|
||||
}
|
||||
elapsedMs := float32(time.Since(started).Seconds() * 1000.0)
|
||||
|
||||
// Confidence decays linearly from 100 at distance 0 to 0 at the threshold,
|
||||
// matching the Python face backend's reporting.
|
||||
confidence := float32(0)
|
||||
if threshold > 0 {
|
||||
confidence = float32(math.Max(0, math.Min(100, (1.0-float64(distance)/float64(threshold))*100.0)))
|
||||
}
|
||||
|
||||
return pb.FaceVerifyResponse{
|
||||
Verified: verified != 0,
|
||||
Distance: distance,
|
||||
Threshold: threshold,
|
||||
Confidence: confidence,
|
||||
Model: f.opts.modelName,
|
||||
Img1Area: f.bestArea(path1),
|
||||
Img2Area: f.bestArea(path2),
|
||||
ProcessingTimeMs: elapsedMs,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// FaceAnalyze runs the genderage head on every detected face. The C-API returns
|
||||
// "M"/"F" gender labels and a rounded age; the labels are normalized to the
|
||||
// "Man"/"Woman" values the proto documents.
|
||||
func (f *FaceDetect) FaceAnalyze(req *pb.FaceAnalyzeRequest) (pb.FaceAnalyzeResponse, error) {
|
||||
if f.ctxPtr == 0 {
|
||||
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: model not loaded")
|
||||
}
|
||||
if req.Img == "" {
|
||||
return pb.FaceAnalyzeResponse{}, errors.New("face-detect: img is required")
|
||||
}
|
||||
|
||||
path, cleanup, err := materializeImage(req.Img)
|
||||
if err != nil {
|
||||
return pb.FaceAnalyzeResponse{}, err
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
ptr := CppAnalyzeJSON(f.ctxPtr, path)
|
||||
if ptr == 0 {
|
||||
return pb.FaceAnalyzeResponse{}, f.lastErr("analyze", path)
|
||||
}
|
||||
defer CppFreeString(ptr)
|
||||
|
||||
faces, err := parseAnalyzeJSON(goStringFromCPtr(ptr))
|
||||
if err != nil {
|
||||
return pb.FaceAnalyzeResponse{}, fmt.Errorf("face-detect: analyze JSON: %w", err)
|
||||
}
|
||||
return pb.FaceAnalyzeResponse{Faces: faces}, nil
|
||||
}
|
||||
|
||||
// faceBox mirrors a single face entry in the JSON documents produced by the
// engine's detect and analyze calls. Age and Gender are only populated by
// documents that carry those keys.
type faceBox struct {
	Score  float32   `json:"score"`
	Box    []float32 `json:"box"`
	Age    float32   `json:"age"`
	Gender string    `json:"gender"`
}

// xywh translates the corner-style box ([x1,y1,x2,y2]) into origin plus
// width/height. When the box has fewer than four values, all four results are
// zero.
func (b faceBox) xywh() (x, y, w, h float32) {
	if len(b.Box) >= 4 {
		x, y = b.Box[0], b.Box[1]
		w, h = b.Box[2]-b.Box[0], b.Box[3]-b.Box[1]
	}
	return x, y, w, h
}
|
||||
|
||||
type facesJSON struct {
|
||||
Faces []faceBox `json:"faces"`
|
||||
}
|
||||
|
||||
func (f *FaceDetect) detectFaces(path string) ([]faceBox, error) {
|
||||
ptr := CppDetectJSON(f.ctxPtr, path)
|
||||
if ptr == 0 {
|
||||
return nil, f.lastErr("detect", path)
|
||||
}
|
||||
defer CppFreeString(ptr)
|
||||
|
||||
var doc facesJSON
|
||||
if err := json.Unmarshal([]byte(goStringFromCPtr(ptr)), &doc); err != nil {
|
||||
return nil, fmt.Errorf("face-detect: detect JSON: %w", err)
|
||||
}
|
||||
return doc.Faces, nil
|
||||
}
|
||||
|
||||
// bestArea returns the FacialArea of the highest-scoring face in an image, or an
|
||||
// empty area when detection fails or finds nothing. Best-effort: verify already
|
||||
// succeeded, so a missing region must not turn a valid match into an error.
|
||||
func (f *FaceDetect) bestArea(path string) *pb.FacialArea {
|
||||
faces, err := f.detectFaces(path)
|
||||
if err != nil || len(faces) == 0 {
|
||||
return &pb.FacialArea{}
|
||||
}
|
||||
best := faces[0]
|
||||
for _, fc := range faces[1:] {
|
||||
if fc.Score > best.Score {
|
||||
best = fc
|
||||
}
|
||||
}
|
||||
x, y, w, h := best.xywh()
|
||||
return &pb.FacialArea{X: x, Y: y, W: w, H: h}
|
||||
}
|
||||
|
||||
// parseAnalyzeJSON maps the engine's analyze document onto FaceAnalysis entries.
|
||||
// The engine reports gender as "M"/"F"; both the dominant label and the score
|
||||
// map are filled with the "Man"/"Woman" form the proto documents.
|
||||
func parseAnalyzeJSON(doc string) ([]*pb.FaceAnalysis, error) {
|
||||
var parsed facesJSON
|
||||
if err := json.Unmarshal([]byte(doc), &parsed); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out := make([]*pb.FaceAnalysis, 0, len(parsed.Faces))
|
||||
for _, fc := range parsed.Faces {
|
||||
x, y, w, h := fc.xywh()
|
||||
fa := &pb.FaceAnalysis{
|
||||
Region: &pb.FacialArea{X: x, Y: y, W: w, H: h},
|
||||
FaceConfidence: fc.Score,
|
||||
Age: fc.Age,
|
||||
}
|
||||
if label := normalizeGender(fc.Gender); label != "" {
|
||||
fa.DominantGender = label
|
||||
fa.Gender = map[string]float32{label: 1.0}
|
||||
}
|
||||
out = append(out, fa)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// normalizeGender converts the engine's one-letter gender code into the label
// used in responses: "M" becomes "Man" and "F" becomes "Woman", ignoring case
// and surrounding whitespace. Empty or whitespace-only input yields "". Any
// other value is returned exactly as it was passed in.
func normalizeGender(g string) string {
	code := strings.TrimSpace(g)
	if code == "" {
		return ""
	}
	if strings.EqualFold(code, "M") {
		return "Man"
	}
	if strings.EqualFold(code, "F") {
		return "Woman"
	}
	return g
}
|
||||
|
||||
// materializeImage makes an image input available as a file on disk.
//
// If src names something that already exists on the filesystem, it is returned
// unchanged together with a no-op cleanup (handy for test fixtures). Otherwise
// src is treated as standard base64, optionally wrapped in a data: URI whose
// prefix up to the first comma is discarded, and the decoded bytes are written
// to a fresh temp file; the returned cleanup removes that file.
//
// On any error the returned path is empty and cleanup is a no-op, so callers
// may invoke it unconditionally.
func materializeImage(src string) (path string, cleanup func(), err error) {
	nothing := func() {}

	if src == "" {
		return "", nothing, errors.New("face-detect: empty image input")
	}
	if _, statErr := os.Stat(src); statErr == nil {
		return src, nothing, nil
	}

	encoded := src
	if strings.HasPrefix(encoded, "data:") {
		if _, rest, found := strings.Cut(encoded, ","); found {
			encoded = rest
		}
	}
	raw, decErr := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded))
	if decErr != nil || len(raw) == 0 {
		return "", nothing, errors.New("face-detect: image is neither an existing path nor valid base64")
	}

	tmp, createErr := os.CreateTemp("", "face-detect-*.img")
	if createErr != nil {
		return "", nothing, fmt.Errorf("face-detect: create temp image: %w", createErr)
	}
	tmpName := tmp.Name()
	remove := func() { _ = os.Remove(tmpName) }

	// Always close the file; a write failure takes precedence over a close
	// failure when reporting.
	_, wErr := tmp.Write(raw)
	cErr := tmp.Close()
	switch {
	case wErr != nil:
		remove()
		return "", nothing, fmt.Errorf("face-detect: write temp image: %w", wErr)
	case cErr != nil:
		remove()
		return "", nothing, fmt.Errorf("face-detect: close temp image: %w", cErr)
	}
	return tmpName, remove, nil
}
|
||||
|
||||
// lastErr wraps the C-API's per-ctx last-error buffer into a Go error.
|
||||
func (f *FaceDetect) lastErr(op, subject string) error {
|
||||
msg := strings.TrimSpace(CppLastError(f.ctxPtr))
|
||||
if msg == "" {
|
||||
msg = "no error detail"
|
||||
}
|
||||
return fmt.Errorf("face-detect: %s failed for %q: %s", op, subject, msg)
|
||||
}
|
||||
|
||||
// goStringFromCPtr copies a NUL-terminated C string into Go memory. cptr is a
|
||||
// malloc'd buffer the caller owns; release it via CppFreeString after the copy.
|
||||
//
|
||||
// The uintptr->Pointer conversion trips vet's unsafeptr check, which can't tell
|
||||
// a C heap pointer from Go-managed memory. Safe here: the GC neither tracks nor
|
||||
// moves the buffer and we dereference it immediately to copy the bytes out.
|
||||
func goStringFromCPtr(cptr uintptr) string {
|
||||
if cptr == 0 {
|
||||
return ""
|
||||
}
|
||||
p := unsafe.Pointer(cptr) //nolint:govet // C-owned malloc'd buffer, not Go-GC memory (see doc above)
|
||||
n := 0
|
||||
for *(*byte)(unsafe.Add(p, n)) != 0 {
|
||||
n++
|
||||
}
|
||||
return string(unsafe.Slice((*byte)(p), n))
|
||||
}
|
||||
230
backend/go/face-detect/gofacedetect_test.go
Normal file
230
backend/go/face-detect/gofacedetect_test.go
Normal file
@@ -0,0 +1,230 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"os"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestFaceDetect(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "face-detect Backend Suite")
|
||||
}
|
||||
|
||||
var (
|
||||
libLoadOnce sync.Once
|
||||
libLoadErr error
|
||||
)
|
||||
|
||||
// ensureLibLoaded mirrors main.go's bootstrap so a Go test can drive the C-API
|
||||
// bridge without spinning up the gRPC server. Records the error (the smoke
|
||||
// specs skip themselves) when libfacedetect.so is not loadable from cwd
|
||||
// (LD_LIBRARY_PATH or a symlink in ./).
|
||||
func ensureLibLoaded() error {
|
||||
libLoadOnce.Do(func() {
|
||||
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libfacedetect.so"
|
||||
}
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
libLoadErr = err
|
||||
return
|
||||
}
|
||||
purego.RegisterLibFunc(&CppAbiVersion, lib, "facedetect_capi_abi_version")
|
||||
purego.RegisterLibFunc(&CppLoad, lib, "facedetect_capi_load")
|
||||
purego.RegisterLibFunc(&CppFree, lib, "facedetect_capi_free")
|
||||
purego.RegisterLibFunc(&CppLastError, lib, "facedetect_capi_last_error")
|
||||
purego.RegisterLibFunc(&CppFreeString, lib, "facedetect_capi_free_string")
|
||||
purego.RegisterLibFunc(&CppFreeVec, lib, "facedetect_capi_free_vec")
|
||||
purego.RegisterLibFunc(&CppEmbedPath, lib, "facedetect_capi_embed_path")
|
||||
purego.RegisterLibFunc(&CppEmbedRGB, lib, "facedetect_capi_embed_rgb")
|
||||
purego.RegisterLibFunc(&CppDetectJSON, lib, "facedetect_capi_detect_path_json")
|
||||
purego.RegisterLibFunc(&CppVerifyPaths, lib, "facedetect_capi_verify_paths")
|
||||
purego.RegisterLibFunc(&CppAnalyzeJSON, lib, "facedetect_capi_analyze_path_json")
|
||||
})
|
||||
return libLoadErr
|
||||
}
|
||||
|
||||
var _ = Describe("parseOptions", func() {
|
||||
It("defaults verify_threshold to 0.35", func() {
|
||||
o := parseOptions(nil)
|
||||
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||
Expect(o.modelName).To(Equal(""))
|
||||
})
|
||||
|
||||
It("parses verify_threshold, threshold alias and model_name", func() {
|
||||
o := parseOptions([]string{"verify_threshold:0.4", "model_name:buffalo_l", "unknown:x"})
|
||||
Expect(o.verifyThreshold).To(Equal(float32(0.4)))
|
||||
Expect(o.modelName).To(Equal("buffalo_l"))
|
||||
|
||||
o2 := parseOptions([]string{"threshold:0.3"})
|
||||
Expect(o2.verifyThreshold).To(Equal(float32(0.3)))
|
||||
})
|
||||
|
||||
It("ignores non-positive thresholds and keeps the default", func() {
|
||||
o := parseOptions([]string{"verify_threshold:0", "threshold:-1"})
|
||||
Expect(o.verifyThreshold).To(Equal(float32(0.35)))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("normalizeGender", func() {
|
||||
It("maps M/F codes to Man/Woman", func() {
|
||||
Expect(normalizeGender("M")).To(Equal("Man"))
|
||||
Expect(normalizeGender("f")).To(Equal("Woman"))
|
||||
Expect(normalizeGender(" m ")).To(Equal("Man"))
|
||||
})
|
||||
|
||||
It("passes empty and unknown codes through", func() {
|
||||
Expect(normalizeGender("")).To(Equal(""))
|
||||
Expect(normalizeGender("nonbinary")).To(Equal("nonbinary"))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("faceBox.xywh", func() {
|
||||
It("converts an [x1,y1,x2,y2] box to x/y/width/height", func() {
|
||||
b := faceBox{Box: []float32{10, 20, 50, 80}}
|
||||
x, y, w, h := b.xywh()
|
||||
Expect(x).To(Equal(float32(10)))
|
||||
Expect(y).To(Equal(float32(20)))
|
||||
Expect(w).To(Equal(float32(40)))
|
||||
Expect(h).To(Equal(float32(60)))
|
||||
})
|
||||
|
||||
It("returns zeros for a short box", func() {
|
||||
x, y, w, h := faceBox{Box: []float32{1, 2}}.xywh()
|
||||
Expect([]float32{x, y, w, h}).To(Equal([]float32{0, 0, 0, 0}))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("parseAnalyzeJSON", func() {
|
||||
It("maps region, age and gender for each face", func() {
|
||||
doc := `{"faces":[
|
||||
{"score":0.997,"box":[10,20,50,80],"age":31,"gender":"M"},
|
||||
{"score":0.81,"box":[0,0,40,40],"age":24,"gender":"F"}]}`
|
||||
faces, err := parseAnalyzeJSON(doc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(faces).To(HaveLen(2))
|
||||
|
||||
Expect(faces[0].FaceConfidence).To(BeNumerically("~", 0.997, 1e-4))
|
||||
Expect(faces[0].Age).To(BeNumerically("~", 31, 1e-4))
|
||||
Expect(faces[0].DominantGender).To(Equal("Man"))
|
||||
Expect(faces[0].Gender).To(HaveKeyWithValue("Man", float32(1.0)))
|
||||
Expect(faces[0].Region.W).To(Equal(float32(40)))
|
||||
Expect(faces[0].Region.H).To(Equal(float32(60)))
|
||||
|
||||
Expect(faces[1].DominantGender).To(Equal("Woman"))
|
||||
})
|
||||
|
||||
It("tolerates a missing gender field", func() {
|
||||
faces, err := parseAnalyzeJSON(`{"faces":[{"score":0.5,"box":[0,0,10,10],"age":40}]}`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(faces).To(HaveLen(1))
|
||||
Expect(faces[0].DominantGender).To(Equal(""))
|
||||
Expect(faces[0].Gender).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("returns no faces for an empty document", func() {
|
||||
faces, err := parseAnalyzeJSON(`{"faces":[]}`)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(faces).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("returns an error on malformed JSON", func() {
|
||||
_, err := parseAnalyzeJSON(`{not-json`)
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("materializeImage", func() {
|
||||
It("decodes a base64 payload to a temp file", func() {
|
||||
payload := base64.StdEncoding.EncodeToString([]byte("\xff\xd8\xff\xe0fake-jpeg"))
|
||||
path, cleanup, err := materializeImage(payload)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
defer cleanup()
|
||||
data, rerr := os.ReadFile(path)
|
||||
Expect(rerr).ToNot(HaveOccurred())
|
||||
Expect(data).To(Equal([]byte("\xff\xd8\xff\xe0fake-jpeg")))
|
||||
})
|
||||
|
||||
It("strips a data: URI prefix before decoding", func() {
|
||||
payload := "data:image/png;base64," + base64.StdEncoding.EncodeToString([]byte("hello"))
|
||||
path, cleanup, err := materializeImage(payload)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
defer cleanup()
|
||||
data, rerr := os.ReadFile(path)
|
||||
Expect(rerr).ToNot(HaveOccurred())
|
||||
Expect(data).To(Equal([]byte("hello")))
|
||||
})
|
||||
|
||||
It("uses an existing path as-is", func() {
|
||||
tmp, err := os.CreateTemp("", "face-detect-fixture-*.bin")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
defer func() { _ = os.Remove(tmp.Name()) }()
|
||||
Expect(tmp.Close()).To(Succeed())
|
||||
|
||||
path, cleanup, err := materializeImage(tmp.Name())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
defer cleanup()
|
||||
Expect(path).To(Equal(tmp.Name()))
|
||||
})
|
||||
|
||||
It("errors on input that is neither a path nor base64", func() {
|
||||
_, _, err := materializeImage("not base64!!!")
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
})
|
||||
|
||||
// The specs below exercise the real C-API end to end. They run only when both a
|
||||
// model GGUF and a test image are provided, and skip cleanly otherwise so the
|
||||
// suite stays green without large assets.
|
||||
var _ = Describe("FaceDetect end-to-end", Ordered, func() {
|
||||
var (
|
||||
f *FaceDetect
|
||||
modelPath = os.Getenv("FACEDETECT_BACKEND_TEST_MODEL")
|
||||
imagePath = os.Getenv("FACEDETECT_BACKEND_TEST_IMAGE")
|
||||
)
|
||||
|
||||
BeforeAll(func() {
|
||||
if modelPath == "" || imagePath == "" {
|
||||
Skip("set FACEDETECT_BACKEND_TEST_MODEL and FACEDETECT_BACKEND_TEST_IMAGE to run the e2e specs")
|
||||
}
|
||||
if err := ensureLibLoaded(); err != nil {
|
||||
Skip("libfacedetect.so not loadable: " + err.Error())
|
||||
}
|
||||
f = &FaceDetect{}
|
||||
Expect(f.Load(&pb.ModelOptions{ModelFile: modelPath})).To(Succeed())
|
||||
})
|
||||
|
||||
It("embeds the primary face in an image", func() {
|
||||
emb, err := f.Embeddings(&pb.PredictOptions{Images: []string{imagePath}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(emb).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("detects at least one face", func() {
|
||||
resp, err := f.Detect(&pb.DetectOptions{Src: imagePath})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp.Detections).ToNot(BeEmpty())
|
||||
Expect(resp.Detections[0].ClassName).To(Equal("face"))
|
||||
})
|
||||
|
||||
It("verifies an image against itself as the same identity", func() {
|
||||
resp, err := f.FaceVerify(&pb.FaceVerifyRequest{Img1: imagePath, Img2: imagePath})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp.Verified).To(BeTrue())
|
||||
Expect(resp.Distance).To(BeNumerically("<=", resp.Threshold))
|
||||
})
|
||||
|
||||
It("analyzes age/gender for each face", func() {
|
||||
resp, err := f.FaceAnalyze(&pb.FaceAnalyzeRequest{Img: imagePath})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp.Faces).ToNot(BeEmpty())
|
||||
})
|
||||
})
|
||||
65
backend/go/face-detect/main.go
Normal file
65
backend/go/face-detect/main.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package main
|
||||
|
||||
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||
//
|
||||
// Loads libfacedetect.so via purego and registers the flat C-API entry points
|
||||
// declared in facedetect_capi.h. The library name can be overridden with
|
||||
// FACEDETECT_LIBRARY (mirrors the VOICEDETECT_LIBRARY / PARAKEET_LIBRARY
|
||||
// convention in the sibling backends); the default looks for the .so next to
|
||||
// this binary (resolved via LD_LIBRARY_PATH by run.sh).
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
type LibFuncs struct {
|
||||
FuncPtr any
|
||||
Name string
|
||||
}
|
||||
|
||||
func main() {
|
||||
libName := os.Getenv("FACEDETECT_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libfacedetect.so"
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("face-detect: dlopen %q: %w", libName, err))
|
||||
}
|
||||
|
||||
// Bound 1:1 to facedetect_capi.h. char*/float* returns are registered as
|
||||
// uintptr so the raw pointer can be freed via the matching capi free fn.
|
||||
libFuncs := []LibFuncs{
|
||||
{&CppAbiVersion, "facedetect_capi_abi_version"},
|
||||
{&CppLoad, "facedetect_capi_load"},
|
||||
{&CppFree, "facedetect_capi_free"},
|
||||
{&CppLastError, "facedetect_capi_last_error"},
|
||||
{&CppFreeString, "facedetect_capi_free_string"},
|
||||
{&CppFreeVec, "facedetect_capi_free_vec"},
|
||||
{&CppEmbedPath, "facedetect_capi_embed_path"},
|
||||
{&CppEmbedRGB, "facedetect_capi_embed_rgb"},
|
||||
{&CppDetectJSON, "facedetect_capi_detect_path_json"},
|
||||
{&CppVerifyPaths, "facedetect_capi_verify_paths"},
|
||||
{&CppAnalyzeJSON, "facedetect_capi_analyze_path_json"},
|
||||
}
|
||||
for _, lf := range libFuncs {
|
||||
purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "[face-detect] ABI=%d\n", CppAbiVersion())
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &FaceDetect{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
47
backend/go/face-detect/options.go
Normal file
47
backend/go/face-detect/options.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package main
|
||||
|
||||
import (
	"math"
	"strconv"
	"strings"
)
|
||||
|
||||
// defaultVerifyThreshold is the cosine-distance cutoff used when a request does
// not set one. Matches the insightface buffalo_l ArcFace R50 default the Python
// face backend ships with so the two implementations agree on verdicts out of
// the box.
const defaultVerifyThreshold float32 = 0.35

// loadOptions holds the parsed model-level options for face-detect.
type loadOptions struct {
	// verifyThreshold is the verification cutoff; parseOptions seeds it with
	// defaultVerifyThreshold and overrides it only with a valid option value.
	verifyThreshold float32
	// modelName is the value of the "model_name" option, or "" when absent.
	modelName string
}

// splitOption splits a "key:value" option at the first ':' and trims
// surrounding whitespace from both halves. ok is false (and key/value are
// empty) when o contains no ':'. Any further ':' stays in value.
func splitOption(o string) (key, value string, ok bool) {
	rawKey, rawValue, found := strings.Cut(o, ":")
	if !found {
		return "", "", false
	}
	return strings.TrimSpace(rawKey), strings.TrimSpace(rawValue), true
}

// parseOptions reads the backend "key:value" option slice and returns the
// resulting loadOptions. Entries without a ':' and unknown keys are ignored.
//
// Recognised keys:
//   - "verify_threshold" / "threshold": a finite float greater than zero;
//     anything else (unparsable, zero, negative, NaN, infinite) is ignored
//     and the previous value is kept.
//   - "model_name": stored verbatim (after trimming).
//
// Defaults: verifyThreshold is defaultVerifyThreshold (0.35); modelName is
// left empty here, so any fallback naming is up to the caller.
func parseOptions(opts []string) loadOptions {
	o := loadOptions{verifyThreshold: defaultVerifyThreshold}
	for _, opt := range opts {
		key, value, ok := splitOption(opt)
		if !ok {
			continue
		}
		switch key {
		case "verify_threshold", "threshold":
			// strconv.ParseFloat accepts "inf"/"infinity" without error, and
			// +Inf passes a plain "> 0" test. An infinite cutoff is not a
			// usable distance threshold, so it is rejected explicitly. NaN
			// already fails the "> 0" comparison.
			f, err := strconv.ParseFloat(value, 32)
			if err == nil && f > 0 && !math.IsInf(f, 1) {
				o.verifyThreshold = float32(f)
			}
		case "model_name":
			o.modelName = value
		}
	}
	return o
}
|
||||
68
backend/go/face-detect/package.sh
Normal file
68
backend/go/face-detect/package.sh
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/bin/bash
#
# Assemble a self-contained package/ directory for the face-detect backend:
# the face-detect-grpc binary, run.sh, libfacedetect.so, the core runtime libs
# (libc/libstdc++/libgomp + ld.so) and the GPU runtime for the active
# BUILD_TYPE. Mirrors backend/go/voice-detect/package.sh; run.sh routes the
# (CGO_ENABLED=0) binary through lib/ld.so so the packaged libc is used
# instead of the host's.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# Copy the dynamic loader ($1) into the package as lib/ld.so, followed by the
# core runtime libraries libfacedetect.so links against, taken from the
# multiarch directory $2. Symlinks are dereferenced (-L).
facedetect_copy_core_libs() {
    local loader="$1" libdir="$2" lib
    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 \
               libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$libdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/face-detect-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"

# libfacedetect.so plus any soname symlinks. purego.Dlopen finds it through
# LD_LIBRARY_PATH, which run.sh points at lib/.
if ! cp -avf "$CURDIR"/libfacedetect.so* "$CURDIR/package/lib/" 2>/dev/null; then
    echo "ERROR: libfacedetect.so not found in $CURDIR, run 'make' first" >&2
    exit 1
fi

# Pick the architecture from whichever dynamic loader exists on the build
# host; Darwin bundles no loader or libc.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    facedetect_copy_core_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    facedetect_copy_core_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# GPU libraries (CUDA/ROCm/Intel/Vulkan loader + ICDs + drivers) are packaged
# per BUILD_TYPE so the backend can reach the GPU without the runtime base
# image shipping those drivers. The step is skipped when the helper script is
# not present.
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/" "$CURDIR/package/lib/"
|
||||
16
backend/go/face-detect/run.sh
Normal file
16
backend/go/face-detect/run.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
# Launcher for the packaged face-detect gRPC backend.
set -e

CURDIR=$(dirname "$(realpath "$0")")
BINARY="$CURDIR/face-detect-grpc"
LOADER="$CURDIR/lib/ld.so"

# Bundled libraries (lib/) take precedence, then the package directory, then
# whatever the caller already had on LD_LIBRARY_PATH.
LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
export LD_LIBRARY_PATH

# No bundled loader: run the binary directly with the host's loader.
if [ ! -f "$LOADER" ]; then
    exec "$BINARY" "$@"
fi

# A self-contained ld.so was packaged: route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the voice-detect /
# whisper / parakeet backends' runtime layout).
echo "Using lib/ld.so"
exec "$LOADER" "$BINARY" "$@"
|
||||
15
backend/go/face-detect/test.sh
Normal file
15
backend/go/face-detect/test.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
# Runs the face-detect backend Go test suite from the backend directory.
set -e

CURDIR=$(dirname "$(realpath "$0")")
cd "$CURDIR"

echo "Running face-detect backend tests..."

# Pure-Go parsing specs always run. The embed/detect/verify/analyze smoke specs
# need a model and an image, supplied through FACEDETECT_BACKEND_TEST_MODEL and
# FACEDETECT_BACKEND_TEST_IMAGE; without them those specs auto-skip.
# The backend directory is prepended to LD_LIBRARY_PATH for the test run only.
env LD_LIBRARY_PATH="$CURDIR:${LD_LIBRARY_PATH:-}" \
    go test -v -timeout 1200s .

echo "face-detect tests completed."
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
exec $CURDIR/local-store "$@"
|
||||
exec "$CURDIR"/local-store "$@"
|
||||
@@ -32,6 +32,8 @@ endif
|
||||
ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DLOCALVQE_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# Apple Silicon: CPU-only (no Metal upstream); built + published as an arm64
|
||||
# image by CI (includeDarwin in .github/backend-matrix.yml) for macOS install.
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
endif
|
||||
|
||||
@@ -67,8 +69,9 @@ $(LIB_SENTINEL): sources/LocalVQE
|
||||
# that the loader picks at runtime. We must build every target — the
|
||||
# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
|
||||
# routes all of them into build/bin; copy them out next to the binary.
|
||||
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.so* .
|
||||
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/bin/liblocalvqe.dylib . 2>/dev/null || cp -P build/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.dylib .
|
||||
cp -P build/bin/libggml*.so* . 2>/dev/null || true
|
||||
cp -P build/bin/libggml*.dylib . 2>/dev/null || true
|
||||
touch $(LIB_SENTINEL)
|
||||
|
||||
liblocalvqe.so: $(LIB_SENTINEL)
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("LOCALVQE_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./liblocalvqe.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./liblocalvqe.dylib"
|
||||
} else {
|
||||
libName = "./liblocalvqe.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -15,7 +15,9 @@ cp -avf $CURDIR/localvqe $CURDIR/package/
|
||||
# liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
|
||||
# variants — LocalVQE picks the matching CPU variant at load time.
|
||||
cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
|
||||
cp -P $CURDIR/liblocalvqe.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
|
||||
cp -P $CURDIR/libggml*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -1,23 +1,34 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
# LocalVQE's runtime CPU-variant loader (ggml_backend_load_all) searches
|
||||
# get_executable_path() and current_path() — the second one is what saves us
|
||||
# when /proc/self/exe resolves to lib/ld.so under the bundled-loader path.
|
||||
# So we cd into $CURDIR (where all the libggml-cpu-*.so files live) before
|
||||
# So we cd into "$CURDIR" (where all the libggml-cpu-*.so files live) before
|
||||
# exec'ing the binary.
|
||||
cd "$CURDIR"
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
|
||||
# DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
|
||||
export DYLD_LIBRARY_PATH="$CURDIR":"$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.dylib
|
||||
if [ ! -e "$LOCALVQE_LIBRARY" ]; then
|
||||
LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.so
|
||||
fi
|
||||
export LOCALVQE_LIBRARY
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR":"$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
export LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.so
|
||||
fi
|
||||
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using library: $LOCALVQE_LIBRARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/localvqe "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/localvqe "$@"
|
||||
fi
|
||||
|
||||
echo "Using library: $LOCALVQE_LIBRARY"
|
||||
exec $CURDIR/localvqe "$@"
|
||||
exec "$CURDIR"/localvqe "$@"
|
||||
|
||||
@@ -33,6 +33,8 @@ else ifeq ($(BUILD_TYPE),hipblas)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DLA_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
|
||||
# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
@@ -70,7 +72,7 @@ ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = liblocateanythingcpp-fallback.so
|
||||
VARIANT_TARGETS = liblocateanythingcpp-fallback.dylib
|
||||
endif
|
||||
|
||||
locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
|
||||
@@ -82,7 +84,7 @@ package: locate-anything-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources
|
||||
rm -rf liblocateanythingcpp*.so liblocateanythingcpp*.dylib locate-anything-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -109,11 +111,19 @@ liblocateanythingcpp-avx512.so: sources/locate-anything.cpp
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
liblocateanythingcpp-fallback.dylib: sources/locate-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
else
|
||||
liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
endif
|
||||
|
||||
liblocateanythingcpp-custom: CMakeLists.txt
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
@@ -121,7 +131,8 @@ liblocateanythingcpp-custom: CMakeLists.txt
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/liblocateanythingcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: locate-anything-cpp package
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("LOCATEANYTHING_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./liblocateanythingcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./liblocateanythingcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./liblocateanythingcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/liblocateanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,19 +12,23 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
|
||||
if [ -e "$CURDIR"/liblocateanythingcpp-avx.so ]; then
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-avx.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/liblocateanythingcpp-avx2.so ]; then
|
||||
if [ -e "$CURDIR"/liblocateanythingcpp-avx2.so ]; then
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-avx2.so"
|
||||
fi
|
||||
fi
|
||||
@@ -32,21 +36,22 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/liblocateanythingcpp-avx512.so ]; then
|
||||
if [ -e "$CURDIR"/liblocateanythingcpp-avx512.so ]; then
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LOCATEANYTHING_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/locate-anything-cpp "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/locate-anything-cpp "$@"
|
||||
fi
|
||||
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/locate-anything-cpp "$@"
|
||||
exec "$CURDIR"/locate-anything-cpp "$@"
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# omnivoice.cpp version
|
||||
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
||||
OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
|
||||
OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
|
||||
SO_TARGET?=libgomnivoicecpp.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -65,7 +65,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
|
||||
else
|
||||
VARIANT_TARGETS = libgomnivoicecpp-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgomnivoicecpp-fallback.dylib
|
||||
endif
|
||||
|
||||
omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
|
||||
@@ -77,7 +78,7 @@ package: omnivoice-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgomnivoicecpp*.so package sources/omnivoice.cpp omnivoice-cpp
|
||||
rm -rf libgomnivoicecpp*.so libgomnivoicecpp*.dylib package sources/omnivoice.cpp omnivoice-cpp
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -106,13 +107,20 @@ libgomnivoicecpp-fallback.so: sources/omnivoice.cpp
|
||||
SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
||||
rm -rf build-libgomnivoicecpp-fallback.so
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgomnivoicecpp-fallback.dylib: sources/omnivoice.cpp
|
||||
$(info ${GREEN}I omnivoice-cpp build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgomnivoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
||||
rm -rf build-libgomnivoicecpp-fallback.dylib
|
||||
|
||||
libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgomnivoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: omnivoice-cpp
|
||||
@echo "Running omnivoice-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("OMNIVOICE_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgomnivoicecpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgomnivoicecpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgomnivoicecpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgomnivoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
cd /
|
||||
|
||||
@@ -12,19 +12,23 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
|
||||
if [ -e "$CURDIR"/libgomnivoicecpp-avx.so ]; then
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-avx.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/libgomnivoicecpp-avx2.so ]; then
|
||||
if [ -e "$CURDIR"/libgomnivoicecpp-avx2.so ]; then
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-avx2.so"
|
||||
fi
|
||||
fi
|
||||
@@ -32,21 +36,22 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/libgomnivoicecpp-avx512.so ]; then
|
||||
if [ -e "$CURDIR"/libgomnivoicecpp-avx512.so ]; then
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export OMNIVOICE_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/omnivoice-cpp "$@"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/omnivoice-cpp "$@"
|
||||
fi
|
||||
|
||||
echo "Using library: $LIBRARY"
|
||||
exec $CURDIR/omnivoice-cpp "$@"
|
||||
exec "$CURDIR"/omnivoice-cpp "$@"
|
||||
|
||||
@@ -1,13 +1,30 @@
|
||||
GOCMD?=go
|
||||
GO_TAGS?=
|
||||
|
||||
# The opus shim is a small C wrapper around libopus' variadic
|
||||
# opus_encoder_ctl (see csrc/opus_shim.c). It is built as a shared library
|
||||
# and dlopen'd at runtime by the Go backend (codec.go). The extension is
|
||||
# OS-specific: Linux uses .so, macOS uses .dylib. OS is exported by the root
|
||||
# Makefile (`export OS := $(shell uname -s)`).
|
||||
SHIM_EXT=so
|
||||
|
||||
OPUS_CFLAGS := $(shell pkg-config --cflags opus)
|
||||
OPUS_LIBS := $(shell pkg-config --libs opus)
|
||||
SHIM_LDFLAGS := $(OPUS_LIBS)
|
||||
|
||||
libopusshim.so: csrc/opus_shim.c
|
||||
$(CC) -shared -fPIC -o $@ $< $(OPUS_CFLAGS) $(OPUS_LIBS)
|
||||
ifeq ($(OS),Darwin)
|
||||
SHIM_EXT=dylib
|
||||
# Resolve libopus symbols lazily from the already globally-loaded
|
||||
# libopus (codec.go dlopens it RTLD_GLOBAL before the shim) rather than
|
||||
# recording an absolute Homebrew path in the dylib. This keeps the
|
||||
# packaged shim relocatable on machines that have no Homebrew.
|
||||
SHIM_LDFLAGS := -undefined dynamic_lookup
|
||||
endif
|
||||
|
||||
opus: libopusshim.so
|
||||
libopusshim.$(SHIM_EXT): csrc/opus_shim.c
|
||||
$(CC) -shared -fPIC -o $@ $< $(OPUS_CFLAGS) $(SHIM_LDFLAGS)
|
||||
|
||||
opus: libopusshim.$(SHIM_EXT)
|
||||
$(GOCMD) build -tags "$(GO_TAGS)" -o opus ./
|
||||
|
||||
package: opus
|
||||
@@ -16,4 +33,7 @@ package: opus
|
||||
build: package
|
||||
|
||||
clean:
|
||||
rm -f opus libopusshim.so
|
||||
rm -f opus libopusshim.$(SHIM_EXT)
|
||||
rm -rf package
|
||||
|
||||
.PHONY: build package clean
|
||||
|
||||
@@ -8,13 +8,23 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avf $CURDIR/opus $CURDIR/package/
|
||||
cp -avf $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Copy the opus shim library
|
||||
cp -avf $CURDIR/libopusshim.so $CURDIR/package/lib/
|
||||
# The shim extension is OS-specific (.so on Linux, .dylib on macOS).
|
||||
SHIM_EXT=so
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
SHIM_EXT=dylib
|
||||
fi
|
||||
|
||||
# Copy system libopus
|
||||
# Copy the opus shim library
|
||||
cp -avf $CURDIR/libopusshim.$SHIM_EXT $CURDIR/package/lib/
|
||||
|
||||
# Copy system libopus so the backend is self-contained: the runtime base
|
||||
# image has neither libopus-dev (Linux) nor Homebrew (macOS), so codec.go's
|
||||
# dlopen would otherwise fail. Both name patterns are attempted; only the
|
||||
# host's matching one exists.
|
||||
if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists opus; then
|
||||
LIBOPUS_DIR=$(pkg-config --variable=libdir opus)
|
||||
cp -avfL $LIBOPUS_DIR/libopus.so* $CURDIR/package/lib/ 2>/dev/null || true
|
||||
cp -avf $LIBOPUS_DIR/libopus.so* $CURDIR/package/lib/ 2>/dev/null || true
|
||||
cp -avf $LIBOPUS_DIR/libopus*.dylib $CURDIR/package/lib/ 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
@@ -38,6 +48,8 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
||||
elif [ "$(uname -s)" = "Darwin" ]; then
|
||||
echo "Detected Darwin — system libraries linked dynamically, no bundled loader needed"
|
||||
else
|
||||
echo "Warning: Could not detect architecture for system library bundling"
|
||||
fi
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export OPUS_SHIM_LIBRARY=$CURDIR/lib/libopusshim.so
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/opus "$@"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.dylib
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.so
|
||||
fi
|
||||
|
||||
exec $CURDIR/opus "$@"
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/opus "$@"
|
||||
fi
|
||||
|
||||
exec "$CURDIR"/opus "$@"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
@@ -74,6 +74,7 @@ libparakeet.so: sources/parakeet.cpp
|
||||
cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
|
||||
cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
|
||||
cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
|
||||
cp -fv sources/parakeet.cpp/build-shared/libparakeet.dylib ./ 2>/dev/null || true
|
||||
cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
|
||||
|
||||
parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go
|
||||
|
||||
81
backend/go/parakeet-cpp/boundary.go
Normal file
81
backend/go/parakeet-cpp/boundary.go
Normal file
@@ -0,0 +1,81 @@
|
||||
package main
|
||||
|
||||
// utteranceBoundary is the single definition of a small state machine that was
|
||||
// previously open-coded three times — as a bare `finalEou` bool with an ad-hoc
|
||||
// toggle — in the live feed (live.go), the file-stream text path, and the
|
||||
// file-stream JSON path (goparakeetcpp.go).
|
||||
//
|
||||
// It answers one running question: does the decode currently rest on an
|
||||
// end-of-utterance boundary? That is the value a closing FinalResult reports as
|
||||
// .Eou and the realtime turn detector treats as a commit point.
|
||||
//
|
||||
// parakeet auto-resets its decoder after every <EOU>/<EOB>, so one streaming
|
||||
// session is a sequence of utterances and this is a LATCH, not a monotonic
|
||||
// flag: it closes on an <EOU> and reopens as soon as the next utterance starts.
|
||||
// (Contrast the realtime API's per-turn `eouSeen`, which only ever goes
|
||||
// false->true because each turn gets a fresh stream. Here the stream outlives
|
||||
// the turn, so the boundary status must be able to reopen.)
|
||||
//
|
||||
// The only transitions, over the events one streamFeedResult carries — an
|
||||
// <EOU>, an <EOB> (backchannel), or plain speech output (text and/or words):
|
||||
//
|
||||
// <EOU>
|
||||
// open ───────────► closed
|
||||
// ▲ ▲ │ │ │
|
||||
// │ └─┘ <EOB>|speech │ │ <EOU>
|
||||
// │ (stay open) │ └─┘ (stay closed)
|
||||
// └──────────────────┘
|
||||
// <EOB>|speech
|
||||
//
|
||||
// open = NOT on an utterance boundary: mid-utterance, the last boundary was
|
||||
// a backchannel <EOB>, or the stream just began (the initial state).
|
||||
// closed = the last meaningful event was an <EOU> with no later speech: a real
|
||||
// turn boundary.
|
||||
//
|
||||
// A feed that carries nothing (no eou/eob/text/words — e.g. a finalize flush
|
||||
// that produced no tail) is a no-op and leaves the state unchanged, matching
|
||||
// the legacy "leave finalEou as it was" behaviour.
|
||||
//
|
||||
// The state carries no data, so it is modelled as a two-valued type (a named
|
||||
// bool) rather than an int enum: every inhabitant is legal, so illegal states
|
||||
// are unrepresentable — the payload-free analog of the sealed sum types the
|
||||
// realtime machines use (those need interfaces because their states carry data,
|
||||
// e.g. Active{ID}, where "Active with no ID" is the illegal combination a scalar
|
||||
// cannot even express).
|
||||
type utteranceBoundary bool
|
||||
|
||||
const (
|
||||
// boundaryOpen is the zero value (false), so a fresh decode starts open —
|
||||
// exactly the legacy `var finalEou bool` (false) initial condition.
|
||||
boundaryOpen utteranceBoundary = false
|
||||
boundaryClosed utteranceBoundary = true
|
||||
)
|
||||
|
||||
// observe folds one decode increment into the latch and returns the new state.
|
||||
//
|
||||
// <EOU> takes priority when a single feed carries both an <EOU> and speech
|
||||
// (e.g. {"text":"hello","eou":1}): the utterance both produced that text AND
|
||||
// ended, so the decode rests on the boundary. This matches the legacy
|
||||
// eou-checked-first ordering at every call site.
|
||||
func (b utteranceBoundary) observe(r streamFeedResult) utteranceBoundary {
|
||||
switch {
|
||||
case r.Eou:
|
||||
return boundaryClosed
|
||||
case r.Eob || r.Delta != "" || len(r.Words) > 0:
|
||||
return boundaryOpen
|
||||
default:
|
||||
return b
|
||||
}
|
||||
}
|
||||
|
||||
// ended reports whether the decode currently rests on an end-of-utterance
|
||||
// boundary (a real <EOU>, not a backchannel <EOB>). This is what a closing
|
||||
// FinalResult carries as .Eou.
|
||||
func (b utteranceBoundary) ended() bool { return b == boundaryClosed }
|
||||
|
||||
func (b utteranceBoundary) String() string {
|
||||
if b == boundaryClosed {
|
||||
return "closed"
|
||||
}
|
||||
return "open"
|
||||
}
|
||||
92
backend/go/parakeet-cpp/boundary_test.go
Normal file
92
backend/go/parakeet-cpp/boundary_test.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"math/rand/v2"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("utteranceBoundary (decode end-of-utterance latch)", func() {
|
||||
It("starts open: a fresh decode is not on a boundary", func() {
|
||||
var b utteranceBoundary
|
||||
Expect(b).To(Equal(boundaryOpen))
|
||||
Expect(b.ended()).To(BeFalse())
|
||||
})
|
||||
|
||||
DescribeTable("single feed transition from the open state",
|
||||
func(r streamFeedResult, wantEnded bool) {
|
||||
Expect(boundaryOpen.observe(r).ended()).To(Equal(wantEnded))
|
||||
},
|
||||
Entry("<EOU> closes it", streamFeedResult{Eou: true}, true),
|
||||
Entry("<EOU> with text closes it (eou wins)", streamFeedResult{Delta: "hi", Eou: true}, true),
|
||||
Entry("<EOB> stays open (backchannel is not a turn boundary)", streamFeedResult{Eob: true}, false),
|
||||
Entry("plain text stays open", streamFeedResult{Delta: "hello"}, false),
|
||||
Entry("words-only stays open", streamFeedResult{Words: []transcriptWord{{W: "x"}}}, false),
|
||||
Entry("empty feed is a no-op (stays open)", streamFeedResult{}, false),
|
||||
)
|
||||
|
||||
DescribeTable("single feed transition from the closed state",
|
||||
func(r streamFeedResult, wantEnded bool) {
|
||||
Expect(boundaryClosed.observe(r).ended()).To(Equal(wantEnded))
|
||||
},
|
||||
Entry("another <EOU> stays closed", streamFeedResult{Eou: true}, true),
|
||||
Entry("trailing speech reopens it", streamFeedResult{Delta: "and more"}, false),
|
||||
Entry("words reopen it", streamFeedResult{Words: []transcriptWord{{W: "x"}}}, false),
|
||||
Entry("a backchannel <EOB> reopens it", streamFeedResult{Eob: true}, false),
|
||||
Entry("empty feed is a no-op (stays closed)", streamFeedResult{}, true),
|
||||
)
|
||||
|
||||
It("is a latch: <EOU> then trailing speech reopens, then <EOU> closes again", func() {
|
||||
b := boundaryOpen
|
||||
b = b.observe(streamFeedResult{Delta: "turn one", Eou: true})
|
||||
Expect(b.ended()).To(BeTrue())
|
||||
b = b.observe(streamFeedResult{Delta: " and more"})
|
||||
Expect(b.ended()).To(BeFalse(), "trailing speech without an EOU is an open utterance")
|
||||
b = b.observe(streamFeedResult{Eou: true})
|
||||
Expect(b.ended()).To(BeTrue())
|
||||
})
|
||||
|
||||
It("treats a backchannel before a real EOU correctly", func() {
|
||||
b := boundaryOpen
|
||||
b = b.observe(streamFeedResult{Delta: "uh huh", Eob: true})
|
||||
Expect(b.ended()).To(BeFalse(), "a backchannel must not masquerade as a turn boundary")
|
||||
b = b.observe(streamFeedResult{Delta: "done", Eou: true})
|
||||
Expect(b.ended()).To(BeTrue())
|
||||
})
|
||||
|
||||
It("matches the reference fold over seeded random feed sequences", func() {
|
||||
// The invariant: after any sequence of feeds, ended() is true iff the
|
||||
// last feed that carried ANY event was an <EOU>. <EOU> takes priority
|
||||
// when a feed carries both an EOU and speech; empty feeds are ignored.
|
||||
for seed := uint64(1); seed <= 200; seed++ {
|
||||
rng := rand.New(rand.NewPCG(seed, seed*2654435761))
|
||||
b := boundaryOpen
|
||||
lastWasEou := false // reference: did the last meaningful feed end on EOU?
|
||||
steps := rng.IntN(30)
|
||||
for i := 0; i < steps; i++ {
|
||||
var r streamFeedResult
|
||||
switch rng.IntN(5) {
|
||||
case 0:
|
||||
r = streamFeedResult{Eou: true}
|
||||
case 1:
|
||||
r = streamFeedResult{Eob: true}
|
||||
case 2:
|
||||
r = streamFeedResult{Delta: "w"}
|
||||
case 3:
|
||||
r = streamFeedResult{Delta: "w", Eou: true} // eou + speech, eou wins
|
||||
case 4:
|
||||
r = streamFeedResult{} // empty: no-op
|
||||
}
|
||||
b = b.observe(r)
|
||||
if r.Eou {
|
||||
lastWasEou = true
|
||||
} else if r.Eob || r.Delta != "" || len(r.Words) > 0 {
|
||||
lastWasEou = false
|
||||
}
|
||||
}
|
||||
Expect(b.ended()).To(Equal(lastWasEou),
|
||||
"seed %d: latch disagreed with the reference fold", seed)
|
||||
}
|
||||
})
|
||||
})
|
||||
82
backend/go/parakeet-cpp/driver.go
Normal file
82
backend/go/parakeet-cpp/driver.go
Normal file
@@ -0,0 +1,82 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// streamFeedResult is one decode increment from a cache-aware streaming session:
|
||||
// the newly-finalized text plus the model's own per-feed boundary tokens
|
||||
// (<EOU>/<EOB>) and word timings. It is the single event type both the live
|
||||
// (bidi) and file (server-stream) paths fold over, hiding the ABI v4 JSON vs
|
||||
// older text-only entry-point split behind one shape.
|
||||
type streamFeedResult struct {
|
||||
Delta string
|
||||
Eou bool
|
||||
Eob bool
|
||||
Words []transcriptWord
|
||||
}
|
||||
|
||||
// feedChunk feeds one PCM chunk to the streaming session (or finalizes it, when
|
||||
// finalize is true) and returns the unified decode increment. It prefers the
|
||||
// ABI v4 JSON entry points (which also carry per-word timestamps) and falls
|
||||
// back to the older text-only entry points against an older libparakeet.so.
|
||||
//
|
||||
// This is the one place the JSON-vs-text choice is made; every consumer works
|
||||
// in terms of streamFeedResult.
|
||||
func (p *ParakeetCpp) feedChunk(stream uintptr, pcm []float32, finalize bool) (streamFeedResult, error) {
|
||||
if CppStreamFeedJSON != nil {
|
||||
doc, err := p.streamFeedDoc(stream, pcm, finalize)
|
||||
if err != nil {
|
||||
return streamFeedResult{}, err
|
||||
}
|
||||
return streamFeedResult{Delta: doc.Text, Eou: doc.Eou != 0, Eob: doc.Eob != 0, Words: doc.Words}, nil
|
||||
}
|
||||
delta, eou, eob, err := p.streamFeedText(stream, pcm, finalize)
|
||||
if err != nil {
|
||||
return streamFeedResult{}, err
|
||||
}
|
||||
return streamFeedResult{Delta: delta, Eou: eou, Eob: eob}, nil
|
||||
}
|
||||
|
||||
// feedSlices feeds pcm through the session in streamChunkSamples slices,
|
||||
// invoking onFeed for each decode increment. It does NOT finalize: callers
|
||||
// decide when the send side is done. The file path finalizes after the whole
|
||||
// file; the live path finalizes only when its request channel closes, never
|
||||
// between audio messages. Slicing keeps each per-call engineMu hold short so
|
||||
// concurrent unary transcription interleaves fairly (the C session buffers
|
||||
// internally).
|
||||
//
|
||||
// If ctx is non-nil it is checked before each slice so a cancelled file
|
||||
// transcription stops promptly; the live path passes nil (it is bounded by its
|
||||
// request channel instead of a ctx).
|
||||
func (p *ParakeetCpp) feedSlices(ctx context.Context, stream uintptr, pcm []float32, onFeed func(streamFeedResult) error) error {
|
||||
for off := 0; off < len(pcm); off += streamChunkSamples {
|
||||
if ctx != nil {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return status.Error(codes.Canceled, "transcription cancelled")
|
||||
}
|
||||
}
|
||||
end := min(off+streamChunkSamples, len(pcm))
|
||||
res, err := p.feedChunk(stream, pcm[off:end], false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := onFeed(res); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// flushTail finalizes the session once and folds the flushed tail (the last
|
||||
// ~2 encoder frames of text, which only appear on finalize) through onFeed.
|
||||
func (p *ParakeetCpp) flushTail(stream uintptr, onFeed func(streamFeedResult) error) error {
|
||||
res, err := p.feedChunk(stream, nil, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return onFeed(res)
|
||||
}
|
||||
@@ -103,12 +103,13 @@ type transcriptJSON struct {
|
||||
// {"text":"...","eou":0,"eob":0,"frame_sec":0.080000,
|
||||
// "words":[{"w":"...","start":0.480,"end":0.640,"conf":0.9100}, ...]}
|
||||
//
|
||||
// "text" is the newly-finalized text since the last call; "eou" is 1 when an
|
||||
// <EOU> (end of utterance) fired this feed and "eob" is 1 when an <EOB>
|
||||
// (backchannel) fired. ABI v4 conflated the two into "eou"; v5 split them, so
|
||||
// we read both and treat either as an utterance boundary for segmentation.
|
||||
// "words" are the words finalized this call with absolute (stream-relative)
|
||||
// start/end seconds.
|
||||
// "text" is the newly-finalized text since the last call. Under ABI v5 "eou"
|
||||
// is 1 iff an <EOU> fired this feed (the user yielded the turn) and "eob" 1
|
||||
// iff an <EOB> fired (a backchannel like "uh-huh" ended — NOT a turn
|
||||
// boundary). A v4 library has no "eob" field and its "eou" conflates both
|
||||
// tokens: Eob stays 0 and Eou keeps the old any-event meaning. "words" are
|
||||
// the words finalized this call with absolute (stream-relative) start/end
|
||||
// seconds.
|
||||
type streamFeedJSON struct {
|
||||
Text string `json:"text"`
|
||||
Eou int `json:"eou"`
|
||||
@@ -364,7 +365,7 @@ var segmentSeparators = []rune{'.', '?', '!'}
|
||||
// the caller requested word granularity; token ids populate each segment's
|
||||
// Tokens by time-window membership. Shared by the batched and direct paths.
|
||||
func transcriptResultFromDoc(doc transcriptJSON, opts *pb.TranscriptRequest, gapFrames int) pb.TranscriptResult {
|
||||
text := strings.TrimSpace(doc.Text)
|
||||
text, eou := stripEouMarker(strings.TrimSpace(doc.Text))
|
||||
|
||||
// Frame-unit gap threshold -> seconds (NeMo segment_gap_threshold). 0 = off.
|
||||
gapSeconds := 0.0
|
||||
@@ -383,6 +384,7 @@ func transcriptResultFromDoc(doc transcriptJSON, opts *pb.TranscriptRequest, gap
|
||||
return pb.TranscriptResult{
|
||||
Text: text,
|
||||
Segments: []*pb.TranscriptSegment{{Id: 0, Text: text}},
|
||||
Eou: eou,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -409,7 +411,25 @@ func transcriptResultFromDoc(doc transcriptJSON, opts *pb.TranscriptRequest, gap
|
||||
}
|
||||
segments = append(segments, seg)
|
||||
}
|
||||
return pb.TranscriptResult{Text: text, Segments: segments}
|
||||
return pb.TranscriptResult{Text: text, Segments: segments, Eou: eou}
|
||||
}
|
||||
|
||||
// stripEouMarker drops one trailing literal "<EOU>" or "<EOB>" from text and
// reports whether the marker removed was "<EOU>".
//
// The offline decode of the realtime EOU model leaves the special token in its
// detokenized text, whereas user-visible transcripts must carry neither
// marker. Only "<EOU>" yields true: text ending in "<EOB>" ended on a
// backchannel, not on the user yielding the turn.
//
// When a marker is removed the remainder is whitespace-trimmed. Text that ends
// in neither marker is returned exactly as given, with false.
func stripEouMarker(text string) (string, bool) {
	markers := [...]struct {
		token     string
		utterance bool
	}{
		{"<EOU>", true},
		{"<EOB>", false},
	}
	for _, m := range markers {
		if rest, found := strings.CutSuffix(text, m.token); found {
			return strings.TrimSpace(rest), m.utterance
		}
	}
	return text, false
}
|
||||
|
||||
// splitWordsIntoSegments groups words into segments exactly as NeMo's
|
||||
@@ -476,41 +496,55 @@ func tokensInWindow(tokens []transcriptToken, start, end float64) []int32 {
|
||||
return ids
|
||||
}
|
||||
|
||||
// streamSegmenter accumulates streaming words into per-utterance segments. EOU
|
||||
// is the model's own utterance boundary; each closed segment takes its start/end
|
||||
// from its first/last accumulated word.
|
||||
// streamSegmenter accumulates streaming decode increments into per-utterance
|
||||
// segments. <EOU>/<EOB> are the model's own utterance boundaries; each closes a
|
||||
// segment. When the feed carries per-word timings (ABI v4 JSON), a closed
|
||||
// segment takes its start/end from its first/last word; against an older
|
||||
// text-only library (no words) it falls back to segmenting the delta text, so
|
||||
// the same assembler serves both paths.
|
||||
type streamSegmenter struct {
|
||||
segs []*pb.TranscriptSegment
|
||||
cur []transcriptWord
|
||||
nextID int32
|
||||
segs []*pb.TranscriptSegment
|
||||
cur []transcriptWord // words for the open segment (ABI v4 JSON path)
|
||||
curText []string // delta text for the open segment (text-only path)
|
||||
nextID int32
|
||||
}
|
||||
|
||||
func (s *streamSegmenter) add(doc streamFeedJSON) {
|
||||
s.cur = append(s.cur, doc.Words...)
|
||||
// Close the segment on either turn signal: <EOU> (end of utterance) or
|
||||
// <EOB> (backchannel). ABI v4 reported both via "eou"; v5 split them, so we
|
||||
// OR them here to keep the v4 segmentation boundaries.
|
||||
if doc.Eou != 0 || doc.Eob != 0 {
|
||||
func (s *streamSegmenter) add(r streamFeedResult) {
|
||||
s.cur = append(s.cur, r.Words...)
|
||||
if len(r.Words) == 0 && r.Delta != "" {
|
||||
// Older libparakeet.so with no per-word timing: segment from the text.
|
||||
s.curText = append(s.curText, r.Delta)
|
||||
}
|
||||
// Both <EOU> and <EOB> reset the decoder, so both close a segment.
|
||||
if r.Eou || r.Eob {
|
||||
s.flush()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *streamSegmenter) flush() {
|
||||
if len(s.cur) == 0 {
|
||||
return
|
||||
switch {
|
||||
case len(s.cur) > 0:
|
||||
parts := make([]string, len(s.cur))
|
||||
for i, w := range s.cur {
|
||||
parts[i] = w.W
|
||||
}
|
||||
s.segs = append(s.segs, &pb.TranscriptSegment{
|
||||
Id: s.nextID,
|
||||
Start: secondsToNanos(s.cur[0].Start),
|
||||
End: secondsToNanos(s.cur[len(s.cur)-1].End),
|
||||
Text: strings.TrimSpace(strings.Join(parts, " ")),
|
||||
})
|
||||
s.nextID++
|
||||
case len(s.curText) > 0:
|
||||
// No words this segment: emit a text-only segment (no timestamps),
|
||||
// skipping a purely-whitespace one as the legacy text path did.
|
||||
if t := strings.TrimSpace(strings.Join(s.curText, "")); t != "" {
|
||||
s.segs = append(s.segs, &pb.TranscriptSegment{Id: s.nextID, Text: t})
|
||||
s.nextID++
|
||||
}
|
||||
}
|
||||
parts := make([]string, len(s.cur))
|
||||
for i, w := range s.cur {
|
||||
parts[i] = w.W
|
||||
}
|
||||
s.segs = append(s.segs, &pb.TranscriptSegment{
|
||||
Id: s.nextID,
|
||||
Start: secondsToNanos(s.cur[0].Start),
|
||||
End: secondsToNanos(s.cur[len(s.cur)-1].End),
|
||||
Text: strings.TrimSpace(strings.Join(parts, " ")),
|
||||
})
|
||||
s.nextID++
|
||||
s.cur = nil
|
||||
s.curText = nil
|
||||
}
|
||||
|
||||
func (s *streamSegmenter) segments() []*pb.TranscriptSegment { return s.segs }
|
||||
@@ -535,18 +569,119 @@ func secondsToNanos(sec float64) int64 {
|
||||
return int64(sec * 1e9)
|
||||
}
|
||||
|
||||
// Per-C-call engine serialization for the streaming paths.
|
||||
//
|
||||
// Every individual C call (begin / feed / finalize / free) takes engineMu and
|
||||
// re-checks ctxPtr under the lock; the lock is NEVER held across a stream's
|
||||
// lifetime. This is safe because each parakeet.cpp call builds its own ggml
|
||||
// graph and all streaming caches live in the session object, not the ctx —
|
||||
// the only ctx-shared mutable state is last_error, which is why it is read
|
||||
// under the same lock as the failing call. Holding the lock per call (rather
|
||||
// than per stream, as this file previously did) keeps a long-lived live
|
||||
// session from starving batched unary transcription and vice versa.
|
||||
//
|
||||
// A stream must not outlive its ctx (C-API contract). Free() takes engineMu
|
||||
// and zeroes ctxPtr, so a racing per-call helper returns ModelNotLoaded
|
||||
// instead of feeding a freed engine; streamFree of an orphaned session only
|
||||
// runs the session destructor, which does not touch the ctx.
|
||||
|
||||
// streamBegin opens a cache-aware streaming session. A 0 stream with nil
|
||||
// error means the loaded model is not a streaming model.
|
||||
func (p *ParakeetCpp) streamBegin(lang string) (uintptr, error) {
|
||||
p.engineMu.Lock()
|
||||
defer p.engineMu.Unlock()
|
||||
if p.ctxPtr == 0 {
|
||||
return 0, grpcerrors.ModelNotLoaded("parakeet-cpp")
|
||||
}
|
||||
if CppStreamBeginLang != nil {
|
||||
return CppStreamBeginLang(p.ctxPtr, lang), nil
|
||||
}
|
||||
return CppStreamBegin(p.ctxPtr), nil
|
||||
}
|
||||
|
||||
func (p *ParakeetCpp) streamFree(stream uintptr) {
|
||||
if stream == 0 {
|
||||
return
|
||||
}
|
||||
p.engineMu.Lock()
|
||||
defer p.engineMu.Unlock()
|
||||
CppStreamFree(stream)
|
||||
}
|
||||
|
||||
// streamFeedText runs one text-mode feed (or the finalize flush when
|
||||
// finalize is true) under engineMu, returning the newly-finalized delta and
|
||||
// whether an <EOU>/<EOB> fired during the call.
|
||||
func (p *ParakeetCpp) streamFeedText(stream uintptr, pcm []float32, finalize bool) (delta string, eou, eob bool, err error) {
|
||||
p.engineMu.Lock()
|
||||
defer p.engineMu.Unlock()
|
||||
if p.ctxPtr == 0 {
|
||||
return "", false, false, grpcerrors.ModelNotLoaded("parakeet-cpp")
|
||||
}
|
||||
var ret uintptr
|
||||
var events int32
|
||||
if finalize {
|
||||
ret = CppStreamFinalize(stream)
|
||||
} else {
|
||||
ret = CppStreamFeed(stream, pcm, int32(len(pcm)), unsafe.Pointer(&events))
|
||||
}
|
||||
if ret == 0 {
|
||||
// last_error is ctx-shared: read it under the same lock as the call.
|
||||
msg := CppLastError(p.ctxPtr)
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return "", false, false, fmt.Errorf("parakeet-cpp: stream feed/finalize failed: %s", msg)
|
||||
}
|
||||
delta = goStringFromCPtr(ret)
|
||||
CppFreeString(ret)
|
||||
// ABI v5: eou_out is a bitmask (bit 0 = <EOU>, bit 1 = <EOB>). A v4
|
||||
// library sets 0/1 for either token, which the bit-0 test reads as the
|
||||
// old conflated eou — the EOB distinction simply isn't available there.
|
||||
return delta, events&1 != 0, events&2 != 0, nil
|
||||
}
|
||||
|
||||
// streamFeedDoc runs one ABI v4 JSON feed (or finalize) under engineMu and
|
||||
// returns the parsed {text,eou,frame_sec,words} document.
|
||||
func (p *ParakeetCpp) streamFeedDoc(stream uintptr, pcm []float32, finalize bool) (streamFeedJSON, error) {
|
||||
p.engineMu.Lock()
|
||||
defer p.engineMu.Unlock()
|
||||
if p.ctxPtr == 0 {
|
||||
return streamFeedJSON{}, grpcerrors.ModelNotLoaded("parakeet-cpp")
|
||||
}
|
||||
var ret uintptr
|
||||
if finalize {
|
||||
ret = CppStreamFinalizeJSON(stream)
|
||||
} else {
|
||||
ret = CppStreamFeedJSON(stream, pcm, int32(len(pcm)))
|
||||
}
|
||||
if ret == 0 {
|
||||
msg := CppLastError(p.ctxPtr)
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return streamFeedJSON{}, fmt.Errorf("parakeet-cpp: stream feed/finalize failed: %s", msg)
|
||||
}
|
||||
raw := goStringFromCPtr(ret)
|
||||
CppFreeString(ret)
|
||||
var doc streamFeedJSON
|
||||
if err := json.Unmarshal([]byte(raw), &doc); err != nil {
|
||||
return streamFeedJSON{}, fmt.Errorf("parakeet-cpp: decode stream json: %w", err)
|
||||
}
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
// AudioTranscriptionStream drives the cache-aware streaming RNN-T over the
|
||||
// audio at opts.Dst: it decodes the file to 16 kHz mono PCM, feeds it in
|
||||
// chunks to parakeet_capi_stream_feed, and emits each newly-finalized text
|
||||
// run as a TranscriptStreamResponse delta. <EOU>/<EOB> events close the
|
||||
// current segment; a closing FinalResult carries the full transcript and the
|
||||
// per-utterance segments.
|
||||
// audio at opts.Dst: it decodes the file to 16 kHz mono PCM, feeds it through
|
||||
// the shared decode driver (feedSlices/flushTail), and emits each
|
||||
// newly-finalized text run as a TranscriptStreamResponse delta. <EOU>/<EOB>
|
||||
// events close the current segment; a closing FinalResult carries the full
|
||||
// transcript, the per-utterance segments, and whether the file ended on an
|
||||
// utterance boundary.
|
||||
//
|
||||
// stream_begin returns 0 for models that are not cache-aware streaming models
|
||||
// (only e.g. nvidia/parakeet_realtime_eou_120m-v1 qualifies). For those we fall
|
||||
// back to a single offline transcription emitted as one delta plus a closing
|
||||
// FinalResult, matching LocalAI's non-streaming streaming contract (and the
|
||||
// whisper backend), so the streaming endpoint works for every model.
|
||||
// (only e.g. nvidia/parakeet_realtime_eou_120m-v1 qualifies). For those this
|
||||
// returns codes.Unimplemented rather than faking a stream from an offline
|
||||
// decode — see the stream==0 branch and grpcerrors.StreamTranscriptionUnsupported.
|
||||
func (p *ParakeetCpp) AudioTranscriptionStream(ctx context.Context, opts *pb.TranscriptRequest, results chan *pb.TranscriptStreamResponse) error {
|
||||
defer close(results)
|
||||
|
||||
@@ -560,185 +695,73 @@ func (p *ParakeetCpp) AudioTranscriptionStream(ctx context.Context, opts *pb.Tra
|
||||
return status.Error(codes.Canceled, "transcription cancelled")
|
||||
}
|
||||
|
||||
var stream uintptr
|
||||
if CppStreamBeginLang != nil {
|
||||
stream = CppStreamBeginLang(p.ctxPtr, opts.GetLanguage())
|
||||
} else {
|
||||
stream = CppStreamBegin(p.ctxPtr)
|
||||
stream, err := p.streamBegin(opts.GetLanguage())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if stream == 0 {
|
||||
// Not a cache-aware streaming model: run a normal offline
|
||||
// transcription and emit it as one delta + a closing final result.
|
||||
res, err := p.AudioTranscription(ctx, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t := strings.TrimSpace(res.Text); t != "" {
|
||||
results <- &pb.TranscriptStreamResponse{Delta: t}
|
||||
}
|
||||
results <- &pb.TranscriptStreamResponse{FinalResult: &res}
|
||||
return nil
|
||||
// Not a cache-aware streaming model. Report the missing capability
|
||||
// honestly instead of decoding offline and emitting it as one "delta"
|
||||
// + final: a client that asked for streaming must learn the model
|
||||
// cannot stream, not receive a batch result dressed as a stream (which
|
||||
// is indistinguishable except qualitatively, and silently breaks any
|
||||
// feature that genuinely needs incremental output). Callers wanting a
|
||||
// plain transcript use the unary AudioTranscription path. This mirrors
|
||||
// AudioTranscriptionLive, which already returns Unimplemented here.
|
||||
return grpcerrors.StreamTranscriptionUnsupported("parakeet-cpp",
|
||||
"loaded model is not a cache-aware streaming model")
|
||||
}
|
||||
defer CppStreamFree(stream)
|
||||
// The C engine is a single shared context: a streaming session and a batched
|
||||
// unary dispatch must never touch it at once, so hold engineMu for the whole
|
||||
// stream. This lock is intentionally taken AFTER the non-streaming fallback
|
||||
// above returns: that fallback goes through AudioTranscription -> the batcher
|
||||
// -> runBatch, which itself acquires engineMu, so locking here first would
|
||||
// deadlock. Do not hoist this lock above the fallback.
|
||||
p.engineMu.Lock()
|
||||
defer p.engineMu.Unlock()
|
||||
defer p.streamFree(stream)
|
||||
|
||||
data, duration, err := decodeWavMono16k(opts.Dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ABI v4: when the streaming JSON entry points are present, drive them so the
|
||||
// per-utterance segments carry per-word start/end timestamps. Falls through to
|
||||
// the text-only loop below against an older libparakeet.so. Runs under the
|
||||
// engineMu already held above.
|
||||
if CppStreamFeedJSON != nil {
|
||||
return p.streamJSON(ctx, stream, data, duration, results)
|
||||
}
|
||||
|
||||
// Fold the shared decode driver's per-feed increments into the streamed
|
||||
// deltas and the closing batch result: words/text accumulate into
|
||||
// per-utterance segments (streamSegmenter), and the utterance-boundary
|
||||
// latch (boundary.go) records whether the file ended on an <EOU>. These
|
||||
// are the offline path's concern — the live RPC carries none of them.
|
||||
var (
|
||||
full strings.Builder
|
||||
segText strings.Builder
|
||||
segments []*pb.TranscriptSegment
|
||||
segID int32
|
||||
seg streamSegmenter
|
||||
boundary utteranceBoundary
|
||||
)
|
||||
|
||||
flushSegment := func() {
|
||||
t := strings.TrimSpace(segText.String())
|
||||
segText.Reset()
|
||||
if t == "" {
|
||||
return
|
||||
emit := func(r streamFeedResult) error {
|
||||
if r.Delta != "" {
|
||||
full.WriteString(r.Delta)
|
||||
results <- &pb.TranscriptStreamResponse{Delta: r.Delta}
|
||||
}
|
||||
segments = append(segments, &pb.TranscriptSegment{Id: segID, Text: t})
|
||||
segID++
|
||||
}
|
||||
|
||||
// emitDelta consumes the malloc'd char* returned by feed/finalize: frees
|
||||
// it, accumulates the text, and sends a delta when non-empty. A 0 return
|
||||
// is an error (vs the "" empty-but-non-NULL no-new-text case).
|
||||
emitDelta := func(ret uintptr) error {
|
||||
if ret == 0 {
|
||||
msg := CppLastError(p.ctxPtr)
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return fmt.Errorf("parakeet-cpp: stream feed/finalize failed: %s", msg)
|
||||
}
|
||||
delta := goStringFromCPtr(ret)
|
||||
CppFreeString(ret)
|
||||
if delta == "" {
|
||||
return nil
|
||||
}
|
||||
full.WriteString(delta)
|
||||
segText.WriteString(delta)
|
||||
results <- &pb.TranscriptStreamResponse{Delta: delta}
|
||||
seg.add(r)
|
||||
boundary = boundary.observe(r)
|
||||
return nil
|
||||
}
|
||||
|
||||
for off := 0; off < len(data); off += streamChunkSamples {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return status.Error(codes.Canceled, "transcription cancelled")
|
||||
}
|
||||
end := min(off+streamChunkSamples, len(data))
|
||||
chunk := data[off:end]
|
||||
|
||||
var eou int32
|
||||
ret := CppStreamFeed(stream, chunk, int32(len(chunk)), unsafe.Pointer(&eou))
|
||||
if err := emitDelta(ret); err != nil {
|
||||
return err
|
||||
}
|
||||
if eou != 0 {
|
||||
flushSegment()
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the streaming tail (final encoder chunk).
|
||||
if err := emitDelta(CppStreamFinalize(stream)); err != nil {
|
||||
if err := p.feedSlices(ctx, stream, data, emit); err != nil {
|
||||
return err
|
||||
}
|
||||
flushSegment()
|
||||
|
||||
text := strings.TrimSpace(full.String())
|
||||
if len(segments) == 0 && text != "" {
|
||||
segments = append(segments, &pb.TranscriptSegment{Id: 0, Text: text})
|
||||
}
|
||||
results <- &pb.TranscriptStreamResponse{
|
||||
FinalResult: &pb.TranscriptResult{
|
||||
Text: text,
|
||||
Segments: segments,
|
||||
Duration: duration,
|
||||
},
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// streamJSON drives the streaming JSON entry points (present since ABI v4): each
|
||||
// feed/finalize returns a {text,eou,eob,frame_sec,words} document. The
|
||||
// newly-finalized text is emitted as a delta (unchanged streaming contract)
|
||||
// while words are accumulated into per-utterance segments (closed on <EOU> or
|
||||
// <EOB>) so the closing FinalResult carries timestamped segments. Runs under
|
||||
// engineMu (already held by the caller).
|
||||
func (p *ParakeetCpp) streamJSON(ctx context.Context, stream uintptr, data []float32,
|
||||
duration float32, results chan *pb.TranscriptStreamResponse) error {
|
||||
var (
|
||||
full strings.Builder
|
||||
seg streamSegmenter
|
||||
)
|
||||
// consume frees the malloc'd char* (a 0 return is an error), parses the JSON,
|
||||
// emits the delta, and routes words through the segmenter.
|
||||
consume := func(ret uintptr) error {
|
||||
if ret == 0 {
|
||||
msg := CppLastError(p.ctxPtr)
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return fmt.Errorf("parakeet-cpp: stream feed/finalize failed: %s", msg)
|
||||
}
|
||||
raw := goStringFromCPtr(ret)
|
||||
CppFreeString(ret)
|
||||
var doc streamFeedJSON
|
||||
if err := json.Unmarshal([]byte(raw), &doc); err != nil {
|
||||
return fmt.Errorf("parakeet-cpp: decode stream json: %w", err)
|
||||
}
|
||||
if doc.Text != "" {
|
||||
full.WriteString(doc.Text)
|
||||
results <- &pb.TranscriptStreamResponse{Delta: doc.Text}
|
||||
}
|
||||
seg.add(doc)
|
||||
return nil
|
||||
}
|
||||
|
||||
for off := 0; off < len(data); off += streamChunkSamples {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return status.Error(codes.Canceled, "transcription cancelled")
|
||||
}
|
||||
end := min(off+streamChunkSamples, len(data))
|
||||
chunk := data[off:end]
|
||||
if err := consume(CppStreamFeedJSON(stream, chunk, int32(len(chunk)))); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := consume(CppStreamFinalizeJSON(stream)); err != nil {
|
||||
if err := p.flushTail(stream, emit); err != nil {
|
||||
return err
|
||||
}
|
||||
seg.flush() // close any trailing utterance that never saw an EOU
|
||||
seg.flush() // close a trailing utterance that never saw an <EOU>
|
||||
|
||||
text := strings.TrimSpace(full.String())
|
||||
// final.Text is the exact concatenation of the streamed deltas (full is
|
||||
// their accumulation), so concat(deltas) == FinalResult.Text holds even
|
||||
// when the model prepends a leading space to the first word (SentencePiece
|
||||
// detokenization). This matches the whisper backend's streaming contract.
|
||||
// The single-segment fallback stays trimmed.
|
||||
fullText := full.String()
|
||||
segments := seg.segments()
|
||||
if len(segments) == 0 && text != "" {
|
||||
segments = append(segments, &pb.TranscriptSegment{Id: 0, Text: text})
|
||||
if trimmed := strings.TrimSpace(fullText); len(segments) == 0 && trimmed != "" {
|
||||
segments = append(segments, &pb.TranscriptSegment{Id: 0, Text: trimmed})
|
||||
}
|
||||
results <- &pb.TranscriptStreamResponse{
|
||||
FinalResult: &pb.TranscriptResult{
|
||||
Text: text,
|
||||
Text: fullText,
|
||||
Segments: segments,
|
||||
Duration: duration,
|
||||
Eou: boundary.ended(),
|
||||
},
|
||||
}
|
||||
return nil
|
||||
@@ -803,6 +826,10 @@ func (p *ParakeetCpp) Free() error {
|
||||
close(p.batStop)
|
||||
p.batStop = nil
|
||||
}
|
||||
// engineMu so an in-flight streaming call (which locks per C call and
|
||||
// re-checks ctxPtr under the lock) can never feed into a freed ctx.
|
||||
p.engineMu.Lock()
|
||||
defer p.engineMu.Unlock()
|
||||
if p.ctxPtr != 0 {
|
||||
CppFree(p.ctxPtr)
|
||||
p.ctxPtr = 0
|
||||
|
||||
@@ -14,6 +14,8 @@ import (
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
func TestParakeetCpp(t *testing.T) {
|
||||
@@ -201,6 +203,29 @@ var _ = Describe("ParakeetCpp", func() {
|
||||
})
|
||||
|
||||
Context("AudioTranscriptionStream", func() {
|
||||
It("returns the typed Unimplemented signal for non-streaming models (no offline fallback)", func() {
|
||||
// stream_begin == 0 means the loaded model is not a cache-aware
|
||||
// streaming model. The backend must surface that, not silently
|
||||
// decode offline and fake a one-shot "stream".
|
||||
savedBegin, savedBeginLang := CppStreamBegin, CppStreamBeginLang
|
||||
defer func() { CppStreamBegin, CppStreamBeginLang = savedBegin, savedBeginLang }()
|
||||
CppStreamBeginLang = nil
|
||||
CppStreamBegin = func(ctx uintptr) uintptr { return 0 }
|
||||
|
||||
p := &ParakeetCpp{ctxPtr: 1}
|
||||
results := make(chan *pb.TranscriptStreamResponse, 8)
|
||||
err := p.AudioTranscriptionStream(context.Background(),
|
||||
&pb.TranscriptRequest{Dst: "ignored.wav"}, results)
|
||||
Expect(status.Code(err)).To(Equal(codes.Unimplemented))
|
||||
|
||||
// Honest signal: nothing was emitted — no faked batch result.
|
||||
var emitted []*pb.TranscriptStreamResponse
|
||||
for r := range results {
|
||||
emitted = append(emitted, r)
|
||||
}
|
||||
Expect(emitted).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("streams deltas and a closing FinalResult from a cache-aware model", func() {
|
||||
// Streaming needs a cache-aware streaming model (e.g.
|
||||
// realtime_eou); the offline test model would fail stream_begin.
|
||||
|
||||
186
backend/go/parakeet-cpp/live.go
Normal file
186
backend/go/parakeet-cpp/live.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/xlog"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// liveSampleRate is the only PCM rate the parakeet C streaming API accepts.
|
||||
const liveSampleRate = 16000
|
||||
|
||||
// AudioTranscriptionLive drives one cache-aware streaming session over audio
|
||||
// fed incrementally by the caller (the realtime API's semantic_vad turn
|
||||
// detection). Contract:
|
||||
//
|
||||
// - the first request must carry a Config; a Config mid-stream resets the
|
||||
// decode session (free + begin) and drops accumulated transcript state;
|
||||
// - a Ready ack is sent right after a successful stream_begin so callers
|
||||
// can degrade synchronously when the model has no streaming support
|
||||
// (LiveTranscriptionUnsupported, codes.Unimplemented);
|
||||
// - every feed that produced output is forwarded as {delta, eou, words};
|
||||
// the <EOU>/<EOB> flag is the model's own utterance boundary and the
|
||||
// decoder auto-resets after it, so one session spans many utterances;
|
||||
// - closing the send side finalizes: the held-back tail chunk is flushed
|
||||
// (the last ~2 encoder frames of words only appear here) and a terminal
|
||||
// FinalResult carries the full transcript Text only. Per-utterance
|
||||
// segments, duration, and the terminal <EOU> flag are NOT produced here —
|
||||
// the realtime core consumes the streamed per-feed tokens and the final
|
||||
// Text; those batch fields are the file path's concern (see
|
||||
// AudioTranscriptionStream).
|
||||
//
|
||||
// Engine access is serialized per C call (streamBegin/streamFeed*/streamFree
|
||||
// take engineMu internally), never for the session lifetime — unary
|
||||
// transcription keeps flowing between feeds.
|
||||
func (p *ParakeetCpp) AudioTranscriptionLive(in <-chan *pb.TranscriptLiveRequest, out chan<- *pb.TranscriptLiveResponse) error {
|
||||
defer close(out)
|
||||
|
||||
if p.ctxPtr == 0 {
|
||||
return grpcerrors.ModelNotLoaded("parakeet-cpp")
|
||||
}
|
||||
|
||||
first, ok := <-in
|
||||
if !ok {
|
||||
return nil // caller closed without sending anything
|
||||
}
|
||||
cfg := first.GetConfig()
|
||||
if cfg == nil {
|
||||
return status.Error(codes.InvalidArgument, "parakeet-cpp: first live message must carry a config")
|
||||
}
|
||||
if err := validateLiveConfig(cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stream, err := p.streamBegin(cfg.GetLanguage())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if stream == 0 {
|
||||
return grpcerrors.LiveTranscriptionUnsupported("parakeet-cpp",
|
||||
"loaded model is not a cache-aware streaming model")
|
||||
}
|
||||
// stream is reassigned on a mid-stream Config reset; free whatever is
|
||||
// current when the RPC unwinds.
|
||||
defer func() { p.streamFree(stream) }()
|
||||
|
||||
out <- &pb.TranscriptLiveResponse{Ready: true}
|
||||
|
||||
var (
|
||||
full strings.Builder
|
||||
fedSecs float64
|
||||
|
||||
// behindSec accumulates how far decode wall time has fallen behind
|
||||
// the audio it was fed. A live caller feeds in real time, so a
|
||||
// persistent positive backlog means every downstream signal —
|
||||
// including the <EOU> the turn detector waits on — arrives that many
|
||||
// seconds late. Warned once per session; reset by a Config reset.
|
||||
behindSec float64
|
||||
behindWarned bool
|
||||
)
|
||||
|
||||
// emit forwards one decode increment: it streams the per-feed tokens the
|
||||
// realtime turn detector consumes (delta/eou/eob/words) and accumulates the
|
||||
// running transcript for the closing FinalResult. No segmentation or
|
||||
// boundary latch here — the live consumer reads only the streamed tokens
|
||||
// and the final Text; per-utterance segments and the terminal <EOU> flag
|
||||
// are an offline-path concern (see AudioTranscriptionStream / boundary.go).
|
||||
emit := func(r streamFeedResult) error {
|
||||
if r.Delta != "" {
|
||||
full.WriteString(r.Delta)
|
||||
}
|
||||
if r.Delta != "" || r.Eou || r.Eob || len(r.Words) > 0 {
|
||||
out <- &pb.TranscriptLiveResponse{
|
||||
Delta: r.Delta,
|
||||
Eou: r.Eou,
|
||||
Eob: r.Eob,
|
||||
Words: liveWordsToProto(r.Words),
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
for req := range in {
|
||||
switch payload := req.GetPayload().(type) {
|
||||
case *pb.TranscriptLiveRequest_Config:
|
||||
if err := validateLiveConfig(payload.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
// Reset: a fresh decode session, dropping accumulated state.
|
||||
p.streamFree(stream)
|
||||
stream, err = p.streamBegin(payload.Config.GetLanguage())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if stream == 0 {
|
||||
return grpcerrors.LiveTranscriptionUnsupported("parakeet-cpp",
|
||||
"loaded model is not a cache-aware streaming model")
|
||||
}
|
||||
full.Reset()
|
||||
fedSecs = 0
|
||||
case *pb.TranscriptLiveRequest_Audio:
|
||||
pcm := payload.Audio.GetPcm()
|
||||
audioSec := float64(len(pcm)) / liveSampleRate
|
||||
fedSecs += audioSec
|
||||
start := time.Now()
|
||||
// nil ctx: a live session is bounded by this request channel, not a
|
||||
// context — cancellation is the caller closing the stream.
|
||||
if err := p.feedSlices(nil, stream, pcm, emit); err != nil {
|
||||
return err
|
||||
}
|
||||
wallSec := time.Since(start).Seconds()
|
||||
behindSec += wallSec - audioSec
|
||||
if behindSec < 0 {
|
||||
behindSec = 0
|
||||
}
|
||||
xlog.Debug("parakeet-cpp: live feed",
|
||||
"audio_ms", int(audioSec*1000), "wall_ms", int(wallSec*1000),
|
||||
"behind_ms", int(behindSec*1000), "fed_s", fedSecs)
|
||||
if behindSec > 1 && !behindWarned {
|
||||
behindWarned = true
|
||||
xlog.Warn("parakeet-cpp: live decode is falling behind real time; "+
|
||||
"end-of-utterance signals will arrive late",
|
||||
"behind_s", behindSec, "fed_s", fedSecs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Send side closed: flush the streaming tail and emit the final transcript.
|
||||
// The live FinalResult carries only Text — the authoritative full-turn
|
||||
// transcript the realtime core commits. Per-utterance segments, duration,
|
||||
// and the terminal <EOU> flag are not produced on the live path.
|
||||
if err := p.flushTail(stream, emit); err != nil {
|
||||
return err
|
||||
}
|
||||
out <- &pb.TranscriptLiveResponse{
|
||||
FinalResult: &pb.TranscriptResult{Text: strings.TrimSpace(full.String())},
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateLiveConfig(cfg *pb.TranscriptLiveConfig) error {
|
||||
if sr := cfg.GetSampleRate(); sr != 0 && sr != liveSampleRate {
|
||||
return status.Errorf(codes.InvalidArgument,
|
||||
"parakeet-cpp: unsupported live sample_rate %d (only %d)", sr, liveSampleRate)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func liveWordsToProto(words []transcriptWord) []*pb.TranscriptWord {
|
||||
if len(words) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]*pb.TranscriptWord, len(words))
|
||||
for i, w := range words {
|
||||
out[i] = &pb.TranscriptWord{
|
||||
Start: secondsToNanos(w.Start),
|
||||
End: secondsToNanos(w.End),
|
||||
Text: w.W,
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
417
backend/go/parakeet-cpp/live_test.go
Normal file
417
backend/go/parakeet-cpp/live_test.go
Normal file
@@ -0,0 +1,417 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// The live-RPC specs drive AudioTranscriptionLive entirely against stubbed
|
||||
// Cpp* package vars (the same seam batcher_test.go uses), so they run
|
||||
// without libparakeet.so.
|
||||
|
||||
// liveCstrPool produces NUL-terminated, C-style strings that live in Go
// memory. Every buffer it hands out stays referenced from bufs, so the
// backing array remains reachable for the duration of a spec while a stub's
// raw pointer is in flight (goStringFromCPtr reads through that pointer).
type liveCstrPool struct {
	mu   sync.Mutex
	bufs [][]byte
}

// cstr copies s into a fresh buffer with a trailing NUL, retains the buffer
// in the pool, and returns the address of its first byte.
func (p *liveCstrPool) cstr(s string) uintptr {
	// make zero-fills, so the byte after the copied text is the terminator.
	terminated := make([]byte, len(s)+1)
	copy(terminated, s)

	p.mu.Lock()
	p.bufs = append(p.bufs, terminated)
	p.mu.Unlock()

	return uintptr(unsafe.Pointer(&terminated[0]))
}
|
||||
|
||||
// liveStubs swaps every C entry point the live path touches and returns a
|
||||
// restore func for AfterEach.
|
||||
func liveStubs() (restore func()) {
|
||||
savedBegin, savedBeginLang := CppStreamBegin, CppStreamBeginLang
|
||||
savedFeed, savedFeedJSON := CppStreamFeed, CppStreamFeedJSON
|
||||
savedFinalize, savedFinalizeJSON := CppStreamFinalize, CppStreamFinalizeJSON
|
||||
savedFree, savedLastError := CppStreamFree, CppLastError
|
||||
savedFreeString := CppFreeString
|
||||
return func() {
|
||||
CppStreamBegin, CppStreamBeginLang = savedBegin, savedBeginLang
|
||||
CppStreamFeed, CppStreamFeedJSON = savedFeed, savedFeedJSON
|
||||
CppStreamFinalize, CppStreamFinalizeJSON = savedFinalize, savedFinalizeJSON
|
||||
CppStreamFree, CppLastError = savedFree, savedLastError
|
||||
CppFreeString = savedFreeString
|
||||
}
|
||||
}
|
||||
|
||||
// runLive starts the RPC on its own goroutine and returns the request
|
||||
// channel plus a collector for everything the backend emitted.
|
||||
func runLive(p *ParakeetCpp) (chan *pb.TranscriptLiveRequest, chan *pb.TranscriptLiveResponse, chan error) {
|
||||
in := make(chan *pb.TranscriptLiveRequest)
|
||||
out := make(chan *pb.TranscriptLiveResponse, 32)
|
||||
errCh := make(chan error, 1)
|
||||
go func() { errCh <- p.AudioTranscriptionLive(in, out) }()
|
||||
return in, out, errCh
|
||||
}
|
||||
|
||||
func liveConfig(lang string) *pb.TranscriptLiveRequest {
|
||||
return &pb.TranscriptLiveRequest{
|
||||
Payload: &pb.TranscriptLiveRequest_Config{Config: &pb.TranscriptLiveConfig{Language: lang}},
|
||||
}
|
||||
}
|
||||
|
||||
func liveAudio(pcm []float32) *pb.TranscriptLiveRequest {
|
||||
return &pb.TranscriptLiveRequest{
|
||||
Payload: &pb.TranscriptLiveRequest_Audio{Audio: &pb.TranscriptLiveAudio{Pcm: pcm}},
|
||||
}
|
||||
}
|
||||
|
||||
func collectLive(out chan *pb.TranscriptLiveResponse) []*pb.TranscriptLiveResponse {
|
||||
var got []*pb.TranscriptLiveResponse
|
||||
for r := range out {
|
||||
got = append(got, r)
|
||||
}
|
||||
return got
|
||||
}
|
||||
|
||||
var _ = Describe("AudioTranscriptionLive (stubbed C API)", func() {
|
||||
var (
|
||||
pool *liveCstrPool
|
||||
restore func()
|
||||
p *ParakeetCpp
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
pool = &liveCstrPool{}
|
||||
restore = liveStubs()
|
||||
p = &ParakeetCpp{ctxPtr: 1}
|
||||
|
||||
CppStreamBeginLang = nil
|
||||
CppStreamBegin = func(ctx uintptr) uintptr { return 7 }
|
||||
CppStreamFree = func(s uintptr) {}
|
||||
CppFreeString = func(s uintptr) {}
|
||||
CppLastError = func(ctx uintptr) string { return "stub error" }
|
||||
CppStreamFeed = nil
|
||||
CppStreamFeedJSON = nil
|
||||
CppStreamFinalize = nil
|
||||
CppStreamFinalizeJSON = nil
|
||||
})
|
||||
|
||||
AfterEach(func() { restore() })
|
||||
|
||||
It("rejects a stream whose first message is not a config", func() {
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveAudio([]float32{0.1})
|
||||
close(in)
|
||||
|
||||
err := <-errCh
|
||||
Expect(status.Code(err)).To(Equal(codes.InvalidArgument))
|
||||
Expect(collectLive(out)).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("rejects a non-16k sample rate", func() {
|
||||
in, _, errCh := runLive(p)
|
||||
in <- &pb.TranscriptLiveRequest{
|
||||
Payload: &pb.TranscriptLiveRequest_Config{Config: &pb.TranscriptLiveConfig{SampleRate: 8000}},
|
||||
}
|
||||
close(in)
|
||||
Expect(status.Code(<-errCh)).To(Equal(codes.InvalidArgument))
|
||||
})
|
||||
|
||||
It("returns the typed Unimplemented signal for non-streaming models, before any ack", func() {
|
||||
CppStreamBegin = func(ctx uintptr) uintptr { return 0 }
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
close(in)
|
||||
|
||||
err := <-errCh
|
||||
Expect(grpcerrors.IsLiveTranscriptionUnsupported(err)).To(BeTrue())
|
||||
Expect(collectLive(out)).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("streams deltas, eou flags and words on the JSON path and finalizes on close", func() {
|
||||
var freed []uintptr
|
||||
CppStreamFree = func(s uintptr) { freed = append(freed, s) }
|
||||
feeds := 0
|
||||
CppStreamFeedJSON = func(s uintptr, pcm []float32, n int32) uintptr {
|
||||
feeds++
|
||||
switch feeds {
|
||||
case 1:
|
||||
return pool.cstr(`{"text":"hello ","eou":0,"frame_sec":0.08,` +
|
||||
`"words":[{"w":"hello","start":0.1,"end":0.4,"conf":0.9}]}`)
|
||||
default:
|
||||
return pool.cstr(`{"text":"world","eou":1,"frame_sec":0.08,` +
|
||||
`"words":[{"w":"world","start":0.5,"end":0.8,"conf":0.9}]}`)
|
||||
}
|
||||
}
|
||||
CppStreamFinalizeJSON = func(s uintptr) uintptr {
|
||||
return pool.cstr(`{"text":"","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("en")
|
||||
in <- liveAudio(make([]float32, 100))
|
||||
in <- liveAudio(make([]float32, 200))
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
|
||||
got := collectLive(out)
|
||||
Expect(got).To(HaveLen(4)) // ready, two deltas, final
|
||||
|
||||
Expect(got[0].Ready).To(BeTrue())
|
||||
|
||||
Expect(got[1].Delta).To(Equal("hello "))
|
||||
Expect(got[1].Eou).To(BeFalse())
|
||||
Expect(got[1].Words).To(HaveLen(1))
|
||||
Expect(got[1].Words[0].Text).To(Equal("hello"))
|
||||
|
||||
Expect(got[2].Delta).To(Equal("world"))
|
||||
Expect(got[2].Eou).To(BeTrue())
|
||||
|
||||
final := got[3].FinalResult
|
||||
Expect(final).NotTo(BeNil())
|
||||
Expect(final.Text).To(Equal("hello world"))
|
||||
// The live FinalResult carries only Text. Per-utterance segments,
|
||||
// duration and the terminal eou flag are an offline-path concern (see
|
||||
// boundary.go / AudioTranscriptionStream); the realtime core reads the
|
||||
// streamed per-feed tokens above plus this Text.
|
||||
Expect(final.Eou).To(BeFalse())
|
||||
Expect(final.Segments).To(BeEmpty())
|
||||
Expect(final.Duration).To(BeZero())
|
||||
|
||||
Expect(freed).To(Equal([]uintptr{7}))
|
||||
})
|
||||
|
||||
It("falls back to the text feed (eou out-param) when the JSON entry points are absent", func() {
|
||||
feeds := 0
|
||||
CppStreamFeed = func(s uintptr, pcm []float32, n int32, eouOut unsafe.Pointer) uintptr {
|
||||
feeds++
|
||||
if feeds == 2 {
|
||||
*(*int32)(eouOut) = 1
|
||||
return pool.cstr("done")
|
||||
}
|
||||
return pool.cstr("first ")
|
||||
}
|
||||
CppStreamFinalize = func(s uintptr) uintptr { return pool.cstr("") }
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
|
||||
got := collectLive(out)
|
||||
Expect(got).To(HaveLen(4))
|
||||
Expect(got[1].Delta).To(Equal("first "))
|
||||
Expect(got[1].Eou).To(BeFalse())
|
||||
Expect(got[2].Delta).To(Equal("done"))
|
||||
Expect(got[2].Eou).To(BeTrue())
|
||||
Expect(got[3].FinalResult.Text).To(Equal("first done"))
|
||||
})
|
||||
|
||||
It("forwards <EOB> as eob — a backchannel, never an eou (ABI v5 JSON)", func() {
|
||||
feeds := 0
|
||||
CppStreamFeedJSON = func(s uintptr, pcm []float32, n int32) uintptr {
|
||||
feeds++
|
||||
if feeds == 1 {
|
||||
return pool.cstr(`{"text":"uh-huh","eou":0,"eob":1,"frame_sec":0.08,` +
|
||||
`"words":[{"w":"uh-huh","start":0.1,"end":0.3,"conf":0.9}]}`)
|
||||
}
|
||||
return pool.cstr(`{"text":"the turn","eou":1,"eob":0,"frame_sec":0.08,` +
|
||||
`"words":[{"w":"the","start":0.5,"end":0.6,"conf":0.9},{"w":"turn","start":0.6,"end":0.8,"conf":0.9}]}`)
|
||||
}
|
||||
CppStreamFinalizeJSON = func(s uintptr) uintptr {
|
||||
return pool.cstr(`{"text":"","eou":0,"eob":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
|
||||
got := collectLive(out)
|
||||
Expect(got).To(HaveLen(4))
|
||||
Expect(got[1].Eob).To(BeTrue())
|
||||
Expect(got[1].Eou).To(BeFalse(), "a backchannel must not masquerade as a turn boundary")
|
||||
Expect(got[2].Eou).To(BeTrue())
|
||||
})
|
||||
|
||||
It("maps the v5 eou_out bitmask on the text path (bit0 <EOU>, bit1 <EOB>)", func() {
|
||||
feeds := 0
|
||||
CppStreamFeed = func(s uintptr, pcm []float32, n int32, eouOut unsafe.Pointer) uintptr {
|
||||
feeds++
|
||||
if feeds == 1 {
|
||||
*(*int32)(eouOut) = 2 // <EOB> only
|
||||
return pool.cstr("uh-huh")
|
||||
}
|
||||
*(*int32)(eouOut) = 1 // <EOU>
|
||||
return pool.cstr(" done")
|
||||
}
|
||||
CppStreamFinalize = func(s uintptr) uintptr { return pool.cstr("") }
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
|
||||
got := collectLive(out)
|
||||
Expect(got).To(HaveLen(4))
|
||||
Expect(got[1].Eob).To(BeTrue())
|
||||
Expect(got[1].Eou).To(BeFalse())
|
||||
Expect(got[2].Eou).To(BeTrue())
|
||||
Expect(got[2].Eob).To(BeFalse())
|
||||
})
|
||||
|
||||
It("accumulates trailing text after an EOU into the final transcript", func() {
|
||||
feeds := 0
|
||||
CppStreamFeedJSON = func(s uintptr, pcm []float32, n int32) uintptr {
|
||||
feeds++
|
||||
if feeds == 1 {
|
||||
return pool.cstr(`{"text":"turn one","eou":1,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
return pool.cstr(`{"text":" and more","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
CppStreamFinalizeJSON = func(s uintptr) uintptr {
|
||||
return pool.cstr(`{"text":"","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
|
||||
got := collectLive(out)
|
||||
final := got[len(got)-1].FinalResult
|
||||
Expect(final.Text).To(Equal("turn one and more"))
|
||||
})
|
||||
|
||||
It("resets the decode session on a mid-stream config", func() {
|
||||
var begun, freed int
|
||||
CppStreamBegin = func(ctx uintptr) uintptr { begun++; return uintptr(10 + begun) }
|
||||
CppStreamFree = func(s uintptr) { freed++ }
|
||||
CppStreamFeedJSON = func(s uintptr, pcm []float32, n int32) uintptr {
|
||||
return pool.cstr(`{"text":"x","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
CppStreamFinalizeJSON = func(s uintptr) uintptr {
|
||||
return pool.cstr(`{"text":"","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
in <- liveConfig("") // reset
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
|
||||
got := collectLive(out)
|
||||
final := got[len(got)-1].FinalResult
|
||||
Expect(final.Text).To(Equal("x"), "pre-reset transcript dropped")
|
||||
Expect(begun).To(Equal(2))
|
||||
Expect(freed).To(Equal(2), "old session freed on reset, new one on unwind")
|
||||
})
|
||||
|
||||
It("does not hold engineMu between feeds (unary work interleaves with a live session)", func() {
|
||||
CppStreamFeedJSON = func(s uintptr, pcm []float32, n int32) uintptr {
|
||||
return pool.cstr(`{"text":"","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
CppStreamFinalizeJSON = func(s uintptr) uintptr {
|
||||
return pool.cstr(`{"text":"","eou":0,"frame_sec":0.08,"words":[]}`)
|
||||
}
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
|
||||
// The session is open and idle between feeds: the engine lock must be
|
||||
// acquirable, which is what lets batched unary transcription proceed
|
||||
// mid-session. Under stream-lifetime locking this probe would block
|
||||
// until the stream ended and the Eventually would time out.
|
||||
locked := make(chan struct{})
|
||||
go func() {
|
||||
p.engineMu.Lock()
|
||||
p.engineMu.Unlock() //nolint:staticcheck // probe: acquire-release proves availability
|
||||
close(locked)
|
||||
}()
|
||||
Eventually(locked, time.Second).Should(BeClosed())
|
||||
|
||||
close(in)
|
||||
Expect(<-errCh).NotTo(HaveOccurred())
|
||||
collectLive(out)
|
||||
})
|
||||
|
||||
It("errors out and reads last_error under the lock when a feed fails", func() {
|
||||
CppStreamFeedJSON = func(s uintptr, pcm []float32, n int32) uintptr { return 0 }
|
||||
|
||||
in, out, errCh := runLive(p)
|
||||
in <- liveConfig("")
|
||||
in <- liveAudio(make([]float32, 10))
|
||||
|
||||
err := <-errCh
|
||||
Expect(err).To(MatchError(ContainSubstring("stub error")))
|
||||
got := collectLive(out)
|
||||
Expect(got).To(HaveLen(1)) // just the ready ack
|
||||
close(in)
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("stripEouMarker", func() {
|
||||
It("strips a trailing <EOU> and reports it", func() {
|
||||
text, eou := stripEouMarker("it is certainly very like the old portrait<EOU>")
|
||||
Expect(text).To(Equal("it is certainly very like the old portrait"))
|
||||
Expect(eou).To(BeTrue())
|
||||
})
|
||||
|
||||
It("strips a trailing <EOB> WITHOUT reporting an utterance end", func() {
|
||||
// A decode ending on a backchannel must not confirm the
|
||||
// retranscribe gate — the user was acknowledging, not yielding.
|
||||
text, eou := stripEouMarker("uh-huh<EOB>")
|
||||
Expect(text).To(Equal("uh-huh"))
|
||||
Expect(eou).To(BeFalse())
|
||||
})
|
||||
|
||||
It("leaves marker-free text alone", func() {
|
||||
text, eou := stripEouMarker("plain transcript")
|
||||
Expect(text).To(Equal("plain transcript"))
|
||||
Expect(eou).To(BeFalse())
|
||||
})
|
||||
|
||||
It("does not strip a marker in the middle of the text", func() {
|
||||
text, eou := stripEouMarker("a<EOU>b")
|
||||
Expect(text).To(Equal("a<EOU>b"))
|
||||
Expect(eou).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("transcriptResultFromDoc EOU handling", func() {
|
||||
It("strips the offline marker from text and sets the result flag", func() {
|
||||
doc := transcriptJSON{Text: "the old portrait<EOU>"}
|
||||
res := transcriptResultFromDoc(doc, &pb.TranscriptRequest{}, 0)
|
||||
Expect(res.Text).To(Equal("the old portrait"))
|
||||
Expect(res.Eou).To(BeTrue())
|
||||
Expect(res.Segments).To(HaveLen(1))
|
||||
Expect(res.Segments[0].Text).To(Equal("the old portrait"))
|
||||
})
|
||||
|
||||
It("reports eou=false for marker-free decodes", func() {
|
||||
doc := transcriptJSON{Text: "no marker here"}
|
||||
res := transcriptResultFromDoc(doc, &pb.TranscriptRequest{}, 0)
|
||||
Expect(res.Text).To(Equal("no marker here"))
|
||||
Expect(res.Eou).To(BeFalse())
|
||||
})
|
||||
})
|
||||
@@ -2,15 +2,17 @@ package main
|
||||
|
||||
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||
//
|
||||
// Loads libparakeet.so via purego and registers the flat C-API entry
|
||||
// points declared in parakeet_capi.h. The library name can be overridden
|
||||
// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
|
||||
// convention in the sibling backends); the default looks for the .so next
|
||||
// to this binary.
|
||||
// Loads the parakeet shared library via purego and registers the flat
|
||||
// C-API entry points declared in parakeet_capi.h. The library name can be
|
||||
// overridden with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY /
|
||||
// VIBEVOICECPP_LIBRARY convention in the sibling backends); the default
|
||||
// looks next to this binary for libparakeet.so on Linux and
|
||||
// libparakeet.dylib on macOS.
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -28,7 +30,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("PARAKEET_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libparakeet.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "libparakeet.dylib"
|
||||
} else {
|
||||
libName = "libparakeet.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -16,12 +16,15 @@ mkdir -p "$CURDIR/package/lib"
|
||||
cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
|
||||
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
||||
|
||||
# libparakeet.so + any soname symlinks (libparakeet.so.X[.Y]). purego.Dlopen
|
||||
# resolves it via LD_LIBRARY_PATH, which run.sh points at lib/.
|
||||
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || {
|
||||
echo "ERROR: libparakeet.so not found in $CURDIR, run 'make' first" >&2
|
||||
# libparakeet shared lib + any soname symlinks. On Linux this is
|
||||
# libparakeet.so[.X.Y]; on macOS it is libparakeet.dylib. purego.Dlopen
|
||||
# resolves it via the *_LIBRARY_PATH that run.sh points at lib/.
|
||||
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
cp -avf "$CURDIR"/libparakeet.dylib "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
if ! ls "$CURDIR"/package/lib/libparakeet.* >/dev/null 2>&1; then
|
||||
echo "ERROR: libparakeet shared library not found in $CURDIR, run 'make' first" >&2
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
# Detect architecture and copy the core runtime libs libparakeet.so links
|
||||
# against, plus the matching dynamic loader as lib/ld.so.
|
||||
@@ -48,7 +51,7 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
|
||||
elif [ "$(uname -s)" = "Darwin" ]; then
|
||||
echo "Detected Darwin"
|
||||
echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
|
||||
else
|
||||
echo "Error: Could not detect architecture"
|
||||
exit 1
|
||||
|
||||
@@ -3,11 +3,17 @@ set -e
|
||||
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${DYLD_LIBRARY_PATH:-}"
|
||||
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${LD_LIBRARY_PATH:-}"
|
||||
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
|
||||
fi
|
||||
|
||||
# If a self-contained ld.so was packaged, route through it so the
|
||||
# packaged libc / libstdc++ are used instead of the host's (matches the
|
||||
# whisper backend's runtime layout).
|
||||
# whisper backend's runtime layout). Linux only.
|
||||
if [ -f "$CURDIR/lib/ld.so" ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"
|
||||
|
||||
@@ -106,7 +106,7 @@ var _ = Describe("transcriptResultFromDoc (multi-segment)", func() {
|
||||
var _ = Describe("streaming segment assembly", func() {
|
||||
It("closes a segment with start/end from its words on EOU", func() {
|
||||
acc := &streamSegmenter{}
|
||||
acc.add(streamFeedJSON{Text: "hello world", Eou: 1, Words: []transcriptWord{
|
||||
acc.add(streamFeedResult{Delta: "hello world", Eou: true, Words: []transcriptWord{
|
||||
{W: "hello", Start: 0.0, End: 0.4}, {W: "world", Start: 0.4, End: 0.9},
|
||||
}})
|
||||
segs := acc.segments()
|
||||
@@ -118,9 +118,9 @@ var _ = Describe("streaming segment assembly", func() {
|
||||
|
||||
It("buffers words across feeds until EOU", func() {
|
||||
acc := &streamSegmenter{}
|
||||
acc.add(streamFeedJSON{Text: "hi", Eou: 0, Words: []transcriptWord{{W: "hi", Start: 0, End: 0.3}}})
|
||||
acc.add(streamFeedResult{Delta: "hi", Words: []transcriptWord{{W: "hi", Start: 0, End: 0.3}}})
|
||||
Expect(acc.segments()).To(BeEmpty())
|
||||
acc.add(streamFeedJSON{Text: "there", Eou: 1, Words: []transcriptWord{{W: "there", Start: 0.3, End: 0.7}}})
|
||||
acc.add(streamFeedResult{Delta: "there", Eou: true, Words: []transcriptWord{{W: "there", Start: 0.3, End: 0.7}}})
|
||||
Expect(acc.segments()).To(HaveLen(1))
|
||||
Expect(acc.segments()[0].Text).To(Equal("hi there"))
|
||||
})
|
||||
@@ -129,7 +129,7 @@ var _ = Describe("streaming segment assembly", func() {
|
||||
// field; a backchannel must still close the segment as it did in v4.
|
||||
It("closes a segment on EOB (backchannel) too", func() {
|
||||
acc := &streamSegmenter{}
|
||||
acc.add(streamFeedJSON{Text: "uh huh", Eou: 0, Eob: 1, Words: []transcriptWord{
|
||||
acc.add(streamFeedResult{Delta: "uh huh", Eob: true, Words: []transcriptWord{
|
||||
{W: "uh", Start: 0.0, End: 0.2}, {W: "huh", Start: 0.2, End: 0.5},
|
||||
}})
|
||||
segs := acc.segments()
|
||||
@@ -137,4 +137,18 @@ var _ = Describe("streaming segment assembly", func() {
|
||||
Expect(segs[0].Text).To(Equal("uh huh"))
|
||||
Expect(segs[0].End).To(Equal(secondsToNanos(0.5)))
|
||||
})
|
||||
|
||||
// Older text-only libparakeet.so: no per-word timings, so a segment is cut
|
||||
// from the delta text on each <EOU>/<EOB> (no timestamps), one per utterance.
|
||||
It("falls back to text segments when the feed carries no words", func() {
|
||||
acc := &streamSegmenter{}
|
||||
acc.add(streamFeedResult{Delta: "first turn", Eou: true})
|
||||
acc.add(streamFeedResult{Delta: "second turn", Eou: true})
|
||||
segs := acc.segments()
|
||||
Expect(segs).To(HaveLen(2))
|
||||
Expect(segs[0].Text).To(Equal("first turn"))
|
||||
Expect(segs[1].Text).To(Equal("second turn"))
|
||||
Expect(segs[0].Start).To(Equal(int64(0)), "no per-word timing on the text path")
|
||||
Expect(segs[0].End).To(Equal(int64(0)))
|
||||
})
|
||||
})
|
||||
|
||||
@@ -16,7 +16,15 @@ cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS has no glibc loader to bundle. The piper binary links its bundled
|
||||
# libs (libucd, libespeak-ng, libpiper_phonemize, libonnxruntime) via
|
||||
# @rpath but ships with no LC_RPATH, so dyld aborts at launch with
|
||||
# "Library not loaded: @rpath/libucd.dylib ... no LC_RPATH's found".
|
||||
# Add an @loader_path/lib rpath so @rpath resolves to package/lib/.
|
||||
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/piper"
|
||||
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export ESPEAK_NG_DATA=$CURDIR/espeak-ng-data
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/piper "$@"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
exec $CURDIR/piper "$@"
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec "$CURDIR"/lib/ld.so "$CURDIR"/piper "$@"
|
||||
fi
|
||||
|
||||
exec "$CURDIR"/piper "$@"
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# qwentts.cpp version
|
||||
QWEN3TTS_REPO?=https://github.com/ServeurpersoCom/qwentts.cpp
|
||||
QWEN3TTS_CPP_VERSION?=4536dcdce27c3764a93a06d6bf64026b124962f5
|
||||
QWEN3TTS_CPP_VERSION?=9dbe7ea26a01b30fccb117ae5e86807c1dc23d42
|
||||
SO_TARGET?=libgoqwen3ttscpp.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -65,8 +65,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.dylib
|
||||
endif
|
||||
|
||||
qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
|
||||
@@ -78,7 +78,7 @@ package: qwen3-tts-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgoqwen3ttscpp*.so package sources/qwentts.cpp qwen3-tts-cpp
|
||||
rm -rf libgoqwen3ttscpp*.so libgoqwen3ttscpp*.dylib package sources/qwentts.cpp qwen3-tts-cpp
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -110,13 +110,20 @@ libgoqwen3ttscpp-fallback.so: sources/qwentts.cpp
|
||||
SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
||||
rm -rf build-libgoqwen3ttscpp-fallback.so
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgoqwen3ttscpp-fallback.dylib: sources/qwentts.cpp
|
||||
$(info ${GREEN}I qwen3-tts-cpp build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgoqwen3ttscpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
||||
rm -rf build-libgoqwen3ttscpp-fallback.dylib
|
||||
|
||||
libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgoqwen3ttscpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: qwen3-tts-cpp
|
||||
@echo "Running qwen3-tts-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("QWEN3TTS_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgoqwen3ttscpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgoqwen3ttscpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgoqwen3ttscpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user