Merge origin/master + pin-sync paged backend to 0ed235ea

master auto-bumped the stock llama-cpp pin 9d5d882d -> 0ed235ea and updated the
shared grpc-server.cpp. The paged backend's pin must track the stock pin (the
grpc-server.cpp is shared), so bump its LLAMA_VERSION to match. All 28 paged
patches apply clean on 0ed235ea (verified against a fresh upstream clone). The
bf16-tau state-serialization fix (patch 0026) is included. Bit-exact gate + full
grpc-server build verify on GPU/CI to follow.

Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-06-28 07:56:47 +00:00
95 changed files with 6339 additions and 487 deletions

View File

@@ -3745,6 +3745,302 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# voice-detect
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-voice-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-voice-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-voice-detect'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-voice-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-voice-detect'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-voice-detect'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-voice-detect'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-voice-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-voice-detect'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-voice-detect'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-voice-detect'
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "voice-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# face-detect
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-face-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-face-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-face-detect'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-face-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-face-detect'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-face-detect'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-face-detect'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-face-detect'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-face-detect'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-face-detect'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-face-detect'
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "face-detect"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# acestep-cpp
- build-type: ''
cuda-major-version: ""
@@ -4961,6 +5257,14 @@ includeDarwin:
tag-suffix: "-metal-darwin-arm64-ced"
build-type: "metal"
lang: "go"
- backend: "voice-detect"
tag-suffix: "-metal-darwin-arm64-voice-detect"
build-type: "metal"
lang: "go"
- backend: "face-detect"
tag-suffix: "-metal-darwin-arm64-face-detect"
build-type: "metal"
lang: "go"
- backend: "acestep-cpp"
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
build-type: "metal"
@@ -5024,9 +5328,6 @@ includeDarwin:
- backend: "qwen-tts"
tag-suffix: "-metal-darwin-arm64-qwen-tts"
build-type: "mps"
- backend: "fish-speech"
tag-suffix: "-metal-darwin-arm64-fish-speech"
build-type: "mps"
- backend: "voxcpm"
tag-suffix: "-metal-darwin-arm64-voxcpm"
build-type: "mps"

View File

@@ -63,6 +63,14 @@ jobs:
variable: "CED_VERSION"
branch: "master"
file: "backend/go/ced/Makefile"
- repository: "mudler/voice-detect.cpp"
variable: "VOICEDETECT_VERSION"
branch: "master"
file: "backend/go/voice-detect/Makefile"
- repository: "mudler/face-detect.cpp"
variable: "FACEDETECT_VERSION"
branch: "master"
file: "backend/go/face-detect/Makefile"
- repository: "mudler/depth-anything.cpp"
variable: "DEPTHANYTHING_VERSION"
branch: "master"

View File

@@ -1008,7 +1008,11 @@ jobs:
# image + working dir.
tests-vibevoice-cpp-grpc-transcription:
needs: detect-changes
if: needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
# Skip on release tag pushes: the ASR Q4_K model is ~10 GB and cannot be
# pulled from HF within the inner `go test -timeout 30m` budget on a CI
# runner, so every tag build hung and timed out. Still runs on PRs/branch
# pushes that touch vibevoice-cpp so regressions are caught off the release path.
if: (needs.detect-changes.outputs.vibevoice-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true') && !startsWith(github.ref, 'refs/tags/')
runs-on: bigger-runner
timeout-minutes: 150
steps: