mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-08 13:42:00 -05:00
Compare commits
1 Commits
master
...
feat/voxtr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8cc5ec920a |
18
.github/workflows/backend.yml
vendored
18
.github/workflows/backend.yml
vendored
@@ -1674,6 +1674,20 @@ jobs:
|
|||||||
dockerfile: "./backend/Dockerfile.golang"
|
dockerfile: "./backend/Dockerfile.golang"
|
||||||
context: "./"
|
context: "./"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
|
# voxtral: CPU-only image (empty build-type and CUDA versions) built for
# amd64 and arm64 from the shared Go backend Dockerfile.
- build-type: ''
  cuda-major-version: ''
  cuda-minor-version: ''
  platforms: 'linux/amd64,linux/arm64'
  tag-latest: 'auto'
  tag-suffix: '-cpu-voxtral'
  runs-on: 'ubuntu-latest'
  base-image: 'ubuntu:24.04'
  skip-drivers: 'false'
  backend: 'voxtral'
  dockerfile: './backend/Dockerfile.golang'
  context: './'
  ubuntu-version: '2404'
|
||||||
#silero-vad
|
#silero-vad
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
cuda-major-version: ""
|
cuda-major-version: ""
|
||||||
@@ -1945,6 +1959,10 @@ jobs:
|
|||||||
tag-suffix: "-metal-darwin-arm64-whisper"
|
tag-suffix: "-metal-darwin-arm64-whisper"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
lang: "go"
|
lang: "go"
|
||||||
|
# voxtral: Metal build of the Go backend for darwin/arm64.
- backend: 'voxtral'
  tag-suffix: '-metal-darwin-arm64-voxtral'
  build-type: 'metal'
  lang: 'go'
|
||||||
- backend: "vibevoice"
|
- backend: "vibevoice"
|
||||||
tag-suffix: "-metal-darwin-arm64-vibevoice"
|
tag-suffix: "-metal-darwin-arm64-vibevoice"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
|
|||||||
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -30,6 +30,10 @@ jobs:
|
|||||||
variable: "PIPER_VERSION"
|
variable: "PIPER_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/piper/Makefile"
|
file: "backend/go/piper/Makefile"
|
||||||
|
# Tracks the `main` branch of antirez/voxtral.c and updates the
# VOXTRAL_VERSION pin in the voxtral backend Makefile.
- repository: 'antirez/voxtral.c'
  variable: 'VOXTRAL_VERSION'
  branch: 'main'
  file: 'backend/go/voxtral/Makefile'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|||||||
31
.github/workflows/test-extra.yml
vendored
31
.github/workflows/test-extra.yml
vendored
@@ -361,3 +361,34 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/voxcpm
|
make --jobs=5 --output-sync=target -C backend/python/voxcpm
|
||||||
make --jobs=5 --output-sync=target -C backend/python/voxcpm test
|
make --jobs=5 --output-sync=target -C backend/python/voxcpm test
|
||||||
|
# Builds the voxtral Go backend and runs its test target.
tests-voxtral:
  runs-on: ubuntu-latest
  steps:
    - name: Clone
      uses: actions/checkout@v6
      with:
        submodules: true
    - name: Dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake curl libopenblas-dev ffmpeg
    - name: Setup Go
      uses: actions/setup-go@v5
    # Log the Go toolchain version the following steps will use.
    - name: Display Go version
      run: go version
    - name: Proto Dependencies
      run: |
        # Install protoc
        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
        rm protoc.zip
        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
        PATH="$PATH:$HOME/go/bin" make protogen-go
    # Name the target explicitly: the first rule in backend/go/voxtral/Makefile
    # is the source checkout, so a bare `make` would only clone voxtral.c.
    - name: Build voxtral
      run: |
        make --jobs=5 --output-sync=target -C backend/go/voxtral voxtral
    - name: Test voxtral
      run: |
        make --jobs=5 --output-sync=target -C backend/go/voxtral test
|
||||||
|
|||||||
6
Makefile
6
Makefile
@@ -1,5 +1,5 @@
|
|||||||
# Disable parallel execution for backend builds
|
# Disable parallel execution for backend builds
|
||||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step
|
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral
|
||||||
|
|
||||||
GOCMD=go
|
GOCMD=go
|
||||||
GOTEST=$(GOCMD) test
|
GOTEST=$(GOCMD) test
|
||||||
@@ -453,6 +453,7 @@ BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
|
|||||||
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
|
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
|
||||||
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
|
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
|
||||||
BACKEND_WHISPER = whisper|golang|.|false|true
|
BACKEND_WHISPER = whisper|golang|.|false|true
|
||||||
|
BACKEND_VOXTRAL = voxtral|golang|.|false|true
|
||||||
|
|
||||||
# Python backends with root context
|
# Python backends with root context
|
||||||
BACKEND_RERANKERS = rerankers|python|.|false|true
|
BACKEND_RERANKERS = rerankers|python|.|false|true
|
||||||
@@ -506,6 +507,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
|
|||||||
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
|
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
|
||||||
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
|
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
|
||||||
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
|
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
|
||||||
|
$(eval $(call generate-docker-build-target,$(BACKEND_VOXTRAL)))
|
||||||
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
|
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
|
||||||
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
|
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
|
||||||
$(eval $(call generate-docker-build-target,$(BACKEND_OUTETTS)))
|
$(eval $(call generate-docker-build-target,$(BACKEND_OUTETTS)))
|
||||||
@@ -533,7 +535,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP)))
|
|||||||
docker-save-%: backend-images
|
docker-save-%: backend-images
|
||||||
docker save local-ai-backend:$* -o backend-images/$*.tar
|
docker save local-ai-backend:$* -o backend-images/$*.tar
|
||||||
|
|
||||||
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step
|
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral
|
||||||
|
|
||||||
########################################################
|
########################################################
|
||||||
### Mock Backend for E2E Tests
|
### Mock Backend for E2E Tests
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ RUN apt-get update && \
|
|||||||
build-essential \
|
build-essential \
|
||||||
git ccache \
|
git ccache \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
make cmake wget \
|
make cmake wget libopenblas-dev \
|
||||||
curl unzip \
|
curl unzip \
|
||||||
libssl-dev && \
|
libssl-dev && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
|
|||||||
9
backend/go/voxtral/.gitignore
vendored
Normal file
9
backend/go/voxtral/.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
.cache/
|
||||||
|
sources/
|
||||||
|
build/
|
||||||
|
build-*/
|
||||||
|
package/
|
||||||
|
voxtral
|
||||||
|
*.so
|
||||||
|
*.dylib
|
||||||
|
compile_commands.json
|
||||||
84
backend/go/voxtral/CMakeLists.txt
Normal file
84
backend/go/voxtral/CMakeLists.txt
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# Builds "govoxtral", a loadable MODULE library made of the voxtral.c sources
# (expected under sources/voxtral.c) plus the C shim in csrc/.
#
# Options read by this file:
#   USE_METAL     - add the Objective-C Metal backend and link Apple frameworks
#   USE_OPENBLAS  - link a BLAS library located through find_package(BLAS)
cmake_minimum_required(VERSION 3.12)

if(USE_METAL)
    # The OBJC project language was introduced in CMake 3.16. Fail with a clear
    # message instead of an obscure "unknown language" error on older releases.
    if(CMAKE_VERSION VERSION_LESS 3.16)
        message(FATAL_ERROR "USE_METAL needs CMake >= 3.16 for Objective-C support (found ${CMAKE_VERSION})")
    endif()
    project(govoxtral LANGUAGES C OBJC)
else()
    project(govoxtral LANGUAGES C)
endif()

set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Workaround: CMake + GCC linker depfile generation fails for MODULE libraries
set(CMAKE_C_LINKER_DEPFILE_SUPPORTED FALSE)

# Build voxtral.c as a library
set(VOXTRAL_SOURCES
    sources/voxtral.c/voxtral.c
    sources/voxtral.c/voxtral_kernels.c
    sources/voxtral.c/voxtral_audio.c
    sources/voxtral.c/voxtral_encoder.c
    sources/voxtral.c/voxtral_decoder.c
    sources/voxtral.c/voxtral_tokenizer.c
    sources/voxtral.c/voxtral_safetensors.c
)

# Metal GPU acceleration (macOS arm64 only)
if(USE_METAL)
    # Generate embedded shader header from .metal source via xxd
    add_custom_command(
        OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/sources/voxtral.c/voxtral_shaders_source.h
        COMMAND xxd -i voxtral_shaders.metal > voxtral_shaders_source.h
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/sources/voxtral.c
        DEPENDS sources/voxtral.c/voxtral_shaders.metal
        COMMENT "Generating embedded Metal shaders header"
    )
    list(APPEND VOXTRAL_SOURCES sources/voxtral.c/voxtral_metal.m)
    set_source_files_properties(sources/voxtral.c/voxtral_metal.m PROPERTIES
        COMPILE_FLAGS "-fobjc-arc"
    )
endif()

add_library(govoxtral MODULE csrc/govoxtral.c ${VOXTRAL_SOURCES})

target_include_directories(govoxtral PRIVATE sources/voxtral.c csrc)

target_compile_options(govoxtral PRIVATE -O3 -ffast-math)

if(USE_METAL)
    target_compile_definitions(govoxtral PRIVATE USE_BLAS USE_METAL ACCELERATE_NEW_LAPACK)
    target_link_libraries(govoxtral PRIVATE
        "-framework Accelerate"
        "-framework Metal"
        "-framework MetalPerformanceShaders"
        "-framework MetalPerformanceShadersGraph"
        "-framework Foundation"
        "-framework AudioToolbox"
        "-framework CoreFoundation"
        m
    )
    # Listing the generated header as a source makes the custom command above
    # run before the target is compiled.
    target_sources(govoxtral PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/sources/voxtral.c/voxtral_shaders_source.h
    )
elseif(USE_OPENBLAS)
    # Use BLAS when find_package locates one; otherwise fall back to pure C
    find_package(BLAS)
    if(BLAS_FOUND)
        target_compile_definitions(govoxtral PRIVATE USE_BLAS USE_OPENBLAS)
        target_link_libraries(govoxtral PRIVATE ${BLAS_LIBRARIES} m)
        target_include_directories(govoxtral PRIVATE /usr/include/openblas)
    else()
        message(WARNING "OpenBLAS requested but not found, building without BLAS")
        target_link_libraries(govoxtral PRIVATE m)
    endif()
elseif(APPLE)
    # macOS without Metal: use Accelerate framework
    target_compile_definitions(govoxtral PRIVATE USE_BLAS ACCELERATE_NEW_LAPACK)
    target_link_libraries(govoxtral PRIVATE "-framework Accelerate" m)
else()
    target_link_libraries(govoxtral PRIVATE m)
endif()

set_property(TARGET govoxtral PROPERTY C_STANDARD 11)
# Place the module directly in the build directory root.
set_target_properties(govoxtral PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||||
124
backend/go/voxtral/Makefile
Normal file
124
backend/go/voxtral/Makefile
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
# Build rules for the voxtral backend.
#
# The Go binary (`voxtral`) is built without cgo; the native code lives in
# libgovoxtral-<variant> libraries, each produced by a recursive
# `$(MAKE) libgovoxtral-custom` call that configures CMake in its own
# build-<name> directory.
.NOTPARALLEL:

# `package` is also the name of the directory package.sh creates; without
# .PHONY the rule would be considered up to date once that directory exists.
.PHONY: all build clean package purge test libgovoxtral-custom

# The first rule below is the source checkout, so pin the default goal to the
# backend binary; a bare `make` then builds instead of only cloning.
.DEFAULT_GOAL := voxtral

CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=true

GOCMD?=go
GO_TAGS?=
JOBS?=$(shell nproc --ignore=1 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)

# voxtral.c version
VOXTRAL_REPO?=https://github.com/antirez/voxtral.c
VOXTRAL_VERSION?=8f810dd23c44be5453cb46c92216a3eaab46e85f

# Detect OS and CPU architecture
UNAME_S := $(shell uname -s)
UNAME_M := $(shell uname -m)

# Shared library extension
ifeq ($(UNAME_S),Darwin)
SO_EXT=dylib
else
SO_EXT=so
endif

SO_TARGET?=libgovoxtral.$(SO_EXT)

# The -march=x86-64 / -mavx* / -mno-avx* switches are only understood by x86
# compilers; passing them on arm64 Linux aborts the build.
ifneq ($(filter x86_64 amd64,$(UNAME_M)),)
IS_X86_64=true
else
IS_X86_64=false
endif

CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

# C flags shared by every variant. They are kept out of CMAKE_ARGS so the
# recursive make cannot append a second -DCMAKE_C_FLAGS that overrides the
# per-variant one (CMake keeps only the last definition).
BASE_C_FLAGS=
ifeq ($(NATIVE),false)
ifneq ($(UNAME_S),Darwin)
ifeq ($(IS_X86_64),true)
BASE_C_FLAGS=-march=x86-64
endif
endif
endif

# Per-variant C flags, supplied through the environment by the variant rules.
VARIANT_C_FLAGS?=

ALL_C_FLAGS=$(strip $(BASE_C_FLAGS) $(VARIANT_C_FLAGS))
ifneq ($(ALL_C_FLAGS),)
C_FLAGS_ARG=-DCMAKE_C_FLAGS='$(ALL_C_FLAGS)'
else
C_FLAGS_ARG=
endif

ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DUSE_OPENBLAS=OFF
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DUSE_OPENBLAS=OFF
else ifeq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DUSE_OPENBLAS=OFF -DUSE_METAL=ON
else ifeq ($(UNAME_S),Darwin)
# Default on macOS: use Accelerate (no OpenBLAS needed)
CMAKE_ARGS+=-DUSE_OPENBLAS=OFF
else
CMAKE_ARGS+=-DUSE_OPENBLAS=ON
endif

# Flags that strip SIMD extensions from the fallback library (x86 only).
ifeq ($(IS_X86_64),true)
FALLBACK_C_FLAGS=-mno-avx -mno-avx2 -mno-avx512f -mno-fma -mno-f16c
else
FALLBACK_C_FLAGS=
endif

# AVX variants exist only on x86_64 Linux; every other platform builds a
# single fallback library.
ifeq ($(UNAME_S)-$(IS_X86_64),Linux-true)
VARIANT_TARGETS = libgovoxtral-avx.so libgovoxtral-avx2.so libgovoxtral-fallback.so
else ifeq ($(UNAME_S),Darwin)
VARIANT_TARGETS = libgovoxtral-fallback.dylib
else
VARIANT_TARGETS = libgovoxtral-fallback.so
endif

# Check out the pinned voxtral.c revision.
sources/voxtral.c:
	mkdir -p sources/voxtral.c
	cd sources/voxtral.c && \
	git init && \
	git remote add origin $(VOXTRAL_REPO) && \
	git fetch origin && \
	git checkout $(VOXTRAL_VERSION) && \
	git submodule update --init --recursive --depth 1 --single-branch

voxtral: main.go govoxtral.go $(VARIANT_TARGETS)
	CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o voxtral ./

package: voxtral
	bash package.sh

build: package

clean: purge
	rm -rf libgovoxtral*.so libgovoxtral*.dylib package sources/voxtral.c voxtral

purge:
	rm -rf build*

# AVX / AVX2 variants (x86_64 Linux only)
ifeq ($(UNAME_S)-$(IS_X86_64),Linux-true)
libgovoxtral-avx.so: sources/voxtral.c
	$(MAKE) purge
	$(info Building voxtral: avx)
	SO_TARGET=libgovoxtral-avx.so VARIANT_C_FLAGS='-mavx -mno-avx2 -mno-avx512f -mno-fma -mno-f16c' $(MAKE) libgovoxtral-custom
	rm -rfv build*

libgovoxtral-avx2.so: sources/voxtral.c
	$(MAKE) purge
	$(info Building voxtral: avx2)
	SO_TARGET=libgovoxtral-avx2.so VARIANT_C_FLAGS='-mavx -mavx2 -mfma -mf16c' $(MAKE) libgovoxtral-custom
	rm -rfv build*
endif

# Build fallback variant
ifeq ($(UNAME_S),Darwin)
libgovoxtral-fallback.dylib: sources/voxtral.c
	$(MAKE) purge
	$(info Building voxtral: darwin fallback)
	SO_TARGET=libgovoxtral-fallback.dylib NATIVE=true $(MAKE) libgovoxtral-custom
	rm -rfv build*
else
libgovoxtral-fallback.so: sources/voxtral.c
	$(MAKE) purge
	$(info Building voxtral: fallback)
	SO_TARGET=libgovoxtral-fallback.so VARIANT_C_FLAGS='$(FALLBACK_C_FLAGS)' $(MAKE) libgovoxtral-custom
	rm -rfv build*
endif

# Configure and build one library in build-$(SO_TARGET), then move the result
# next to this Makefile under the name $(SO_TARGET). The built module may be
# called libgovoxtral.so or libgovoxtral.dylib, so both names are tried.
libgovoxtral-custom: CMakeLists.txt csrc/govoxtral.c csrc/govoxtral.h
	mkdir -p build-$(SO_TARGET) && \
	cd build-$(SO_TARGET) && \
	cmake .. $(CMAKE_ARGS) $(C_FLAGS_ARG) && \
	cmake --build . --config Release -j$(JOBS) && \
	cd .. && \
	(mv build-$(SO_TARGET)/libgovoxtral.so ./$(SO_TARGET) 2>/dev/null || \
	 mv build-$(SO_TARGET)/libgovoxtral.dylib ./$(SO_TARGET) 2>/dev/null)

test: voxtral
	@echo "Running voxtral tests..."
	bash test.sh
	@echo "voxtral tests completed."

all: voxtral package
|
||||||
62
backend/go/voxtral/csrc/govoxtral.c
Normal file
62
backend/go/voxtral/csrc/govoxtral.c
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
#include "govoxtral.h"
|
||||||
|
#include "voxtral.h"
|
||||||
|
#include "voxtral_audio.h"
|
||||||
|
#ifdef USE_METAL
|
||||||
|
#include "voxtral_metal.h"
|
||||||
|
#endif
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* Model context used by transcribe(); NULL until load_model() succeeds. */
static vox_ctx_t *ctx = NULL;
/* Buffer returned by the most recent vox_transcribe() call; released by the
 * next transcribe() or by free_result(). */
static char *last_result = NULL;
#ifdef USE_METAL
/* Non-zero once vox_metal_init() has been called. Only referenced in Metal
 * builds, so it is compiled out elsewhere to avoid an unused-variable warning. */
static int metal_initialized = 0;
#endif
|
||||||
|
|
||||||
|
int load_model(const char *model_dir) {
|
||||||
|
if (ctx != NULL) {
|
||||||
|
vox_free(ctx);
|
||||||
|
ctx = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_METAL
|
||||||
|
if (!metal_initialized) {
|
||||||
|
vox_metal_init();
|
||||||
|
metal_initialized = 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ctx = vox_load(model_dir);
|
||||||
|
if (ctx == NULL) {
|
||||||
|
fprintf(stderr, "error: failed to load voxtral model from %s\n", model_dir);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *transcribe(const char *wav_path) {
|
||||||
|
if (ctx == NULL) {
|
||||||
|
fprintf(stderr, "error: model not loaded\n");
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last_result != NULL) {
|
||||||
|
free(last_result);
|
||||||
|
last_result = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
last_result = vox_transcribe(ctx, wav_path);
|
||||||
|
if (last_result == NULL) {
|
||||||
|
fprintf(stderr, "error: transcription failed for %s\n", wav_path);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return last_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_result(void) {
|
||||||
|
if (last_result != NULL) {
|
||||||
|
free(last_result);
|
||||||
|
last_result = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
8
backend/go/voxtral/csrc/govoxtral.h
Normal file
8
backend/go/voxtral/csrc/govoxtral.h
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
#ifndef GOVOXTRAL_H
#define GOVOXTRAL_H

/* C entry points exported by the govoxtral library. */

/* Load the model in model_dir. Returns 0 on success, 1 on failure. */
int load_model(const char *model_dir);

/* Transcribe the audio file at wav_path. Returns "" on failure; a non-empty
 * result stays valid until the next transcribe() or free_result() call. */
const char *transcribe(const char *wav_path);

/* Release the buffer returned by the last transcribe() call. */
void free_result(void);

#endif /* GOVOXTRAL_H */
|
||||||
60
backend/go/voxtral/govoxtral.go
Normal file
60
backend/go/voxtral/govoxtral.go
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
CppLoadModel func(modelDir string) int
|
||||||
|
CppTranscribe func(wavPath string) string
|
||||||
|
CppFreeResult func()
|
||||||
|
)
|
||||||
|
|
||||||
|
type Voxtral struct {
|
||||||
|
base.SingleThread
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *Voxtral) Load(opts *pb.ModelOptions) error {
|
||||||
|
if ret := CppLoadModel(opts.ModelFile); ret != 0 {
|
||||||
|
return fmt.Errorf("failed to load Voxtral model from %s", opts.ModelFile)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *Voxtral) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
|
||||||
|
dir, err := os.MkdirTemp("", "voxtral")
|
||||||
|
if err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(dir)
|
||||||
|
|
||||||
|
convertedPath := dir + "/converted.wav"
|
||||||
|
|
||||||
|
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result := strings.Clone(CppTranscribe(convertedPath))
|
||||||
|
CppFreeResult()
|
||||||
|
|
||||||
|
text := strings.TrimSpace(result)
|
||||||
|
|
||||||
|
segments := []*pb.TranscriptSegment{}
|
||||||
|
if text != "" {
|
||||||
|
segments = append(segments, &pb.TranscriptSegment{
|
||||||
|
Id: 0,
|
||||||
|
Text: text,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.TranscriptResult{
|
||||||
|
Segments: segments,
|
||||||
|
Text: text,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
53
backend/go/voxtral/main.go
Normal file
53
backend/go/voxtral/main.go
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/ebitengine/purego"
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// addr is the value of the --addr flag; main passes it to grpc.StartServer.
var addr = flag.String("addr", "localhost:50051", "the address to connect to")

// LibFuncs pairs a pointer to a Go function variable with the name of the
// native symbol that should be bound to it.
type LibFuncs struct {
	// FuncPtr is the address of the Go function variable to fill in.
	FuncPtr any
	// Name is the symbol name looked up in the loaded library.
	Name string
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// Get library name from environment variable, default to fallback
|
||||||
|
libName := os.Getenv("VOXTRAL_LIBRARY")
|
||||||
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgovoxtral-fallback.dylib"
|
||||||
|
} else {
|
||||||
|
libName = "./libgovoxtral-fallback.so"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
libFuncs := []LibFuncs{
|
||||||
|
{&CppLoadModel, "load_model"},
|
||||||
|
{&CppTranscribe, "transcribe"},
|
||||||
|
{&CppFreeResult, "free_result"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, lf := range libFuncs {
|
||||||
|
purego.RegisterLibFunc(lf.FuncPtr, gosd, lf.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &Voxtral{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
68
backend/go/voxtral/package.sh
Normal file
68
backend/go/voxtral/package.sh
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash

# Assemble the package/ directory for the voxtral backend: the binary, the
# libgovoxtral-* libraries, run.sh and, on Linux, the dynamic loader plus the
# system libraries copied from the build machine.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."
PKG_DIR="${CURDIR}/package"
LIB_DIR="${PKG_DIR}/lib"

# copy_runtime_libs LOADER LIBDIR BLASDIR
#   LOADER  - path of the dynamic loader, stored as lib/ld.so
#   LIBDIR  - directory holding the system libraries to bundle
#   BLASDIR - directory searched for an optional libopenblas.so.0
copy_runtime_libs() {
    local loader="$1" libdir="$2" blasdir="$3" lib

    cp -arfLv "$loader" "$LIB_DIR/ld.so"
    for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 \
               libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$libdir/$lib" "$LIB_DIR/$lib"
    done

    # OpenBLAS if available
    if [ -f "$blasdir/libopenblas.so.0" ]; then
        cp -arfLv "$blasdir/libopenblas.so.0" "$LIB_DIR/"
    fi
}

# Create lib directory
mkdir -p "$LIB_DIR"

cp -avf "$CURDIR/voxtral" "$PKG_DIR/"
# Only one of the two extensions exists on a given platform, so a failed
# glob match is not an error.
cp -fv "$CURDIR"/libgovoxtral-*.so "$PKG_DIR/" 2>/dev/null || true
cp -fv "$CURDIR"/libgovoxtral-*.dylib "$PKG_DIR/" 2>/dev/null || true
cp -fv "$CURDIR/run.sh" "$PKG_DIR/"

# Detect architecture (by the loader present on disk) and copy its libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_runtime_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_runtime_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Package GPU libraries based on BUILD_TYPE
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$LIB_DIR"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$PKG_DIR/"
ls -liah "$LIB_DIR/"
|
||||||
49
backend/go/voxtral/run.sh
Normal file
49
backend/go/voxtral/run.sh
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/bin/bash
# Launcher for the packaged voxtral backend: picks the libgovoxtral variant
# that matches the CPU, exports it as VOXTRAL_LIBRARY and starts the binary,
# going through the bundled lib/ld.so when one is present.
set -ex

# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath "$0")")

cd /

echo "CPU info:"
if [ "$(uname)" != "Darwin" ]; then
    grep -e "model\sname" /proc/cpuinfo | head -1
    grep -e "flags" /proc/cpuinfo | head -1
fi

if [ "$(uname)" = "Darwin" ]; then
    # macOS: single dylib variant (Metal or Accelerate)
    LIBRARY="$CURDIR/libgovoxtral-fallback.dylib"
    export DYLD_LIBRARY_PATH="$CURDIR/lib:$DYLD_LIBRARY_PATH"
else
    LIBRARY="$CURDIR/libgovoxtral-fallback.so"

    # Match the flag as a whole word; "(\s|$)" also accepts a flag that is the
    # last entry on its line, which a trailing "\s" alone would miss.
    if grep -q -E '\savx(\s|$)' /proc/cpuinfo ; then
        echo "CPU: AVX found OK"
        if [ -e "$CURDIR/libgovoxtral-avx.so" ]; then
            LIBRARY="$CURDIR/libgovoxtral-avx.so"
        fi
    fi

    # Checked after AVX so that AVX2 takes precedence when both are present.
    if grep -q -E '\savx2(\s|$)' /proc/cpuinfo ; then
        echo "CPU: AVX2 found OK"
        if [ -e "$CURDIR/libgovoxtral-avx2.so" ]; then
            LIBRARY="$CURDIR/libgovoxtral-avx2.so"
        fi
    fi

    export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
fi

export VOXTRAL_LIBRARY="$LIBRARY"

# If there is a lib/ld.so, use it (Linux only)
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    echo "Using library: $LIBRARY"
    exec "$CURDIR/lib/ld.so" "$CURDIR/voxtral" "$@"
fi

echo "Using library: $LIBRARY"
exec "$CURDIR/voxtral" "$@"
|
||||||
48
backend/go/voxtral/test.sh
Normal file
48
backend/go/voxtral/test.sh
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
#!/bin/bash
set -e

CURDIR=$(dirname "$(realpath "$0")")

echo "Running voxtral backend tests..."

# Environment:
# - VOXTRAL_MODEL_DIR: directory holding consolidated.safetensors, params.json
#   and tekken.json (defaults to ./voxtral-model)
# - VOXTRAL_BINARY: path to the voxtral binary (defaults to ./voxtral)
#
# Any of the model files missing from VOXTRAL_MODEL_DIR is downloaded before
# the Go tests run, so an interrupted earlier download is completed rather
# than silently leaving the tests without a model.

cd "$CURDIR"
export VOXTRAL_MODEL_DIR="${VOXTRAL_MODEL_DIR:-./voxtral-model}"

MODEL_ID="mistralai/Voxtral-Mini-4B-Realtime-2602"
BASE_URL="https://huggingface.co/${MODEL_ID}/resolve/main"

# Files to download
FILES=(
    "consolidated.safetensors"
    "params.json"
    "tekken.json"
)

if [ ! -d "$VOXTRAL_MODEL_DIR" ]; then
    echo "Creating voxtral-model directory for tests..."
    mkdir -p "$VOXTRAL_MODEL_DIR"
fi

echo "Model: ${MODEL_ID}"
echo ""

for file in "${FILES[@]}"; do
    dest="${VOXTRAL_MODEL_DIR}/${file}"
    if [ -f "${dest}" ]; then
        echo "  [skip] ${file} (already exists)"
    else
        echo "  [download] ${file}..."
        # --fail makes HTTP errors abort the script instead of saving the
        # error page as a model file; the .part name keeps a partial download
        # from being mistaken for a finished one on the next run.
        curl --fail -L -o "${dest}.part" "${BASE_URL}/${file}" --progress-bar
        mv "${dest}.part" "${dest}"
        echo "  [done] ${file}"
    fi
done

# Run Go tests
go test -v -timeout 300s ./...

echo "All voxtral tests passed."
|
||||||
201
backend/go/voxtral/voxtral_test.go
Normal file
201
backend/go/voxtral/voxtral_test.go
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
"google.golang.org/grpc"
|
||||||
|
"google.golang.org/grpc/credentials/insecure"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
testAddr = "localhost:50051"
|
||||||
|
sampleAudio = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav"
|
||||||
|
startupWait = 5 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
// skipIfNoModel returns the directory named by VOXTRAL_MODEL_DIR. The calling
// test is skipped when the variable is empty or when the directory does not
// contain consolidated.safetensors.
func skipIfNoModel(t *testing.T) string {
	t.Helper()

	dir := os.Getenv("VOXTRAL_MODEL_DIR")
	if len(dir) == 0 {
		t.Skip("VOXTRAL_MODEL_DIR not set, skipping test (set to voxtral model directory)")
	}

	weights := filepath.Join(dir, "consolidated.safetensors")
	_, statErr := os.Stat(weights)
	if os.IsNotExist(statErr) {
		t.Skipf("Model file not found in %s, skipping", dir)
	}

	return dir
}
|
||||||
|
|
||||||
|
func startServer(t *testing.T) *exec.Cmd {
|
||||||
|
t.Helper()
|
||||||
|
binary := os.Getenv("VOXTRAL_BINARY")
|
||||||
|
if binary == "" {
|
||||||
|
binary = "./voxtral"
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(binary); os.IsNotExist(err) {
|
||||||
|
t.Skipf("Backend binary not found at %s, skipping", binary)
|
||||||
|
}
|
||||||
|
cmd := exec.Command(binary, "--addr", testAddr)
|
||||||
|
cmd.Stdout = os.Stderr
|
||||||
|
cmd.Stderr = os.Stderr
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
t.Fatalf("Failed to start server: %v", err)
|
||||||
|
}
|
||||||
|
time.Sleep(startupWait)
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
// stopServer kills the backend process started by startServer and waits for
// it to exit. It is a no-op for a nil command or one that never started.
// Errors from Kill and Wait are intentionally discarded (best-effort teardown).
func stopServer(cmd *exec.Cmd) {
	if cmd == nil || cmd.Process == nil {
		return
	}
	_ = cmd.Process.Kill()
	_ = cmd.Wait()
}
|
||||||
|
|
||||||
|
func dialGRPC(t *testing.T) *grpc.ClientConn {
|
||||||
|
t.Helper()
|
||||||
|
conn, err := grpc.Dial(testAddr,
|
||||||
|
grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024),
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to dial gRPC: %v", err)
|
||||||
|
}
|
||||||
|
return conn
|
||||||
|
}
|
||||||
|
|
||||||
|
// downloadFile fetches url with an HTTP GET and writes the response body to
// the file at dest, creating or truncating it.
//
// It returns an error when the request fails, when the server answers with a
// status other than 200 OK, when dest cannot be created, or when copying or
// closing the file fails.
func downloadFile(url, dest string) (err error) {
	// The default http client has no timeout; bound the whole transfer so a
	// stalled server cannot hang the test run.
	client := &http.Client{Timeout: 2 * time.Minute}

	resp, err := client.Get(url)
	if err != nil {
		return fmt.Errorf("HTTP GET failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("bad status: %s", resp.Status)
	}

	f, err := os.Create(dest)
	if err != nil {
		return err
	}
	// Surface a failed Close (e.g. a flush error) unless an earlier error is
	// already being returned.
	defer func() {
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(f, resp.Body)
	return err
}
|
||||||
|
|
||||||
|
func TestServerHealth(t *testing.T) {
|
||||||
|
cmd := startServer(t)
|
||||||
|
defer stopServer(cmd)
|
||||||
|
|
||||||
|
conn := dialGRPC(t)
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
client := pb.NewBackendClient(conn)
|
||||||
|
resp, err := client.Health(context.Background(), &pb.HealthMessage{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Health check failed: %v", err)
|
||||||
|
}
|
||||||
|
if string(resp.Message) != "OK" {
|
||||||
|
t.Fatalf("Expected OK, got %s", string(resp.Message))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadModel(t *testing.T) {
|
||||||
|
modelDir := skipIfNoModel(t)
|
||||||
|
cmd := startServer(t)
|
||||||
|
defer stopServer(cmd)
|
||||||
|
|
||||||
|
conn := dialGRPC(t)
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
client := pb.NewBackendClient(conn)
|
||||||
|
resp, err := client.LoadModel(context.Background(), &pb.ModelOptions{
|
||||||
|
ModelFile: modelDir,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadModel failed: %v", err)
|
||||||
|
}
|
||||||
|
if !resp.Success {
|
||||||
|
t.Fatalf("LoadModel returned failure: %s", resp.Message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAudioTranscription(t *testing.T) {
|
||||||
|
modelDir := skipIfNoModel(t)
|
||||||
|
|
||||||
|
tmpDir, err := os.MkdirTemp("", "voxtral-test")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tmpDir)
|
||||||
|
|
||||||
|
// Download sample audio — JFK "ask not what your country can do for you" clip
|
||||||
|
audioFile := filepath.Join(tmpDir, "sample.wav")
|
||||||
|
t.Log("Downloading sample audio...")
|
||||||
|
if err := downloadFile(sampleAudio, audioFile); err != nil {
|
||||||
|
t.Fatalf("Failed to download sample audio: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := startServer(t)
|
||||||
|
defer stopServer(cmd)
|
||||||
|
|
||||||
|
conn := dialGRPC(t)
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
client := pb.NewBackendClient(conn)
|
||||||
|
|
||||||
|
// Load model
|
||||||
|
loadResp, err := client.LoadModel(context.Background(), &pb.ModelOptions{
|
||||||
|
ModelFile: modelDir,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadModel failed: %v", err)
|
||||||
|
}
|
||||||
|
if !loadResp.Success {
|
||||||
|
t.Fatalf("LoadModel returned failure: %s", loadResp.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transcribe
|
||||||
|
transcriptResp, err := client.AudioTranscription(context.Background(), &pb.TranscriptRequest{
|
||||||
|
Dst: audioFile,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("AudioTranscription failed: %v", err)
|
||||||
|
}
|
||||||
|
if transcriptResp == nil {
|
||||||
|
t.Fatal("AudioTranscription returned nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("Transcribed text: %s", transcriptResp.Text)
|
||||||
|
t.Logf("Number of segments: %d", len(transcriptResp.Segments))
|
||||||
|
|
||||||
|
if transcriptResp.Text == "" {
|
||||||
|
t.Fatal("Transcription returned empty text")
|
||||||
|
}
|
||||||
|
|
||||||
|
allText := strings.ToLower(transcriptResp.Text)
|
||||||
|
for _, seg := range transcriptResp.Segments {
|
||||||
|
allText += " " + strings.ToLower(seg.Text)
|
||||||
|
}
|
||||||
|
t.Logf("All text: %s", allText)
|
||||||
|
|
||||||
|
if !strings.Contains(allText, "big") {
|
||||||
|
t.Errorf("Expected 'big' in transcription, got: %s", allText)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The sample audio should contain recognizable speech
|
||||||
|
if len(allText) < 10 {
|
||||||
|
t.Errorf("Transcription too short: %q", allText)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -56,6 +56,21 @@
|
|||||||
nvidia-cuda-12: "cuda12-whisper"
|
nvidia-cuda-12: "cuda12-whisper"
|
||||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisper"
|
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisper"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-whisper"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-whisper"
|
||||||
|
# Shared metadata for the voxtral speech-to-text backend. The &voxtral anchor
# is merged into the concrete image entries further down in this file.
- &voxtral
  name: "voxtral"
  alias: "voxtral"
  license: mit
  description: |
    Voxtral Realtime 4B Pure C speech-to-text inference engine
  urls:
    - https://github.com/mudler/voxtral.c
  tags:
    - audio-transcription
    - CPU
    - Metal
  # Each value is the name of a concrete backend entry defined in this file;
  # the key appears to be the capability it is selected for (TODO confirm
  # against the gallery loader).
  capabilities:
    default: "cpu-voxtral"
    metal-darwin-arm64: "metal-voxtral"
|
||||||
- &stablediffusionggml
|
- &stablediffusionggml
|
||||||
name: "stablediffusion-ggml"
|
name: "stablediffusion-ggml"
|
||||||
alias: "stablediffusion-ggml"
|
alias: "stablediffusion-ggml"
|
||||||
@@ -2594,3 +2609,24 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-pocket-tts"
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-pocket-tts"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-metal-darwin-arm64-pocket-tts
|
- localai/localai-backends:master-metal-darwin-arm64-pocket-tts
|
||||||
|
## voxtral
# Concrete image entries for the voxtral backend. Each one merges the shared
# *voxtral metadata and overrides name, uri and mirrors. Entries whose name
# ends in "-development" point at "master-" image tags, the others at
# "latest-" tags.
- !!merge <<: *voxtral
  name: "cpu-voxtral"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-voxtral"
  mirrors:
    - localai/localai-backends:latest-cpu-voxtral
- !!merge <<: *voxtral
  name: "cpu-voxtral-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-voxtral"
  mirrors:
    - localai/localai-backends:master-cpu-voxtral
- !!merge <<: *voxtral
  name: "metal-voxtral"
  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voxtral"
  mirrors:
    - localai/localai-backends:latest-metal-darwin-arm64-voxtral
- !!merge <<: *voxtral
  name: "metal-voxtral-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voxtral"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-voxtral
|
||||||
|
|||||||
@@ -122,3 +122,4 @@ LocalAI supports various types of backends:
|
|||||||
- **Diffusion Backends**: For image generation
|
- **Diffusion Backends**: For image generation
|
||||||
- **TTS Backends**: For text-to-speech conversion
|
- **TTS Backends**: For text-to-speech conversion
|
||||||
- **Whisper Backends**: For speech-to-text conversion
|
- **Whisper Backends**: For speech-to-text conversion
|
||||||
|
- **Sound Generation Backends**: For music and audio generation (e.g., ACE-Step)
|
||||||
Reference in New Issue
Block a user