From a738f8b0e4008cb17a75ade4f5f62d2029a33989 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 12 Mar 2026 18:56:26 +0100 Subject: [PATCH] feat(backends): add ace-step.cpp (#8965) Signed-off-by: Ettore Di Giacinto --- .github/workflows/backend.yml | 122 ++++++++ .github/workflows/bump_deps.yaml | 4 + .github/workflows/test-extra.yml | 30 ++ Makefile | 6 +- backend/go/acestep-cpp/CMakeLists.txt | 54 ++++ backend/go/acestep-cpp/Makefile | 127 ++++++++ backend/go/acestep-cpp/acestepcpp_test.go | 195 +++++++++++++ backend/go/acestep-cpp/cpp/goacestepcpp.cpp | 306 ++++++++++++++++++++ backend/go/acestep-cpp/cpp/goacestepcpp.h | 11 + backend/go/acestep-cpp/goacestepcpp.go | 82 ++++++ backend/go/acestep-cpp/main.go | 47 +++ backend/go/acestep-cpp/package.sh | 65 +++++ backend/go/acestep-cpp/run.sh | 52 ++++ backend/go/acestep-cpp/test.sh | 54 ++++ backend/index.yaml | 124 ++++++++ core/config/model_config.go | 2 +- gallery/index.yaml | 77 +++++ 17 files changed, 1355 insertions(+), 3 deletions(-) create mode 100644 backend/go/acestep-cpp/CMakeLists.txt create mode 100644 backend/go/acestep-cpp/Makefile create mode 100644 backend/go/acestep-cpp/acestepcpp_test.go create mode 100644 backend/go/acestep-cpp/cpp/goacestepcpp.cpp create mode 100644 backend/go/acestep-cpp/cpp/goacestepcpp.h create mode 100644 backend/go/acestep-cpp/goacestepcpp.go create mode 100644 backend/go/acestep-cpp/main.go create mode 100755 backend/go/acestep-cpp/package.sh create mode 100755 backend/go/acestep-cpp/run.sh create mode 100755 backend/go/acestep-cpp/test.sh diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 030b02ef8..e6c3e7055 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -535,6 +535,19 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "8" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: 
'-gpu-nvidia-cuda-12-acestep-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "8" @@ -1069,6 +1082,32 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-acestep-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-acestep-cpp' + base-image: "ubuntu:24.04" + ubuntu-version: '2404' + runs-on: 'ubuntu-24.04-arm' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -1882,6 +1921,85 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + # acestep-cpp + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-acestep-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f32-acestep-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "acestep-cpp" 
+ dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f16-acestep-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-gpu-vulkan-acestep-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-acestep-cpp' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2204' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-rocm-hipblas-acestep-cpp' + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + runs-on: 'ubuntu-latest' + skip-drivers: 'false' + backend: "acestep-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' # voxtral - build-type: '' cuda-major-version: "" @@ -2183,6 +2301,10 @@ jobs: tag-suffix: "-metal-darwin-arm64-whisper" build-type: "metal" lang: "go" + - backend: "acestep-cpp" + tag-suffix: "-metal-darwin-arm64-acestep-cpp" + build-type: "metal" + lang: "go" - backend: "voxtral" tag-suffix: "-metal-darwin-arm64-voxtral" build-type: "metal" diff --git 
a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 74be8f168..49e489beb 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -30,6 +30,10 @@ jobs: variable: "VOXTRAL_VERSION" branch: "main" file: "backend/go/voxtral/Makefile" + - repository: "ace-step/acestep.cpp" + variable: "ACESTEP_CPP_VERSION" + branch: "master" + file: "backend/go/acestep-cpp/Makefile" runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index a254cafa5..a4bb15f11 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -383,6 +383,36 @@ jobs: run: | make --jobs=5 --output-sync=target -C backend/python/voxcpm make --jobs=5 --output-sync=target -C backend/python/voxcpm test + tests-acestep-cpp: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake curl libopenblas-dev ffmpeg + - name: Setup Go + uses: actions/setup-go@v5 + - name: Display Go version + run: go version + - name: Proto Dependencies + run: | + # Install protoc + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go + - name: Build acestep-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/acestep-cpp + - name: Test acestep-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/acestep-cpp test tests-voxtral: runs-on: ubuntu-latest steps: diff --git a/Makefile b/Makefile index fe1f94fdd..4a2385a45 100644 --- a/Makefile +++ b/Makefile @@ 
-1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/fish-speech backends/voxtral +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral GOCMD=go GOTEST=$(GOCMD) test @@ -476,6 +476,7 @@ BACKEND_SILERO_VAD = silero-vad|golang|.|false|true BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true BACKEND_WHISPER = whisper|golang|.|false|true BACKEND_VOXTRAL = voxtral|golang|.|false|true +BACKEND_ACESTEP_CPP = acestep-cpp|golang|.|false|true # Python backends with root context BACKEND_RERANKERS = 
rerankers|python|.|false|true @@ -557,13 +558,14 @@ $(eval $(call generate-docker-build-target,$(BACKEND_NEMO))) $(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM))) $(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX))) $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP))) +$(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED))) # Pattern rule for docker-save targets docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral docker-build-mlx-distributed +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed ######################################################## ### Mock Backend for E2E Tests diff --git a/backend/go/acestep-cpp/CMakeLists.txt b/backend/go/acestep-cpp/CMakeLists.txt new file mode 100644 index 000000000..2fee794f2 --- /dev/null +++ 
b/backend/go/acestep-cpp/CMakeLists.txt @@ -0,0 +1,54 @@ +cmake_minimum_required(VERSION 3.14) +project(goacestepcpp LANGUAGES C CXX) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +set(ACESTEP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sources/acestep.cpp) + +# Override upstream's CMAKE_CUDA_ARCHITECTURES before add_subdirectory. +# Upstream sets 120a/121a for CUDA >= 12.8, but those archs require a newer +# toolkit than 12.8.x ships. Pre-defining this variable makes the upstream +# "if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)" guard skip its broken defaults. +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-real;89-real") +endif() + +# EXCLUDE_FROM_ALL: only build targets we explicitly depend on (acestep-core, ggml), +# skip upstream standalone executables (ace-understand, dit-vae, etc.) +add_subdirectory(${ACESTEP_DIR} acestep EXCLUDE_FROM_ALL) + +add_library(goacestepcpp MODULE cpp/goacestepcpp.cpp) +target_link_libraries(goacestepcpp PRIVATE acestep-core ggml ggml-base ggml-cpu) + +# Include dirs matching link_ggml_backends macro, but with absolute paths +target_include_directories(goacestepcpp PRIVATE ${ACESTEP_DIR}/src ${ACESTEP_DIR}) +target_include_directories(goacestepcpp SYSTEM PRIVATE ${ACESTEP_DIR}/ggml/include) + +# Link GPU backends if available (mirrors link_ggml_backends macro) +foreach(backend blas cuda metal vulkan) + if(TARGET ggml-${backend}) + target_link_libraries(goacestepcpp PRIVATE ggml-${backend}) + string(TOUPPER ${backend} BACKEND_UPPER) + target_compile_definitions(goacestepcpp PRIVATE ACESTEP_HAVE_${BACKEND_UPPER}) + if(backend STREQUAL "cuda") + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + target_link_libraries(goacestepcpp PRIVATE CUDA::cudart) + endif() + endif() + endif() +endforeach() + +if(MSVC) + target_compile_options(goacestepcpp PRIVATE /W4 /wd4100 /wd4505) +else() + target_compile_options(goacestepcpp PRIVATE -Wall -Wextra -Wshadow 
-Wconversion + -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion) +endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) + target_link_libraries(goacestepcpp PRIVATE stdc++fs) +endif() + +set_property(TARGET goacestepcpp PROPERTY CXX_STANDARD 17) +set_target_properties(goacestepcpp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/backend/go/acestep-cpp/Makefile b/backend/go/acestep-cpp/Makefile new file mode 100644 index 000000000..9b3fd5ae0 --- /dev/null +++ b/backend/go/acestep-cpp/Makefile @@ -0,0 +1,127 @@ +CMAKE_ARGS?= +BUILD_TYPE?= +NATIVE?=false + +GOCMD?=go +GO_TAGS?= +JOBS?=$(shell nproc --ignore=1) + +# acestep.cpp version +ACESTEP_REPO?=https://github.com/ace-step/acestep.cpp +ACESTEP_CPP_VERSION?=master +SO_TARGET?=libgoacestepcpp.so + +CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF + +ifeq ($(NATIVE),false) + CMAKE_ARGS+=-DGGML_NATIVE=OFF +endif + +ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +else ifeq ($(BUILD_TYPE),hipblas) + CMAKE_ARGS+=-DGGML_HIPBLAS=ON +else ifeq ($(BUILD_TYPE),vulkan) + CMAKE_ARGS+=-DGGML_VULKAN=ON +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + endif +endif + +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DGGML_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx +endif + +sources/acestep.cpp: + mkdir -p sources/acestep.cpp + cd sources/acestep.cpp && \ + git init && \ + git remote add origin $(ACESTEP_REPO) && \ + git fetch origin && \ + git checkout $(ACESTEP_CPP_VERSION) && \ + git 
submodule update --init --recursive --depth 1 --single-branch + +# Detect OS +UNAME_S := $(shell uname -s) + +# Only build CPU variants on Linux +ifeq ($(UNAME_S),Linux) + VARIANT_TARGETS = libgoacestepcpp-avx.so libgoacestepcpp-avx2.so libgoacestepcpp-avx512.so libgoacestepcpp-fallback.so +else + # On non-Linux (e.g., Darwin), build only fallback variant + VARIANT_TARGETS = libgoacestepcpp-fallback.so +endif + +acestep-cpp: main.go goacestepcpp.go $(VARIANT_TARGETS) + CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o acestep-cpp ./ + +package: acestep-cpp + bash package.sh + +build: package + +clean: purge + rm -rf libgoacestepcpp*.so package sources/acestep.cpp acestep-cpp + +purge: + rm -rf build* + +# Variants must build sequentially: each uses its own build- directory, +# but parallel builds can still race on shared resources (jobserver, disk I/O). +.NOTPARALLEL: + +# Build all variants (Linux only) +ifeq ($(UNAME_S),Linux) +libgoacestepcpp-avx.so: sources/acestep.cpp + $(info ${GREEN}I acestep-cpp build info:avx${RESET}) + SO_TARGET=libgoacestepcpp-avx.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoacestepcpp-custom + rm -rf build-libgoacestepcpp-avx.so + +libgoacestepcpp-avx2.so: sources/acestep.cpp + $(info ${GREEN}I acestep-cpp build info:avx2${RESET}) + SO_TARGET=libgoacestepcpp-avx2.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgoacestepcpp-custom + rm -rf build-libgoacestepcpp-avx2.so + +libgoacestepcpp-avx512.so: sources/acestep.cpp + $(info ${GREEN}I acestep-cpp build info:avx512${RESET}) + SO_TARGET=libgoacestepcpp-avx512.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgoacestepcpp-custom + rm -rf build-libgoacestepcpp-avx512.so +endif + +# Build fallback variant (all platforms) 
+libgoacestepcpp-fallback.so: sources/acestep.cpp + $(info ${GREEN}I acestep-cpp build info:fallback${RESET}) + SO_TARGET=libgoacestepcpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoacestepcpp-custom + rm -rf build-libgoacestepcpp-fallback.so + +libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h + mkdir -p build-$(SO_TARGET) && \ + cd build-$(SO_TARGET) && \ + cmake .. $(CMAKE_ARGS) && \ + cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \ + cd .. && \ + mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) + +test: acestep-cpp + @echo "Running acestep-cpp tests..." + bash test.sh + @echo "acestep-cpp tests completed." + +all: acestep-cpp package diff --git a/backend/go/acestep-cpp/acestepcpp_test.go b/backend/go/acestep-cpp/acestepcpp_test.go new file mode 100644 index 000000000..f9dd86f56 --- /dev/null +++ b/backend/go/acestep-cpp/acestepcpp_test.go @@ -0,0 +1,195 @@ +package main + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +const ( + testAddr = "localhost:50051" + startupWait = 5 * time.Second +) + +func skipIfNoModel(t *testing.T) string { + t.Helper() + modelDir := os.Getenv("ACESTEP_MODEL_DIR") + if modelDir == "" { + t.Skip("ACESTEP_MODEL_DIR not set, skipping test (set to directory with GGUF models)") + } + if _, err := os.Stat(filepath.Join(modelDir, "acestep-5Hz-lm-0.6B-Q8_0.gguf")); os.IsNotExist(err) { + t.Skipf("LM model file not found in %s, skipping", modelDir) + } + if _, err := os.Stat(filepath.Join(modelDir, "Qwen3-Embedding-0.6B-Q8_0.gguf")); os.IsNotExist(err) { + t.Skipf("Text encoder model file not found in %s, skipping", modelDir) + } + if _, err := os.Stat(filepath.Join(modelDir, "acestep-v15-turbo-Q8_0.gguf")); 
os.IsNotExist(err) { + t.Skipf("DiT model file not found in %s, skipping", modelDir) + } + if _, err := os.Stat(filepath.Join(modelDir, "vae-BF16.gguf")); os.IsNotExist(err) { + t.Skipf("VAE model file not found in %s, skipping", modelDir) + } + return modelDir +} + +func startServer(t *testing.T) *exec.Cmd { + t.Helper() + binary := os.Getenv("ACESTEP_BINARY") + if binary == "" { + binary = "./acestep-cpp" + } + if _, err := os.Stat(binary); os.IsNotExist(err) { + t.Skipf("Backend binary not found at %s, skipping", binary) + } + cmd := exec.Command(binary, "--addr", testAddr) + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + if err := cmd.Start(); err != nil { + t.Fatalf("Failed to start server: %v", err) + } + time.Sleep(startupWait) + return cmd +} + +func stopServer(cmd *exec.Cmd) { + if cmd != nil && cmd.Process != nil { + cmd.Process.Kill() + cmd.Wait() + } +} + +func dialGRPC(t *testing.T) *grpc.ClientConn { + t.Helper() + conn, err := grpc.Dial(testAddr, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithDefaultCallOptions( + grpc.MaxCallRecvMsgSize(50*1024*1024), + grpc.MaxCallSendMsgSize(50*1024*1024), + ), + ) + if err != nil { + t.Fatalf("Failed to dial gRPC: %v", err) + } + return conn +} + +func TestServerHealth(t *testing.T) { + cmd := startServer(t) + defer stopServer(cmd) + + conn := dialGRPC(t) + defer conn.Close() + + client := pb.NewBackendClient(conn) + resp, err := client.Health(context.Background(), &pb.HealthMessage{}) + if err != nil { + t.Fatalf("Health check failed: %v", err) + } + if string(resp.Message) != "OK" { + t.Fatalf("Expected OK, got %s", string(resp.Message)) + } +} + +func TestLoadModel(t *testing.T) { + modelDir := skipIfNoModel(t) + cmd := startServer(t) + defer stopServer(cmd) + + conn := dialGRPC(t) + defer conn.Close() + + client := pb.NewBackendClient(conn) + resp, err := client.LoadModel(context.Background(), &pb.ModelOptions{ + ModelFile: filepath.Join(modelDir, "acestep-5Hz-lm-0.6B-Q8_0.gguf"), + 
Options: []string{ + "text_encoder_model:" + filepath.Join(modelDir, "Qwen3-Embedding-0.6B-Q8_0.gguf"), + "dit_model:" + filepath.Join(modelDir, "acestep-v15-turbo-Q8_0.gguf"), + "vae_model:" + filepath.Join(modelDir, "vae-BF16.gguf"), + }, + }) + if err != nil { + t.Fatalf("LoadModel failed: %v", err) + } + if !resp.Success { + t.Fatalf("LoadModel returned failure: %s", resp.Message) + } +} + +func TestSoundGeneration(t *testing.T) { + modelDir := skipIfNoModel(t) + + tmpDir, err := os.MkdirTemp("", "acestep-test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + + outputFile := filepath.Join(tmpDir, "output.wav") + + cmd := startServer(t) + defer stopServer(cmd) + + conn := dialGRPC(t) + defer conn.Close() + + client := pb.NewBackendClient(conn) + + // Load models + loadResp, err := client.LoadModel(context.Background(), &pb.ModelOptions{ + ModelFile: filepath.Join(modelDir, "acestep-5Hz-lm-0.6B-Q8_0.gguf"), + Options: []string{ + "text_encoder_model:" + filepath.Join(modelDir, "Qwen3-Embedding-0.6B-Q8_0.gguf"), + "dit_model:" + filepath.Join(modelDir, "acestep-v15-turbo-Q8_0.gguf"), + "vae_model:" + filepath.Join(modelDir, "vae-BF16.gguf"), + }, + }) + if err != nil { + t.Fatalf("LoadModel failed: %v", err) + } + if !loadResp.Success { + t.Fatalf("LoadModel returned failure: %s", loadResp.Message) + } + + // Generate music + duration := float32(10.0) + temperature := float32(0.85) + bpm := int32(120) + caption := "A cheerful electronic dance track" + timesig := "4/4" + + _, err = client.SoundGeneration(context.Background(), &pb.SoundGenerationRequest{ + Text: caption, + Caption: &caption, + Dst: outputFile, + Duration: &duration, + Temperature: &temperature, + Bpm: &bpm, + Timesignature: &timesig, + }) + if err != nil { + t.Fatalf("SoundGeneration failed: %v", err) + } + + // Verify output file exists and has content + info, err := os.Stat(outputFile) + if os.IsNotExist(err) { + t.Fatal("Output audio file was not created") + } + if err != nil { +
t.Fatalf("Failed to stat output file: %v", err) + } + + t.Logf("Output file size: %d bytes", info.Size()) + + // WAV header is 44 bytes minimum; any real audio should be much larger + if info.Size() < 1000 { + t.Errorf("Output file too small (%d bytes), expected real audio data", info.Size()) + } +} diff --git a/backend/go/acestep-cpp/cpp/goacestepcpp.cpp b/backend/go/acestep-cpp/cpp/goacestepcpp.cpp new file mode 100644 index 000000000..528905769 --- /dev/null +++ b/backend/go/acestep-cpp/cpp/goacestepcpp.cpp @@ -0,0 +1,306 @@ +#include "goacestepcpp.h" +#include "ggml-backend.h" + +#include "audio-io.h" +#include "bpe.h" +#include "cond-enc.h" +#include "dit-sampler.h" +#include "dit.h" +#include "gguf-weights.h" +#include "philox.h" +#include "qwen3-enc.h" +#include "qwen3-lm.h" +#include "request.h" +#include "vae.h" + +#include <cmath> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <random> +#include <string> +#include <vector> + +// Global model contexts (loaded once, reused across requests) +static DiTGGML g_dit = {}; +static DiTGGMLConfig g_dit_cfg; +static VAEGGML g_vae = {}; +static bool g_dit_loaded = false; +static bool g_vae_loaded = false; +static bool g_is_turbo = false; + +// Silence latent [15000, 64] — read once from DiT GGUF +static std::vector<float> g_silence_full; + +// Paths for per-request loading (text encoder, tokenizer) +static std::string g_text_enc_path; +static std::string g_dit_path; +static std::string g_lm_path; + +static void ggml_log_cb(enum ggml_log_level level, const char * log, void * data) { + const char * level_str; + if (!log) + return; + switch (level) { + case GGML_LOG_LEVEL_DEBUG: + level_str = "DEBUG"; + break; + case GGML_LOG_LEVEL_INFO: + level_str = "INFO"; + break; + case GGML_LOG_LEVEL_WARN: + level_str = "WARN"; + break; + case GGML_LOG_LEVEL_ERROR: + level_str = "ERROR"; + break; + default: + level_str = "?????"; + break; + } + fprintf(stderr, "[%-5s] ", level_str); + fputs(log, stderr); + fflush(stderr); +} + +int load_model(const char * lm_model_path, const
char * text_encoder_path, + const char * dit_model_path, const char * vae_model_path) { + ggml_log_set(ggml_log_cb, nullptr); + ggml_backend_load_all(); + + g_lm_path = lm_model_path; + g_text_enc_path = text_encoder_path; + g_dit_path = dit_model_path; + + // Load DiT model + fprintf(stderr, "[acestep-cpp] Loading DiT from %s\n", dit_model_path); + dit_ggml_init_backend(&g_dit); + if (!dit_ggml_load(&g_dit, dit_model_path, g_dit_cfg, nullptr, 0.0f)) { + fprintf(stderr, "[acestep-cpp] FATAL: failed to load DiT from %s\n", dit_model_path); + return 1; + } + g_dit_loaded = true; + + // Read DiT GGUF metadata + silence_latent + { + GGUFModel gf = {}; + if (gf_load(&gf, dit_model_path)) { + g_is_turbo = gf_get_bool(gf, "acestep.is_turbo"); + const void * sl_data = gf_get_data(gf, "silence_latent"); + if (sl_data) { + g_silence_full.resize(15000 * 64); + memcpy(g_silence_full.data(), sl_data, 15000 * 64 * sizeof(float)); + fprintf(stderr, "[acestep-cpp] silence_latent: [15000, 64] loaded\n"); + } else { + fprintf(stderr, "[acestep-cpp] FATAL: silence_latent not found in %s\n", dit_model_path); + gf_close(&gf); + return 2; + } + gf_close(&gf); + } else { + fprintf(stderr, "[acestep-cpp] FATAL: cannot read GGUF metadata from %s\n", dit_model_path); + return 2; + } + } + + // Load VAE model + fprintf(stderr, "[acestep-cpp] Loading VAE from %s\n", vae_model_path); + vae_ggml_load(&g_vae, vae_model_path); + g_vae_loaded = true; + + fprintf(stderr, "[acestep-cpp] All models loaded successfully (turbo=%d)\n", g_is_turbo); + return 0; +} + +int generate_music(const char * caption, const char * lyrics, int bpm, + const char * keyscale, const char * timesignature, + float duration, float temperature, bool instrumental, + int seed, const char * dst, int threads) { + if (!g_dit_loaded || !g_vae_loaded) { + fprintf(stderr, "[acestep-cpp] ERROR: models not loaded\n"); + return 1; + } + + const int FRAMES_PER_SECOND = 25; + + // Defaults + if (duration <= 0) + duration = 30.0f; + 
std::string cap_str = caption ? caption : ""; + std::string lyrics_str = (instrumental || !lyrics) ? "" : lyrics; + std::string ks_str = keyscale ? keyscale : "N/A"; + std::string ts_str = timesignature ? timesignature : "4/4"; + std::string lang_str = "unknown"; + char bpm_str[16]; + if (bpm > 0) { + snprintf(bpm_str, sizeof(bpm_str), "%d", bpm); + } else { + snprintf(bpm_str, sizeof(bpm_str), "N/A"); + } + + int num_steps = 8; + float guidance_scale = g_is_turbo ? 1.0f : 7.0f; + float shift = 1.0f; + + if (seed < 0) { + std::random_device rd; + seed = (int)(rd() & 0x7FFFFFFF); + } + + // Compute T (latent frames at 25Hz) + int T = (int)(duration * FRAMES_PER_SECOND); + T = ((T + g_dit_cfg.patch_size - 1) / g_dit_cfg.patch_size) * g_dit_cfg.patch_size; + int S = T / g_dit_cfg.patch_size; + + if (T > 15000) { + fprintf(stderr, "[acestep-cpp] ERROR: T=%d exceeds max 15000\n", T); + return 2; + } + + int Oc = g_dit_cfg.out_channels; // 64 + int ctx_ch = g_dit_cfg.in_channels - Oc; // 128 + + fprintf(stderr, "[acestep-cpp] T=%d, S=%d, duration=%.1fs, seed=%d\n", T, S, duration, seed); + + // 1. Load BPE tokenizer from text encoder GGUF + BPETokenizer tok; + if (!load_bpe_from_gguf(&tok, g_text_enc_path.c_str())) { + fprintf(stderr, "[acestep-cpp] FATAL: failed to load BPE tokenizer\n"); + return 3; + } + + // 2. Build formatted prompts (matches dit-vae.cpp text2music template) + std::string instruction = "Fill the audio semantic mask based on the given conditions:"; + + char metas[512]; + snprintf(metas, sizeof(metas), + "- bpm: %s\n- timesignature: %s\n- keyscale: %s\n- duration: %d seconds\n", + bpm_str, ts_str.c_str(), ks_str.c_str(), (int)duration); + + std::string text_str = std::string("# Instruction\n") + instruction + "\n\n" + + "# Caption\n" + cap_str + "\n\n" + + "# Metas\n" + metas + "<|endoftext|>\n"; + std::string lyric_str = std::string("# Languages\n") + lang_str + "\n\n# Lyric\n" + + lyrics_str + "<|endoftext|>"; + + // 3. 
Tokenize + auto text_ids = bpe_encode(&tok, text_str.c_str(), true); + auto lyric_ids = bpe_encode(&tok, lyric_str.c_str(), true); + int S_text = (int)text_ids.size(); + int S_lyric = (int)lyric_ids.size(); + + fprintf(stderr, "[acestep-cpp] caption: %d tokens, lyrics: %d tokens\n", S_text, S_lyric); + + // 4. Text encoder forward + Qwen3GGML text_enc = {}; + qwen3_init_backend(&text_enc); + if (!qwen3_load_text_encoder(&text_enc, g_text_enc_path.c_str())) { + fprintf(stderr, "[acestep-cpp] FATAL: failed to load text encoder\n"); + return 4; + } + + int H_text = text_enc.cfg.hidden_size; // 1024 + std::vector<float> text_hidden(H_text * S_text); + + qwen3_forward(&text_enc, text_ids.data(), S_text, text_hidden.data()); + fprintf(stderr, "[acestep-cpp] TextEncoder forward done\n"); + + // 5. Lyric embedding + std::vector<float> lyric_embed(H_text * S_lyric); + qwen3_embed_lookup(&text_enc, lyric_ids.data(), S_lyric, lyric_embed.data()); + + // 6. Condition encoder + CondGGML cond = {}; + cond_ggml_init_backend(&cond); + if (!cond_ggml_load(&cond, g_dit_path.c_str())) { + fprintf(stderr, "[acestep-cpp] FATAL: failed to load condition encoder\n"); + qwen3_free(&text_enc); + return 5; + } + + const int S_ref = 750; + std::vector<float> silence_feats(S_ref * 64); + memcpy(silence_feats.data(), g_silence_full.data(), S_ref * 64 * sizeof(float)); + + int enc_S = 0; + std::vector<float> enc_hidden; + cond_ggml_forward(&cond, text_hidden.data(), S_text, lyric_embed.data(), S_lyric, + silence_feats.data(), S_ref, enc_hidden, &enc_S); + fprintf(stderr, "[acestep-cpp] ConditionEncoder done, enc_S=%d\n", enc_S); + + qwen3_free(&text_enc); + cond_ggml_free(&cond); + + // 7. Build context [T, ctx_ch] = silence[64] + mask[64] + std::vector<float> context(T * ctx_ch); + for (int t = 0; t < T; t++) { + const float * src = g_silence_full.data() + t * Oc; + for (int c = 0; c < Oc; c++) { + context[t * ctx_ch + c] = src[c]; + } + for (int c = 0; c < Oc; c++) { + context[t * ctx_ch + Oc + c] = 1.0f; + } + } + + // 8.
Build schedule + std::vector<float> schedule(num_steps); + for (int i = 0; i < num_steps; i++) { + float t = 1.0f - (float)i / (float)num_steps; + schedule[i] = shift * t / (1.0f + (shift - 1.0f) * t); + } + + // 9. Generate noise (Philox) + std::vector<float> noise(Oc * T); + philox_randn((long long)seed, noise.data(), Oc * T, true); + + // 10. DiT generate + std::vector<float> output(Oc * T); + fprintf(stderr, "[acestep-cpp] DiT generate: T=%d, steps=%d, guidance=%.1f\n", T, num_steps, guidance_scale); + + dit_ggml_generate(&g_dit, noise.data(), context.data(), enc_hidden.data(), enc_S, + T, 1, num_steps, schedule.data(), output.data(), guidance_scale, + nullptr, nullptr, -1); + fprintf(stderr, "[acestep-cpp] DiT generation done\n"); + + // 11. VAE decode + int T_audio_max = T * 1920; + std::vector<float> audio(2 * T_audio_max); + + int T_audio = vae_ggml_decode_tiled(&g_vae, output.data(), T, audio.data(), T_audio_max, 256, 64); + if (T_audio < 0) { + fprintf(stderr, "[acestep-cpp] ERROR: VAE decode failed\n"); + return 6; + } + fprintf(stderr, "[acestep-cpp] VAE decode done: %d samples (%.2fs @ 48kHz)\n", T_audio, + (float)T_audio / 48000.0f); + + // 12. Peak normalization to -1.0 dB + { + float peak = 0.0f; + int n_samples = 2 * T_audio; + for (int i = 0; i < n_samples; i++) { + float a = audio[i] < 0 ? -audio[i] : audio[i]; + if (a > peak) { + peak = a; + } + } + if (peak > 1e-6f) { + const float target_amp = powf(10.0f, -1.0f / 20.0f); + float gain = target_amp / peak; + for (int i = 0; i < n_samples; i++) { + audio[i] *= gain; + } + } + } + + // 13.
Write WAV output
+    if (!audio_write_wav(dst, audio.data(), T_audio, 48000)) {
+        fprintf(stderr, "[acestep-cpp] ERROR: failed to write %s\n", dst);
+        return 7;
+    }
+
+    fprintf(stderr, "[acestep-cpp] Wrote %s: %d samples (%.2fs @ 48kHz stereo)\n",
+            dst, T_audio, (float)T_audio / 48000.0f);
+    return 0;
+}
diff --git a/backend/go/acestep-cpp/cpp/goacestepcpp.h b/backend/go/acestep-cpp/cpp/goacestepcpp.h
new file mode 100644
index 000000000..474f68629
--- /dev/null
+++ b/backend/go/acestep-cpp/cpp/goacestepcpp.h
@@ -0,0 +1,11 @@
+#include <cstddef>
+#include <cstdint>
+
+extern "C" {
+int load_model(const char *lm_model_path, const char *text_encoder_path,
+               const char *dit_model_path, const char *vae_model_path);
+int generate_music(const char *caption, const char *lyrics, int bpm,
+                   const char *keyscale, const char *timesignature,
+                   float duration, float temperature, bool instrumental,
+                   int seed, const char *dst, int threads);
+}
diff --git a/backend/go/acestep-cpp/goacestepcpp.go b/backend/go/acestep-cpp/goacestepcpp.go
new file mode 100644
index 000000000..e1243c3e9
--- /dev/null
+++ b/backend/go/acestep-cpp/goacestepcpp.go
@@ -0,0 +1,133 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+// defaultThreads is the thread count used when the model options do not set one.
+const defaultThreads = 4
+
+// CppLoadModel and CppGenerateMusic are bound in main.go to the load_model and
+// generate_music symbols of the shared library. Callers here treat a return
+// value of 0 as success and anything else as an error code.
+var (
+	CppLoadModel     func(lmModelPath, textEncoderPath, ditModelPath, vaeModelPath string) int
+	CppGenerateMusic func(caption, lyrics string, bpm int, keyscale, timesignature string, duration, temperature float32, instrumental bool, seed int, dst string, threads int) int
+)
+
+// AceStepCpp is the backend type registered with the gRPC server in main.go.
+type AceStepCpp struct {
+	base.SingleThread
+
+	// threads is the thread count passed to generate_music; set by Load.
+	threads int
+}
+
+// resolveModelPath maps a model path taken from the options to the file to load.
+// Empty and absolute paths, and relative paths that already exist from the
+// current working directory, are returned unchanged; any other relative path is
+// joined onto modelDir (run.sh does `cd /`, so it would not resolve otherwise).
+func resolveModelPath(modelDir, p string) string {
+	if p == "" || modelDir == "" || filepath.IsAbs(p) {
+		return p
+	}
+	if _, err := os.Stat(p); err == nil {
+		return p
+	}
+	return filepath.Join(modelDir, p)
+}
+
+// Load reads the text_encoder_model, dit_model and vae_model entries from
+// opts.Options ("key:value"), resolves them and hands all four model paths to
+// the native load_model. It returns an error when a required option is missing
+// or when load_model reports a non-zero code.
+func (a *AceStepCpp) Load(opts *pb.ModelOptions) error {
+	// ModelFile is the LM model path
+	lmModel := opts.ModelFile
+
+	var textEncoderModel, ditModel, vaeModel string
+
+	for _, oo := range opts.Options {
+		parts := strings.SplitN(oo, ":", 2)
+		if len(parts) != 2 {
+			fmt.Fprintf(os.Stderr, "Unrecognized option: %v\n", oo)
+			continue
+		}
+		switch parts[0] {
+		case "text_encoder_model":
+			textEncoderModel = parts[1]
+		case "dit_model":
+			ditModel = parts[1]
+		case "vae_model":
+			vaeModel = parts[1]
+		default:
+			fmt.Fprintf(os.Stderr, "Unrecognized option: %v\n", oo)
+		}
+	}
+
+	if textEncoderModel == "" {
+		return fmt.Errorf("text_encoder_model option is required")
+	}
+	if ditModel == "" {
+		return fmt.Errorf("dit_model option is required")
+	}
+	if vaeModel == "" {
+		return fmt.Errorf("vae_model option is required")
+	}
+
+	// NOTE(review): presumably option paths are relative to opts.ModelPath (the
+	// gallery entries use "acestep-cpp/<file>.gguf") - confirm against the caller.
+	modelDir := opts.GetModelPath()
+	textEncoderModel = resolveModelPath(modelDir, textEncoderModel)
+	ditModel = resolveModelPath(modelDir, ditModel)
+	vaeModel = resolveModelPath(modelDir, vaeModel)
+
+	// Use the configured thread count rather than a fixed value.
+	a.threads = int(opts.GetThreads())
+	if a.threads <= 0 {
+		a.threads = defaultThreads
+	}
+
+	if ret := CppLoadModel(lmModel, textEncoderModel, ditModel, vaeModel); ret != 0 {
+		return fmt.Errorf("failed to load acestep models (error code: %d)", ret)
+	}
+
+	return nil
+}
+
+// SoundGeneration forwards the request fields to the native generate_music,
+// which writes its output to req.GetDst(). The caption falls back to the
+// request text when empty. A non-zero native return code becomes an error.
+func (a *AceStepCpp) SoundGeneration(req *pb.SoundGenerationRequest) error {
+	caption := req.GetCaption()
+	if caption == "" {
+		caption = req.GetText()
+	}
+	lyrics := req.GetLyrics()
+	bpm := int(req.GetBpm())
+	keyscale := req.GetKeyscale()
+	timesignature := req.GetTimesignature()
+	duration := req.GetDuration()
+	temperature := req.GetTemperature()
+	instrumental := req.GetInstrumental()
+	// NOTE(review): the seed is fixed, so every request passes the same value to
+	// the native side - confirm whether it should come from the request/options.
+	seed := 42
+	threads := a.threads
+	if threads <= 0 {
+		// Load has not set a value: keep the previous default.
+		threads = defaultThreads
+	}
+
+	if ret := CppGenerateMusic(caption, lyrics, bpm, keyscale, timesignature, duration, temperature, instrumental, seed, req.GetDst(), threads); ret != 0 {
+		return fmt.Errorf("failed to generate music (error code: %d)", ret)
+	}
+
+	return nil
+}
diff --git a/backend/go/acestep-cpp/main.go b/backend/go/acestep-cpp/main.go
new file mode 100644
index 000000000..c65afb335
--- /dev/null
+++ b/backend/go/acestep-cpp/main.go
@@ -0,0 +1,47 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each model
+import (
+	"flag"
+	"os"
+
+	"github.com/ebitengine/purego"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+type LibFuncs struct {
+	FuncPtr any
+	Name    string
+}
+
+func main() {
+	// Get library name from environment variable, default to fallback
+	libName :=
os.Getenv("ACESTEP_LIBRARY")
+	if libName == "" {
+		libName = "./libgoacestepcpp-fallback.so"
+	}
+
+	gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if err != nil {
+		panic(err)
+	}
+
+	libFuncs := []LibFuncs{
+		{&CppLoadModel, "load_model"},
+		{&CppGenerateMusic, "generate_music"},
+	}
+
+	for _, lf := range libFuncs {
+		purego.RegisterLibFunc(lf.FuncPtr, gosd, lf.Name)
+	}
+
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &AceStepCpp{}); err != nil {
+		panic(err)
+	}
+}
diff --git a/backend/go/acestep-cpp/package.sh b/backend/go/acestep-cpp/package.sh
new file mode 100755
index 000000000..d922c5b86
--- /dev/null
+++ b/backend/go/acestep-cpp/package.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Script to copy the appropriate libraries based on architecture
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+REPO_ROOT="${CURDIR}/../../.."
+
+# Create lib directory
+mkdir -p "$CURDIR/package/lib"
+
+cp -avf "$CURDIR/acestep-cpp" "$CURDIR/package/"
+cp -fv "$CURDIR"/libgoacestepcpp-*.so "$CURDIR/package/"
+cp -fv "$CURDIR/run.sh" "$CURDIR/package/"
+
+# Detect architecture and copy appropriate libraries
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+    # x86_64 architecture
+    echo "Detected x86_64 architecture, copying x86_64 libraries..."
+    cp -arfLv /lib64/ld-linux-x86-64.so.2 "$CURDIR/package/lib/ld.so"
+    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 "$CURDIR/package/lib/libc.so.6"
+    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 "$CURDIR/package/lib/libgcc_s.so.1"
+    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 "$CURDIR/package/lib/libstdc++.so.6"
+    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 "$CURDIR/package/lib/libm.so.6"
+    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 "$CURDIR/package/lib/libgomp.so.1"
+    cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 "$CURDIR/package/lib/libdl.so.2"
+    cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
+    cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+    # ARM64 architecture
+    echo "Detected ARM64 architecture, copying ARM64 libraries..."
+    cp -arfLv /lib/ld-linux-aarch64.so.1 "$CURDIR/package/lib/ld.so"
+    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 "$CURDIR/package/lib/libc.so.6"
+    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 "$CURDIR/package/lib/libgcc_s.so.1"
+    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 "$CURDIR/package/lib/libstdc++.so.6"
+    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 "$CURDIR/package/lib/libm.so.6"
+    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 "$CURDIR/package/lib/libgomp.so.1"
+    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 "$CURDIR/package/lib/libdl.so.2"
+    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
+    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
+elif [ "$(uname -s)" = "Darwin" ]; then
+    echo "Detected Darwin"
+else
+    echo "Error: Could not detect architecture"
+    exit 1
+fi
+
+# Package GPU libraries based on BUILD_TYPE
+# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
+GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
+if [ -f "$GPU_LIB_SCRIPT" ]; then
+    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
+echo "Packaging completed successfully"
+ls -liah "$CURDIR/package/"
+ls -liah "$CURDIR/package/lib/"
diff --git a/backend/go/acestep-cpp/run.sh b/backend/go/acestep-cpp/run.sh
new file mode 100755
index 000000000..d901e2c85
--- /dev/null
+++ b/backend/go/acestep-cpp/run.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+set -ex
+
+# Get the absolute current dir where the script is located
+CURDIR=$(dirname "$(realpath "$0")")
+
+cd /
+
+echo "CPU info:"
+if [ "$(uname)" != "Darwin" ]; then
+    grep -e "model\sname" /proc/cpuinfo | head -1
+    grep -e "flags" /proc/cpuinfo | head -1
+fi
+
+LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
+
+if [ "$(uname)" != "Darwin" ]; then
+    if grep -q -e "\savx\s" /proc/cpuinfo ; then
+        echo "CPU: AVX found OK"
+        if [ -e "$CURDIR/libgoacestepcpp-avx.so" ]; then
+            LIBRARY="$CURDIR/libgoacestepcpp-avx.so"
+        fi
+    fi
+
+    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
+        echo "CPU: AVX2 found OK"
+        if [ -e "$CURDIR/libgoacestepcpp-avx2.so" ]; then
+            LIBRARY="$CURDIR/libgoacestepcpp-avx2.so"
+        fi
+    fi
+
+    # Check avx 512
+    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
+        echo "CPU: AVX512F found OK"
+        if [ -e "$CURDIR/libgoacestepcpp-avx512.so" ]; then
+            LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
+        fi
+    fi
+fi
+
+# Prepend the bundled libraries. The ${VAR:+...} form avoids a trailing ':'
+# (an empty entry, i.e. the current directory) when LD_LIBRARY_PATH is unset.
+export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+export ACESTEP_LIBRARY="$LIBRARY"
+
+# If there is a lib/ld.so, use it
+if [ -f "$CURDIR/lib/ld.so" ]; then
+    echo "Using lib/ld.so"
+    echo "Using library: $LIBRARY"
+    exec "$CURDIR/lib/ld.so" "$CURDIR/acestep-cpp" "$@"
+fi
+
+echo "Using library: $LIBRARY"
+exec "$CURDIR/acestep-cpp" "$@"
diff --git a/backend/go/acestep-cpp/test.sh
b/backend/go/acestep-cpp/test.sh
new file mode 100755
index 000000000..e9b103d29
--- /dev/null
+++ b/backend/go/acestep-cpp/test.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+echo "Running acestep-cpp backend tests..."
+
+# The test requires:
+# - ACESTEP_MODEL_DIR: path to directory containing GGUF model files
+# - ACESTEP_BINARY: path to the acestep-cpp binary (defaults to ./acestep-cpp)
+#
+# Tests that require the model will be skipped if ACESTEP_MODEL_DIR is not set
+# or the directory does not contain the required model files.
+
+cd "$CURDIR"
+
+# Only auto-download models when ACESTEP_MODEL_DIR is not explicitly set
+if [ -z "$ACESTEP_MODEL_DIR" ]; then
+    export ACESTEP_MODEL_DIR="./acestep-models"
+
+    if [ ! -d "$ACESTEP_MODEL_DIR" ]; then
+        echo "Creating acestep-models directory for tests..."
+        mkdir -p "$ACESTEP_MODEL_DIR"
+    fi
+
+    REPO_ID="Serveurperso/ACE-Step-1.5-GGUF"
+    echo "Repository: ${REPO_ID}"
+    echo ""
+
+    # Files to download (smallest quantizations for testing)
+    FILES=(
+        "acestep-5Hz-lm-0.6B-Q8_0.gguf"
+        "Qwen3-Embedding-0.6B-Q8_0.gguf"
+        "acestep-v15-turbo-Q8_0.gguf"
+        "vae-BF16.gguf"
+    )
+
+    BASE_URL="https://huggingface.co/${REPO_ID}/resolve/main"
+
+    # Check every file on every run so an interrupted download is resumed
+    # instead of being skipped because the directory already exists.
+    for file in "${FILES[@]}"; do
+        dest="${ACESTEP_MODEL_DIR}/${file}"
+        if [ -f "${dest}" ]; then
+            echo " [skip] ${file} (already exists)"
+        else
+            echo " [download] ${file}..."
+            # --fail: abort on HTTP errors instead of saving the error page;
+            # the .part file keeps a partial download from passing the -f check.
+            curl -fL -o "${dest}.part" "${BASE_URL}/${file}" --progress-bar
+            mv "${dest}.part" "${dest}"
+            echo " [done] ${file}"
+        fi
+    done
+fi
+
+# Run Go tests
+go test -v -timeout 600s .
+
+echo "All acestep-cpp tests passed."
diff --git a/backend/index.yaml b/backend/index.yaml
index 2271ad1b1..db095659c 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -364,6 +364,29 @@
     default: "cpu-ace-step-development"
     nvidia-cuda-13: "cuda13-ace-step-development"
     nvidia-cuda-12: "cuda12-ace-step-development"
+- &acestepcpp
+  name: "acestep-cpp"
+  description: |
+    ACE-Step 1.5 C++ backend using GGML.
Native C++ implementation of ACE-Step music generation with GPU support through GGML backends. + Generates stereo 48kHz audio from text descriptions and optional lyrics via a two-stage pipeline: text-to-code (ace-qwen3 LLM) + code-to-audio (DiT-VAE). + urls: + - https://github.com/ace-step/acestep.cpp + tags: + - music-generation + - sound-generation + alias: "acestep-cpp" + capabilities: + default: "cpu-acestep-cpp" + nvidia: "cuda12-acestep-cpp" + nvidia-cuda-13: "cuda13-acestep-cpp" + nvidia-cuda-12: "cuda12-acestep-cpp" + intel: "intel-sycl-f16-acestep-cpp" + metal: "metal-acestep-cpp" + amd: "rocm-acestep-cpp" + vulkan: "vulkan-acestep-cpp" + nvidia-l4t: "nvidia-l4t-arm64-acestep-cpp" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-acestep-cpp" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-acestep-cpp" - &faster-whisper icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4 description: | @@ -1845,6 +1868,107 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-ace-step" mirrors: - localai/localai-backends:master-metal-darwin-arm64-ace-step +## acestep-cpp +- !!merge <<: *acestepcpp + name: "nvidia-l4t-arm64-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-acestep-cpp" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-arm64-acestep-cpp +- !!merge <<: *acestepcpp + name: "nvidia-l4t-arm64-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-acestep-cpp" + mirrors: + - localai/localai-backends:master-nvidia-l4t-arm64-acestep-cpp +- !!merge <<: *acestepcpp + name: "cuda13-nvidia-l4t-arm64-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-acestep-cpp" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-acestep-cpp +- !!merge <<: *acestepcpp + name: "cuda13-nvidia-l4t-arm64-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-acestep-cpp" + mirrors: + - 
localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-acestep-cpp +- !!merge <<: *acestepcpp + name: "cpu-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-acestep-cpp" + mirrors: + - localai/localai-backends:latest-cpu-acestep-cpp +- !!merge <<: *acestepcpp + name: "metal-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-acestep-cpp" + mirrors: + - localai/localai-backends:latest-metal-darwin-arm64-acestep-cpp +- !!merge <<: *acestepcpp + name: "metal-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-acestep-cpp" + mirrors: + - localai/localai-backends:master-metal-darwin-arm64-acestep-cpp +- !!merge <<: *acestepcpp + name: "cpu-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-acestep-cpp" + mirrors: + - localai/localai-backends:master-cpu-acestep-cpp +- !!merge <<: *acestepcpp + name: "cuda12-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-acestep-cpp" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-12-acestep-cpp +- !!merge <<: *acestepcpp + name: "rocm-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-acestep-cpp" + mirrors: + - localai/localai-backends:latest-gpu-rocm-hipblas-acestep-cpp +- !!merge <<: *acestepcpp + name: "intel-sycl-f32-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-acestep-cpp" + mirrors: + - localai/localai-backends:latest-gpu-intel-sycl-f32-acestep-cpp +- !!merge <<: *acestepcpp + name: "intel-sycl-f16-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-acestep-cpp" + mirrors: + - localai/localai-backends:latest-gpu-intel-sycl-f16-acestep-cpp +- !!merge <<: *acestepcpp + name: "vulkan-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-acestep-cpp" + mirrors: + - localai/localai-backends:latest-gpu-vulkan-acestep-cpp +- !!merge <<: *acestepcpp 
+ name: "vulkan-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-acestep-cpp" + mirrors: + - localai/localai-backends:master-gpu-vulkan-acestep-cpp +- !!merge <<: *acestepcpp + name: "cuda12-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-acestep-cpp" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-12-acestep-cpp +- !!merge <<: *acestepcpp + name: "rocm-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-acestep-cpp" + mirrors: + - localai/localai-backends:master-gpu-rocm-hipblas-acestep-cpp +- !!merge <<: *acestepcpp + name: "intel-sycl-f32-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-acestep-cpp" + mirrors: + - localai/localai-backends:master-gpu-intel-sycl-f32-acestep-cpp +- !!merge <<: *acestepcpp + name: "intel-sycl-f16-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-acestep-cpp" + mirrors: + - localai/localai-backends:master-gpu-intel-sycl-f16-acestep-cpp +- !!merge <<: *acestepcpp + name: "cuda13-acestep-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-acestep-cpp" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-acestep-cpp +- !!merge <<: *acestepcpp + name: "cuda13-acestep-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-acestep-cpp" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-acestep-cpp ## kokoro - !!merge <<: *kokoro name: "kokoro-development" diff --git a/core/config/model_config.go b/core/config/model_config.go index bcb6105ac..ef7208bf9 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -675,7 +675,7 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool { } if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION { - soundGenBackends := []string{"transformers-musicgen", 
"ace-step", "mock-backend"} + soundGenBackends := []string{"transformers-musicgen", "ace-step", "acestep-cpp", "mock-backend"} if !slices.Contains(soundGenBackends, c.Backend) { return false } diff --git a/gallery/index.yaml b/gallery/index.yaml index 92f57c5d1..331846836 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -725,6 +725,83 @@ - "inference_steps:8" - "guidance_scale:7.0" - "batch_size:1" +- name: "acestep-cpp-turbo" + license: mit + tags: + - music + - audio + - music-generation + - sound-generation + - acestep-cpp + - ace-step-1.5 + - gguf + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + urls: + - https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF + - https://github.com/ace-step/acestep.cpp + description: | + ACE-Step 1.5 Turbo (C++ / GGML) — native C++ music generation from text descriptions and lyrics. + Two-stage pipeline: text-to-code (Qwen3 LM) + code-to-audio (DiT-VAE). Stereo 48kHz output. + Uses Q8_0 quantized models for a good balance of quality and speed. 
+ overrides: + name: acestep-cpp-turbo + backend: acestep-cpp + parameters: + model: acestep-cpp/acestep-5Hz-lm-0.6B-Q8_0.gguf + known_usecases: + - sound_generation + - tts + options: + - "text_encoder_model:acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf" + - "dit_model:acestep-cpp/acestep-v15-turbo-Q8_0.gguf" + - "vae_model:acestep-cpp/vae-BF16.gguf" + files: + - filename: acestep-cpp/acestep-5Hz-lm-0.6B-Q8_0.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-5Hz-lm-0.6B-Q8_0.gguf + - filename: acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf + - filename: acestep-cpp/acestep-v15-turbo-Q8_0.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-v15-turbo-Q8_0.gguf + - filename: acestep-cpp/vae-BF16.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/vae-BF16.gguf +- name: "acestep-cpp-turbo-4b" + license: mit + tags: + - music + - audio + - music-generation + - sound-generation + - acestep-cpp + - ace-step-1.5 + - gguf + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + urls: + - https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF + - https://github.com/ace-step/acestep.cpp + description: | + ACE-Step 1.5 Turbo (C++ / GGML) with 4B LM — higher quality music generation from text and lyrics. + Uses the larger 4B parameter LM for better metadata/code generation. Stereo 48kHz output. 
+ overrides: + name: acestep-cpp-turbo-4b + backend: acestep-cpp + parameters: + model: acestep-cpp/acestep-5Hz-lm-4B-Q8_0.gguf + known_usecases: + - sound_generation + - tts + options: + - "text_encoder_model:acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf" + - "dit_model:acestep-cpp/acestep-v15-turbo-Q8_0.gguf" + - "vae_model:acestep-cpp/vae-BF16.gguf" + files: + - filename: acestep-cpp/acestep-5Hz-lm-4B-Q8_0.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-5Hz-lm-4B-Q8_0.gguf + - filename: acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf + - filename: acestep-cpp/acestep-v15-turbo-Q8_0.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-v15-turbo-Q8_0.gguf + - filename: acestep-cpp/vae-BF16.gguf + uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/vae-BF16.gguf - name: "qwen3-coder-next-mxfp4_moe" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" urls: