feat(backends): add ace-step.cpp (#8965)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-03-12 18:56:26 +01:00
committed by GitHub
parent 8f3efaed15
commit a738f8b0e4
17 changed files with 1355 additions and 3 deletions

View File

@@ -535,6 +535,19 @@ jobs:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-acestep-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -1069,6 +1082,32 @@ jobs:
backend: "whisper"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-acestep-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-acestep-cpp'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -1882,6 +1921,85 @@ jobs:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# acestep-cpp
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-acestep-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-acestep-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-acestep-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-acestep-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-acestep-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-acestep-cpp'
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "acestep-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# voxtral
- build-type: ''
cuda-major-version: ""
@@ -2183,6 +2301,10 @@ jobs:
tag-suffix: "-metal-darwin-arm64-whisper"
build-type: "metal"
lang: "go"
- backend: "acestep-cpp"
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
build-type: "metal"
lang: "go"
- backend: "voxtral"
tag-suffix: "-metal-darwin-arm64-voxtral"
build-type: "metal"

View File

@@ -30,6 +30,10 @@ jobs:
variable: "VOXTRAL_VERSION"
branch: "main"
file: "backend/go/voxtral/Makefile"
- repository: "ace-step/acestep.cpp"
variable: "ACESTEP_CPP_VERSION"
branch: "master"
file: "backend/go/acestep-cpp/Makefile"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6

View File

@@ -383,6 +383,36 @@ jobs:
run: |
make --jobs=5 --output-sync=target -C backend/python/voxcpm
make --jobs=5 --output-sync=target -C backend/python/voxcpm test
tests-acestep-cpp:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake curl libopenblas-dev ffmpeg
- name: Setup Go
uses: actions/setup-go@v5
- name: Display Go version
run: go version
- name: Proto Dependencies
run: |
# Install protoc
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build acestep-cpp
run: |
make --jobs=5 --output-sync=target -C backend/go/acestep-cpp
- name: Test acestep-cpp
run: |
make --jobs=5 --output-sync=target -C backend/go/acestep-cpp test
tests-voxtral:
runs-on: ubuntu-latest
steps:

View File

@@ -1,5 +1,5 @@
# Disable parallel execution for backend builds
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/fish-speech backends/voxtral
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral
GOCMD=go
GOTEST=$(GOCMD) test
@@ -476,6 +476,7 @@ BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
BACKEND_WHISPER = whisper|golang|.|false|true
BACKEND_VOXTRAL = voxtral|golang|.|false|true
BACKEND_ACESTEP_CPP = acestep-cpp|golang|.|false|true
# Python backends with root context
BACKEND_RERANKERS = rerankers|python|.|false|true
@@ -557,13 +558,14 @@ $(eval $(call generate-docker-build-target,$(BACKEND_NEMO)))
$(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX)))
$(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP)))
$(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED)))
# Pattern rule for docker-save targets
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral docker-build-mlx-distributed
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed
########################################################
### Mock Backend for E2E Tests

View File

@@ -0,0 +1,54 @@
cmake_minimum_required(VERSION 3.14)
project(goacestepcpp LANGUAGES C CXX)

# The library is loaded at runtime from Go via dlopen (purego), so all
# objects must be position independent.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Upstream acestep.cpp checkout, populated by the Makefile's sources/ target.
set(ACESTEP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sources/acestep.cpp)

# Override upstream's CMAKE_CUDA_ARCHITECTURES before add_subdirectory.
# Upstream sets 120a/121a for CUDA >= 12.8, but those archs require a newer
# toolkit than 12.8.x ships. Pre-defining this variable makes the upstream
# "if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)" guard skip its broken defaults.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-real;89-real")
endif()

# EXCLUDE_FROM_ALL: only build targets we explicitly depend on (acestep-core, ggml),
# skip upstream standalone executables (ace-understand, dit-vae, etc.)
add_subdirectory(${ACESTEP_DIR} acestep EXCLUDE_FROM_ALL)

# MODULE library: a plugin meant only for dlopen, never linked against directly.
add_library(goacestepcpp MODULE cpp/goacestepcpp.cpp)
target_link_libraries(goacestepcpp PRIVATE acestep-core ggml ggml-base ggml-cpu)

# Include dirs matching link_ggml_backends macro, but with absolute paths
target_include_directories(goacestepcpp PRIVATE ${ACESTEP_DIR}/src ${ACESTEP_DIR})
target_include_directories(goacestepcpp SYSTEM PRIVATE ${ACESTEP_DIR}/ggml/include)

# Link GPU backends if available (mirrors link_ggml_backends macro)
foreach(backend blas cuda metal vulkan)
  if(TARGET ggml-${backend})
    target_link_libraries(goacestepcpp PRIVATE ggml-${backend})
    string(TOUPPER ${backend} BACKEND_UPPER)
    target_compile_definitions(goacestepcpp PRIVATE ACESTEP_HAVE_${BACKEND_UPPER})
    if(backend STREQUAL "cuda")
      # CUDA runtime is linked explicitly when the toolkit is present.
      find_package(CUDAToolkit QUIET)
      if(CUDAToolkit_FOUND)
        target_link_libraries(goacestepcpp PRIVATE CUDA::cudart)
      endif()
    endif()
  endif()
endforeach()

# Strict warnings minus the categories upstream headers routinely trigger.
if(MSVC)
  target_compile_options(goacestepcpp PRIVATE /W4 /wd4100 /wd4505)
else()
  target_compile_options(goacestepcpp PRIVATE -Wall -Wextra -Wshadow -Wconversion
    -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion)
endif()

# GCC < 9 ships std::filesystem in a separate library.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
  target_link_libraries(goacestepcpp PRIVATE stdc++fs)
endif()

set_property(TARGET goacestepcpp PROPERTY CXX_STANDARD 17)
set_target_properties(goacestepcpp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

View File

@@ -0,0 +1,127 @@
CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=false
GOCMD?=go
GO_TAGS?=

# Detect OS up front: the BUILD_TYPE dispatch below needs it. The original
# checked $(OS), which is a Windows-only environment variable and is normally
# unset on Linux/macOS, so the Darwin/Metal branch could never trigger.
UNAME_S := $(shell uname -s)

# Parallel build jobs. `nproc` is Linux-only; fall back to sysctl on macOS,
# then to a safe constant, so -j$(JOBS) never expands to a bare (unlimited) -j.
JOBS?=$(shell nproc --ignore=1 2>/dev/null || sysctl -n hw.logicalcpu 2>/dev/null || echo 4)

# acestep.cpp version
ACESTEP_REPO?=https://github.com/ace-step/acestep.cpp
ACESTEP_CPP_VERSION?=master

SO_TARGET?=libgoacestepcpp.so

CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif

# Accelerator backend selection based on BUILD_TYPE.
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=ON
else ifeq ($(UNAME_S),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
endif
endif

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
	-DCMAKE_C_COMPILER=icx \
	-DCMAKE_CXX_COMPILER=icpx \
	-DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
	-DCMAKE_C_COMPILER=icx \
	-DCMAKE_CXX_COMPILER=icpx
endif

# Fetch the pinned upstream revision plus its submodules.
sources/acestep.cpp:
	mkdir -p sources/acestep.cpp
	cd sources/acestep.cpp && \
	git init && \
	git remote add origin $(ACESTEP_REPO) && \
	git fetch origin && \
	git checkout $(ACESTEP_CPP_VERSION) && \
	git submodule update --init --recursive --depth 1 --single-branch

# Only build CPU variants on Linux
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgoacestepcpp-avx.so libgoacestepcpp-avx2.so libgoacestepcpp-avx512.so libgoacestepcpp-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = libgoacestepcpp-fallback.so
endif

acestep-cpp: main.go goacestepcpp.go $(VARIANT_TARGETS)
	CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o acestep-cpp ./

package: acestep-cpp
	bash package.sh

build: package

clean: purge
	rm -rf libgoacestepcpp*.so package sources/acestep.cpp acestep-cpp

purge:
	rm -rf build*

# Variants must build sequentially: each uses its own build-<name> directory,
# but parallel builds can still race on shared resources (jobserver, disk I/O).
.NOTPARALLEL:

# Build all variants (Linux only)
ifeq ($(UNAME_S),Linux)
libgoacestepcpp-avx.so: sources/acestep.cpp
	$(info ${GREEN}I acestep-cpp build info:avx${RESET})
	SO_TARGET=libgoacestepcpp-avx.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoacestepcpp-custom
	rm -rf build-libgoacestepcpp-avx.so

libgoacestepcpp-avx2.so: sources/acestep.cpp
	$(info ${GREEN}I acestep-cpp build info:avx2${RESET})
	SO_TARGET=libgoacestepcpp-avx2.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgoacestepcpp-custom
	rm -rf build-libgoacestepcpp-avx2.so

libgoacestepcpp-avx512.so: sources/acestep.cpp
	$(info ${GREEN}I acestep-cpp build info:avx512${RESET})
	SO_TARGET=libgoacestepcpp-avx512.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgoacestepcpp-custom
	rm -rf build-libgoacestepcpp-avx512.so
endif

# Build fallback variant (all platforms)
libgoacestepcpp-fallback.so: sources/acestep.cpp
	$(info ${GREEN}I acestep-cpp build info:fallback${RESET})
	SO_TARGET=libgoacestepcpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoacestepcpp-custom
	rm -rf build-libgoacestepcpp-fallback.so

# Configure+build one variant in an isolated build dir, then move the library
# to its variant-specific name.
libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
	mkdir -p build-$(SO_TARGET) && \
	cd build-$(SO_TARGET) && \
	cmake .. $(CMAKE_ARGS) && \
	cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
	cd .. && \
	mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET)

test: acestep-cpp
	@echo "Running acestep-cpp tests..."
	bash test.sh
	@echo "acestep-cpp tests completed."

all: acestep-cpp package

View File

@@ -0,0 +1,195 @@
package main
import (
	"context"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"testing"
	"time"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)
const (
testAddr = "localhost:50051"
startupWait = 5 * time.Second
)
// skipIfNoModel returns the model directory from ACESTEP_MODEL_DIR, skipping
// the calling test when the variable is unset or any of the four required
// GGUF files is missing. Skip messages are unchanged from the original
// per-file checks; the table just removes the copy-pasted stat stanzas.
func skipIfNoModel(t *testing.T) string {
	t.Helper()
	modelDir := os.Getenv("ACESTEP_MODEL_DIR")
	if modelDir == "" {
		t.Skip("ACESTEP_MODEL_DIR not set, skipping test (set to directory with GGUF models)")
	}
	// Required model files with the component label used in the skip message.
	required := []struct {
		file  string
		label string
	}{
		{"acestep-5Hz-lm-0.6B-Q8_0.gguf", "LM"},
		{"Qwen3-Embedding-0.6B-Q8_0.gguf", "Text encoder"},
		{"acestep-v15-turbo-Q8_0.gguf", "DiT"},
		{"vae-BF16.gguf", "VAE"},
	}
	for _, r := range required {
		if _, err := os.Stat(filepath.Join(modelDir, r.file)); os.IsNotExist(err) {
			t.Skipf("%s model file not found in %s, skipping", r.label, modelDir)
		}
	}
	return modelDir
}
// startServer launches the backend gRPC binary (path from ACESTEP_BINARY,
// defaulting to ./acestep-cpp) and waits until its TCP port accepts
// connections, bounded by startupWait. Polling replaces the original fixed
// 5-second sleep, which was both slow on fast machines and flaky on slow
// ones. The caller must stop the returned process via stopServer.
func startServer(t *testing.T) *exec.Cmd {
	t.Helper()
	binary := os.Getenv("ACESTEP_BINARY")
	if binary == "" {
		binary = "./acestep-cpp"
	}
	if _, err := os.Stat(binary); os.IsNotExist(err) {
		t.Skipf("Backend binary not found at %s, skipping", binary)
	}
	cmd := exec.Command(binary, "--addr", testAddr)
	cmd.Stdout = os.Stderr
	cmd.Stderr = os.Stderr
	if err := cmd.Start(); err != nil {
		t.Fatalf("Failed to start server: %v", err)
	}
	// Poll the listen address until the server is reachable or the startup
	// budget is exhausted; proceed either way and let the test surface any
	// failure through a proper gRPC error.
	deadline := time.Now().Add(startupWait)
	for time.Now().Before(deadline) {
		conn, err := net.DialTimeout("tcp", testAddr, 250*time.Millisecond)
		if err == nil {
			conn.Close()
			break
		}
		time.Sleep(100 * time.Millisecond)
	}
	return cmd
}
func stopServer(cmd *exec.Cmd) {
if cmd != nil && cmd.Process != nil {
cmd.Process.Kill()
cmd.Wait()
}
}
// dialGRPC opens a client connection to the test server address with message
// size limits large enough for audio payloads (50 MiB each way).
func dialGRPC(t *testing.T) *grpc.ClientConn {
	t.Helper()
	// grpc.Dial is deprecated; NewClient is the supported constructor and,
	// like Dial without WithBlock, does not block waiting for connectivity —
	// the connection is established lazily on the first RPC.
	conn, err := grpc.NewClient(testAddr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultCallOptions(
			grpc.MaxCallRecvMsgSize(50*1024*1024),
			grpc.MaxCallSendMsgSize(50*1024*1024),
		),
	)
	if err != nil {
		t.Fatalf("Failed to dial gRPC: %v", err)
	}
	return conn
}
// TestServerHealth verifies the backend process starts and answers the
// Health RPC with the literal "OK" payload.
func TestServerHealth(t *testing.T) {
	srv := startServer(t)
	defer stopServer(srv)

	conn := dialGRPC(t)
	defer conn.Close()

	resp, err := pb.NewBackendClient(conn).Health(context.Background(), &pb.HealthMessage{})
	if err != nil {
		t.Fatalf("Health check failed: %v", err)
	}
	if got := string(resp.Message); got != "OK" {
		t.Fatalf("Expected OK, got %s", got)
	}
}
// TestLoadModel exercises the LoadModel RPC: the LM GGUF path rides in
// ModelFile while the text encoder, DiT and VAE paths go through Options
// as "key:value" strings.
func TestLoadModel(t *testing.T) {
	modelDir := skipIfNoModel(t)

	srv := startServer(t)
	defer stopServer(srv)

	conn := dialGRPC(t)
	defer conn.Close()
	client := pb.NewBackendClient(conn)

	req := &pb.ModelOptions{
		ModelFile: filepath.Join(modelDir, "acestep-5Hz-lm-0.6B-Q8_0.gguf"),
		Options: []string{
			"text_encoder_model:" + filepath.Join(modelDir, "Qwen3-Embedding-0.6B-Q8_0.gguf"),
			"dit_model:" + filepath.Join(modelDir, "acestep-v15-turbo-Q8_0.gguf"),
			"vae_model:" + filepath.Join(modelDir, "vae-BF16.gguf"),
		},
	}
	resp, err := client.LoadModel(context.Background(), req)
	if err != nil {
		t.Fatalf("LoadModel failed: %v", err)
	}
	if !resp.Success {
		t.Fatalf("LoadModel returned failure: %s", resp.Message)
	}
}
// TestSoundGeneration runs the full pipeline end to end: load the models,
// request a short clip over gRPC, and check that a plausibly-sized WAV file
// was written to the destination path.
func TestSoundGeneration(t *testing.T) {
	modelDir := skipIfNoModel(t)

	tmpDir, err := os.MkdirTemp("", "acestep-test")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)
	outputFile := filepath.Join(tmpDir, "output.wav")

	srv := startServer(t)
	defer stopServer(srv)

	conn := dialGRPC(t)
	defer conn.Close()
	client := pb.NewBackendClient(conn)

	// Load models
	loadResp, err := client.LoadModel(context.Background(), &pb.ModelOptions{
		ModelFile: filepath.Join(modelDir, "acestep-5Hz-lm-0.6B-Q8_0.gguf"),
		Options: []string{
			"text_encoder_model:" + filepath.Join(modelDir, "Qwen3-Embedding-0.6B-Q8_0.gguf"),
			"dit_model:" + filepath.Join(modelDir, "acestep-v15-turbo-Q8_0.gguf"),
			"vae_model:" + filepath.Join(modelDir, "vae-BF16.gguf"),
		},
	})
	if err != nil {
		t.Fatalf("LoadModel failed: %v", err)
	}
	if !loadResp.Success {
		t.Fatalf("LoadModel returned failure: %s", loadResp.Message)
	}

	// Generate music
	var (
		duration    = float32(10.0)
		temperature = float32(0.85)
		bpm         = int32(120)
		caption     = "A cheerful electronic dance track"
		timesig     = "4/4"
	)
	if _, err = client.SoundGeneration(context.Background(), &pb.SoundGenerationRequest{
		Text:          caption,
		Caption:       &caption,
		Dst:           outputFile,
		Duration:      &duration,
		Temperature:   &temperature,
		Bpm:           &bpm,
		Timesignature: &timesig,
	}); err != nil {
		t.Fatalf("SoundGeneration failed: %v", err)
	}

	// Verify output file exists and has content
	info, err := os.Stat(outputFile)
	if os.IsNotExist(err) {
		t.Fatal("Output audio file was not created")
	}
	if err != nil {
		t.Fatalf("Failed to stat output file: %v", err)
	}
	t.Logf("Output file size: %d bytes", info.Size())
	// WAV header is 44 bytes minimum; any real audio should be much larger
	if info.Size() < 1000 {
		t.Errorf("Output file too small (%d bytes), expected real audio data", info.Size())
	}
}

View File

@@ -0,0 +1,306 @@
#include "goacestepcpp.h"
#include "ggml-backend.h"
#include "audio-io.h"
#include "bpe.h"
#include "cond-enc.h"
#include "dit-sampler.h"
#include "dit.h"
#include "gguf-weights.h"
#include "philox.h"
#include "qwen3-enc.h"
#include "qwen3-lm.h"
#include "request.h"
#include "vae.h"
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <random>
#include <string>
#include <vector>
// Global model contexts (loaded once by load_model, reused across requests).
// NOTE(review): nothing here is guarded by a lock — this assumes requests are
// serialized by the Go side; confirm before allowing concurrent generation.
static DiTGGML g_dit = {};
static DiTGGMLConfig g_dit_cfg;
static VAEGGML g_vae = {};
static bool g_dit_loaded = false;
static bool g_vae_loaded = false;
static bool g_is_turbo = false;
// Silence latent [15000, 64] — read once from DiT GGUF
static std::vector<float> g_silence_full;
// Paths for per-request loading (text encoder, tokenizer)
static std::string g_text_enc_path;
static std::string g_dit_path;
static std::string g_lm_path;  // stored by load_model but not read in this file — TODO confirm intended use
static void ggml_log_cb(enum ggml_log_level level, const char * log, void * data) {
const char * level_str;
if (!log)
return;
switch (level) {
case GGML_LOG_LEVEL_DEBUG:
level_str = "DEBUG";
break;
case GGML_LOG_LEVEL_INFO:
level_str = "INFO";
break;
case GGML_LOG_LEVEL_WARN:
level_str = "WARN";
break;
case GGML_LOG_LEVEL_ERROR:
level_str = "ERROR";
break;
default:
level_str = "?????";
break;
}
fprintf(stderr, "[%-5s] ", level_str);
fputs(log, stderr);
fflush(stderr);
}
// Loads the DiT and VAE GGUF models into the global contexts and records the
// LM / text-encoder paths for per-request loading in generate_music().
// Returns 0 on success, 1 if the DiT fails to load, 2 if the DiT GGUF
// metadata or its silence_latent tensor cannot be read.
int load_model(const char * lm_model_path, const char * text_encoder_path,
               const char * dit_model_path, const char * vae_model_path) {
    ggml_log_set(ggml_log_cb, nullptr);
    ggml_backend_load_all();

    // Remember paths: the tokenizer and text encoder are reloaded on every
    // generate_music() call; the LM path is only stored here (not consumed
    // elsewhere in this file — TODO confirm intended use).
    g_lm_path = lm_model_path;
    g_text_enc_path = text_encoder_path;
    g_dit_path = dit_model_path;

    // Load DiT model
    fprintf(stderr, "[acestep-cpp] Loading DiT from %s\n", dit_model_path);
    dit_ggml_init_backend(&g_dit);
    if (!dit_ggml_load(&g_dit, dit_model_path, g_dit_cfg, nullptr, 0.0f)) {
        fprintf(stderr, "[acestep-cpp] FATAL: failed to load DiT from %s\n", dit_model_path);
        return 1;
    }
    g_dit_loaded = true;

    // Read DiT GGUF metadata + silence_latent
    {
        GGUFModel gf = {};
        if (gf_load(&gf, dit_model_path)) {
            g_is_turbo = gf_get_bool(gf, "acestep.is_turbo");
            const void * sl_data = gf_get_data(gf, "silence_latent");
            if (sl_data) {
                // Copy the full silence latent (15000 frames x 64 channels);
                // generate_music() slices prefixes out of it per request.
                g_silence_full.resize(15000 * 64);
                memcpy(g_silence_full.data(), sl_data, 15000 * 64 * sizeof(float));
                fprintf(stderr, "[acestep-cpp] silence_latent: [15000, 64] loaded\n");
            } else {
                fprintf(stderr, "[acestep-cpp] FATAL: silence_latent not found in %s\n", dit_model_path);
                gf_close(&gf);
                return 2;
            }
            gf_close(&gf);
        } else {
            fprintf(stderr, "[acestep-cpp] FATAL: cannot read GGUF metadata from %s\n", dit_model_path);
            return 2;
        }
    }

    // Load VAE model
    // NOTE(review): vae_ggml_load's result is not checked, unlike the DiT
    // path above — confirm it cannot fail, or propagate an error code.
    fprintf(stderr, "[acestep-cpp] Loading VAE from %s\n", vae_model_path);
    vae_ggml_load(&g_vae, vae_model_path);
    g_vae_loaded = true;

    fprintf(stderr, "[acestep-cpp] All models loaded successfully (turbo=%d)\n", g_is_turbo);
    return 0;
}
// Renders a music clip to a stereo 48 kHz WAV file at `dst`.
// Pipeline: format prompts -> BPE tokenize -> Qwen3 text encoder ->
// condition encoder -> DiT flow-matching sampler -> VAE decode ->
// peak-normalize -> WAV write. Returns 0 on success, or a distinct
// non-zero code identifying the failing stage (see each return below).
// NOTE(review): `temperature` and `threads` are accepted but never used in
// this function — confirm whether they should reach the sampler/backends.
int generate_music(const char * caption, const char * lyrics, int bpm,
                   const char * keyscale, const char * timesignature,
                   float duration, float temperature, bool instrumental,
                   int seed, const char * dst, int threads) {
    if (!g_dit_loaded || !g_vae_loaded) {
        fprintf(stderr, "[acestep-cpp] ERROR: models not loaded\n");
        return 1;
    }

    const int FRAMES_PER_SECOND = 25;  // latent frame rate used for T below

    // Defaults
    if (duration <= 0)
        duration = 30.0f;
    // Null pointers are normalized to the template's placeholder strings;
    // instrumental mode blanks the lyrics entirely.
    std::string cap_str = caption ? caption : "";
    std::string lyrics_str = (instrumental || !lyrics) ? "" : lyrics;
    std::string ks_str = keyscale ? keyscale : "N/A";
    std::string ts_str = timesignature ? timesignature : "4/4";
    std::string lang_str = "unknown";
    char bpm_str[16];
    if (bpm > 0) {
        snprintf(bpm_str, sizeof(bpm_str), "%d", bpm);
    } else {
        snprintf(bpm_str, sizeof(bpm_str), "N/A");
    }
    // Sampler settings: fixed 8 steps; turbo checkpoints run without
    // classifier-free guidance (scale 1.0), non-turbo uses 7.0.
    int num_steps = 8;
    float guidance_scale = g_is_turbo ? 1.0f : 7.0f;
    float shift = 1.0f;
    // Negative seed requests a random one; mask keeps it non-negative.
    if (seed < 0) {
        std::random_device rd;
        seed = (int)(rd() & 0x7FFFFFFF);
    }

    // Compute T (latent frames at 25Hz)
    int T = (int)(duration * FRAMES_PER_SECOND);
    // Round T up to a multiple of the DiT patch size; S is the patch count.
    T = ((T + g_dit_cfg.patch_size - 1) / g_dit_cfg.patch_size) * g_dit_cfg.patch_size;
    int S = T / g_dit_cfg.patch_size;
    // 15000 frames is the silence-latent length loaded in load_model(),
    // which caps the generable duration.
    if (T > 15000) {
        fprintf(stderr, "[acestep-cpp] ERROR: T=%d exceeds max 15000\n", T);
        return 2;
    }
    int Oc = g_dit_cfg.out_channels; // 64
    int ctx_ch = g_dit_cfg.in_channels - Oc; // 128
    fprintf(stderr, "[acestep-cpp] T=%d, S=%d, duration=%.1fs, seed=%d\n", T, S, duration, seed);

    // 1. Load BPE tokenizer from text encoder GGUF
    BPETokenizer tok;
    if (!load_bpe_from_gguf(&tok, g_text_enc_path.c_str())) {
        fprintf(stderr, "[acestep-cpp] FATAL: failed to load BPE tokenizer\n");
        return 3;
    }

    // 2. Build formatted prompts (matches dit-vae.cpp text2music template)
    std::string instruction = "Fill the audio semantic mask based on the given conditions:";
    char metas[512];
    snprintf(metas, sizeof(metas),
             "- bpm: %s\n- timesignature: %s\n- keyscale: %s\n- duration: %d seconds\n",
             bpm_str, ts_str.c_str(), ks_str.c_str(), (int)duration);
    std::string text_str = std::string("# Instruction\n") + instruction + "\n\n" +
                           "# Caption\n" + cap_str + "\n\n" +
                           "# Metas\n" + metas + "<|endoftext|>\n";
    std::string lyric_str = std::string("# Languages\n") + lang_str + "\n\n# Lyric\n" +
                            lyrics_str + "<|endoftext|>";

    // 3. Tokenize
    auto text_ids = bpe_encode(&tok, text_str.c_str(), true);
    auto lyric_ids = bpe_encode(&tok, lyric_str.c_str(), true);
    int S_text = (int)text_ids.size();
    int S_lyric = (int)lyric_ids.size();
    fprintf(stderr, "[acestep-cpp] caption: %d tokens, lyrics: %d tokens\n", S_text, S_lyric);

    // 4. Text encoder forward
    Qwen3GGML text_enc = {};
    qwen3_init_backend(&text_enc);
    if (!qwen3_load_text_encoder(&text_enc, g_text_enc_path.c_str())) {
        fprintf(stderr, "[acestep-cpp] FATAL: failed to load text encoder\n");
        return 4;
    }
    int H_text = text_enc.cfg.hidden_size; // 1024
    std::vector<float> text_hidden(H_text * S_text);
    qwen3_forward(&text_enc, text_ids.data(), S_text, text_hidden.data());
    fprintf(stderr, "[acestep-cpp] TextEncoder forward done\n");

    // 5. Lyric embedding
    // Lyrics only go through the embedding table, not the full encoder.
    std::vector<float> lyric_embed(H_text * S_lyric);
    qwen3_embed_lookup(&text_enc, lyric_ids.data(), S_lyric, lyric_embed.data());

    // 6. Condition encoder
    CondGGML cond = {};
    cond_ggml_init_backend(&cond);
    if (!cond_ggml_load(&cond, g_dit_path.c_str())) {
        fprintf(stderr, "[acestep-cpp] FATAL: failed to load condition encoder\n");
        qwen3_free(&text_enc);  // release the encoder loaded in step 4
        return 5;
    }
    // Reference slice of the silence latent fed as conditioning features.
    const int S_ref = 750;
    std::vector<float> silence_feats(S_ref * 64);
    memcpy(silence_feats.data(), g_silence_full.data(), S_ref * 64 * sizeof(float));
    int enc_S = 0;
    std::vector<float> enc_hidden;
    cond_ggml_forward(&cond, text_hidden.data(), S_text, lyric_embed.data(), S_lyric,
                      silence_feats.data(), S_ref, enc_hidden, &enc_S);
    fprintf(stderr, "[acestep-cpp] ConditionEncoder done, enc_S=%d\n", enc_S);
    // Per-request models are no longer needed past this point.
    qwen3_free(&text_enc);
    cond_ggml_free(&cond);

    // 7. Build context [T, ctx_ch] = silence[64] + mask[64]
    // Each frame: first Oc channels are the silence latent, next Oc are an
    // all-ones mask. NOTE(review): this assumes ctx_ch == 2*Oc — confirm
    // against the DiT config (in_channels == 3*out_channels).
    std::vector<float> context(T * ctx_ch);
    for (int t = 0; t < T; t++) {
        const float * src = g_silence_full.data() + t * Oc;
        for (int c = 0; c < Oc; c++) {
            context[t * ctx_ch + c] = src[c];
        }
        for (int c = 0; c < Oc; c++) {
            context[t * ctx_ch + Oc + c] = 1.0f;
        }
    }

    // 8. Build schedule
    // Shifted flow-matching timesteps decreasing from 1 toward 0; with
    // shift == 1 this is the plain linear schedule.
    std::vector<float> schedule(num_steps);
    for (int i = 0; i < num_steps; i++) {
        float t = 1.0f - (float)i / (float)num_steps;
        schedule[i] = shift * t / (1.0f + (shift - 1.0f) * t);
    }

    // 9. Generate noise (Philox)
    // Counter-based RNG keeps output reproducible for a given seed.
    std::vector<float> noise(Oc * T);
    philox_randn((long long)seed, noise.data(), Oc * T, true);

    // 10. DiT generate
    std::vector<float> output(Oc * T);
    fprintf(stderr, "[acestep-cpp] DiT generate: T=%d, steps=%d, guidance=%.1f\n", T, num_steps, guidance_scale);
    dit_ggml_generate(&g_dit, noise.data(), context.data(), enc_hidden.data(), enc_S,
                      T, 1, num_steps, schedule.data(), output.data(), guidance_scale,
                      nullptr, nullptr, -1);
    fprintf(stderr, "[acestep-cpp] DiT generation done\n");

    // 11. VAE decode
    // 1920 audio samples per latent frame (48000 Hz / 25 fps); buffer is
    // sized for interleaved stereo.
    int T_audio_max = T * 1920;
    std::vector<float> audio(2 * T_audio_max);
    int T_audio = vae_ggml_decode_tiled(&g_vae, output.data(), T, audio.data(), T_audio_max, 256, 64);
    if (T_audio < 0) {
        fprintf(stderr, "[acestep-cpp] ERROR: VAE decode failed\n");
        return 6;
    }
    fprintf(stderr, "[acestep-cpp] VAE decode done: %d samples (%.2fs @ 48kHz)\n", T_audio,
            (float)T_audio / 48000.0f);

    // 12. Peak normalization to -1.0 dB
    {
        float peak = 0.0f;
        int n_samples = 2 * T_audio;  // interleaved stereo sample count
        for (int i = 0; i < n_samples; i++) {
            float a = audio[i] < 0 ? -audio[i] : audio[i];
            if (a > peak) {
                peak = a;
            }
        }
        // Skip normalization for (near-)silent output to avoid huge gains.
        if (peak > 1e-6f) {
            const float target_amp = powf(10.0f, -1.0f / 20.0f);
            float gain = target_amp / peak;
            for (int i = 0; i < n_samples; i++) {
                audio[i] *= gain;
            }
        }
    }

    // 13. Write WAV output
    // NOTE(review): T_audio is passed as the frame count while the buffer is
    // interleaved stereo — confirm the audio_write_wav contract in audio-io.h.
    if (!audio_write_wav(dst, audio.data(), T_audio, 48000)) {
        fprintf(stderr, "[acestep-cpp] ERROR: failed to write %s\n", dst);
        return 7;
    }
    fprintf(stderr, "[acestep-cpp] Wrote %s: %d samples (%.2fs @ 48kHz stereo)\n",
            dst, T_audio, (float)T_audio / 48000.0f);
    return 0;
}

View File

@@ -0,0 +1,11 @@
// C ABI surface exported by libgoacestepcpp and bound from Go via purego
// (see main.go). Both functions return 0 on success and a non-zero
// stage-specific error code on failure.
#include <cstddef>
#include <cstdint>

extern "C" {
// Loads the DiT and VAE GGUF models into process-global state and records
// the LM / text-encoder paths for later use. Must be called before
// generate_music.
int load_model(const char *lm_model_path, const char *text_encoder_path,
               const char *dit_model_path, const char *vae_model_path);
// Generates a music clip from the given prompt fields and writes a WAV file
// to `dst`. A negative `seed` requests a random seed. NOTE(review):
// `temperature` and `threads` are currently unused by the implementation.
int generate_music(const char *caption, const char *lyrics, int bpm,
                   const char *keyscale, const char *timesignature,
                   float duration, float temperature, bool instrumental,
                   int seed, const char *dst, int threads);
}

View File

@@ -0,0 +1,82 @@
package main
import (
"fmt"
"os"
"strings"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
// Function pointers bound at startup by main() to the C symbols exported by
// libgoacestepcpp (via purego.RegisterLibFunc); they are nil until then.
var (
	// CppLoadModel wraps the C `load_model` entry point; returns 0 on success.
	CppLoadModel func(lmModelPath, textEncoderPath, ditModelPath, vaeModelPath string) int
	// CppGenerateMusic wraps the C `generate_music` entry point; returns 0 on success.
	CppGenerateMusic func(caption, lyrics string, bpm int, keyscale, timesignature string, duration, temperature float32, instrumental bool, seed int, dst string, threads int) int
)

// AceStepCpp implements the LocalAI backend gRPC interface for the
// acestep.cpp music-generation engine. base.SingleThread presumably
// serializes requests — confirm against pkg/grpc/base.
type AceStepCpp struct {
	base.SingleThread
}
// Load wires up the four GGUF models required by acestep.cpp. The LM model
// path arrives in opts.ModelFile; the text encoder, DiT and VAE paths are
// passed as "key:value" entries in opts.Options and are all mandatory.
// Returns an error naming the first missing option, or the C error code if
// native loading fails.
func (a *AceStepCpp) Load(opts *pb.ModelOptions) error {
	// ModelFile is the LM model path
	lmModel := opts.ModelFile

	var textEncoderModel, ditModel, vaeModel string
	for _, oo := range opts.Options {
		parts := strings.SplitN(oo, ":", 2)
		if len(parts) != 2 {
			// No "key:value" separator at all — report it distinctly from an
			// unknown key so misconfigured options are easier to diagnose.
			fmt.Fprintf(os.Stderr, "Malformed option (expected key:value): %v\n", oo)
			continue
		}
		switch parts[0] {
		case "text_encoder_model":
			textEncoderModel = parts[1]
		case "dit_model":
			ditModel = parts[1]
		case "vae_model":
			vaeModel = parts[1]
		default:
			fmt.Fprintf(os.Stderr, "Unrecognized option: %v\n", oo)
		}
	}

	if textEncoderModel == "" {
		return fmt.Errorf("text_encoder_model option is required")
	}
	if ditModel == "" {
		return fmt.Errorf("dit_model option is required")
	}
	if vaeModel == "" {
		return fmt.Errorf("vae_model option is required")
	}

	if ret := CppLoadModel(lmModel, textEncoderModel, ditModel, vaeModel); ret != 0 {
		return fmt.Errorf("failed to load acestep models (error code: %d)", ret)
	}
	return nil
}
// SoundGeneration renders a music clip to req.Dst (a WAV path) via the C
// generate_music entry point, using the models loaded by Load. The caption
// falls back to the plain Text field when the dedicated Caption field is
// empty.
func (a *AceStepCpp) SoundGeneration(req *pb.SoundGenerationRequest) error {
	caption := req.GetCaption()
	if caption == "" {
		caption = req.GetText()
	}
	lyrics := req.GetLyrics()
	bpm := int(req.GetBpm())
	keyscale := req.GetKeyscale()
	timesignature := req.GetTimesignature()
	duration := req.GetDuration()
	temperature := req.GetTemperature()
	instrumental := req.GetInstrumental()
	// NOTE(review): seed and threads are hard-coded, so identical requests
	// always yield identical audio — confirm whether the request proto
	// carries seed/thread fields that should be honored here instead.
	seed := 42
	threads := 4
	if ret := CppGenerateMusic(caption, lyrics, bpm, keyscale, timesignature, duration, temperature, instrumental, seed, req.GetDst(), threads); ret != 0 {
		return fmt.Errorf("failed to generate music (error code: %d)", ret)
	}
	return nil
}

View File

@@ -0,0 +1,47 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
"os"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
	// addr is the gRPC listen address for this backend process; LocalAI
	// passes it on the command line via --addr.
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
// LibFuncs pairs a Go function-pointer variable with the name of the native
// symbol it is bound to via purego.RegisterLibFunc in main.
type LibFuncs struct {
	FuncPtr any    // pointer to the Go func variable to populate
	Name    string // exported symbol name in the shared library
}
// main starts the gRPC server for the acestep-cpp backend. The native
// implementation is loaded at runtime with purego from the shared library
// selected by run.sh and exported as ACESTEP_LIBRARY.
func main() {
	// Parse flags before any real work so `-h` or an invalid flag exits
	// without first dlopen-ing the native library.
	flag.Parse()

	// Get library name from environment variable, default to fallback
	libName := os.Getenv("ACESTEP_LIBRARY")
	if libName == "" {
		libName = "./libgoacestepcpp-fallback.so"
	}

	lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
	if err != nil {
		panic(err)
	}

	// Bind the Go function pointers to the exported native symbols.
	libFuncs := []LibFuncs{
		{&CppLoadModel, "load_model"},
		{&CppGenerateMusic, "generate_music"},
	}
	for _, lf := range libFuncs {
		purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name)
	}

	if err := grpc.StartServer(*addr, &AceStepCpp{}); err != nil {
		panic(err)
	}
}

View File

@@ -0,0 +1,65 @@
#!/bin/bash
# Script to copy the appropriate libraries based on architecture.
# Packages the acestep-cpp binary, the per-CPU-variant shared libraries and
# a minimal loader/libc runtime into ./package for the final image stage.
# This script is used in the final stage of the Dockerfile
set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# Base C/C++ runtime libraries needed alongside the bundled dynamic loader.
RUNTIME_LIBS="libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0"

# Create lib directory
mkdir -p "$CURDIR/package/lib"

cp -avf "$CURDIR/acestep-cpp" "$CURDIR/package/"
cp -fv "$CURDIR"/libgoacestepcpp-*.so "$CURDIR/package/"
cp -fv "$CURDIR/run.sh" "$CURDIR/package/"

# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    cp -arfLv /lib64/ld-linux-x86-64.so.2 "$CURDIR/package/lib/ld.so"
    for lib in $RUNTIME_LIBS; do
        cp -arfLv "/lib/x86_64-linux-gnu/$lib" "$CURDIR/package/lib/$lib"
    done
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    cp -arfLv /lib/ld-linux-aarch64.so.1 "$CURDIR/package/lib/ld.so"
    for lib in $RUNTIME_LIBS; do
        cp -arfLv "/lib/aarch64-linux-gnu/$lib" "$CURDIR/package/lib/$lib"
    done
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Package GPU libraries based on BUILD_TYPE
# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/"
ls -liah "$CURDIR/package/lib/"

52
backend/go/acestep-cpp/run.sh Executable file
View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Select the best CPU variant of the native acestep library available on this
# host (fallback < avx < avx2 < avx512) and exec the backend binary,
# preferring the bundled dynamic loader in ./lib when present.
set -ex

# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath "$0")")

cd /

echo "CPU info:"
if [ "$(uname)" != "Darwin" ]; then
    grep -e "model\sname" /proc/cpuinfo | head -1
    grep -e "flags" /proc/cpuinfo | head -1
fi

# Start from the baseline build; each later check upgrades LIBRARY only when
# both the CPU flag is present and the matching variant was shipped.
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"

if [ "$(uname)" != "Darwin" ]; then
    if grep -q -e "\savx\s" /proc/cpuinfo ; then
        echo "CPU: AVX found OK"
        if [ -e "$CURDIR/libgoacestepcpp-avx.so" ]; then
            LIBRARY="$CURDIR/libgoacestepcpp-avx.so"
        fi
    fi
    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
        echo "CPU: AVX2 found OK"
        if [ -e "$CURDIR/libgoacestepcpp-avx2.so" ]; then
            LIBRARY="$CURDIR/libgoacestepcpp-avx2.so"
        fi
    fi
    # Check avx 512
    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
        echo "CPU: AVX512F found OK"
        if [ -e "$CURDIR/libgoacestepcpp-avx512.so" ]; then
            LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
        fi
    fi
fi

export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export ACESTEP_LIBRARY=$LIBRARY

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    echo "Using library: $LIBRARY"
    exec "$CURDIR/lib/ld.so" "$CURDIR/acestep-cpp" "$@"
fi

echo "Using library: $LIBRARY"
exec "$CURDIR/acestep-cpp" "$@"

54
backend/go/acestep-cpp/test.sh Executable file
View File

@@ -0,0 +1,54 @@
#!/bin/bash
# Run the acestep-cpp backend Go tests, auto-downloading small quantized
# test models when the caller did not point ACESTEP_MODEL_DIR elsewhere.
set -e

CURDIR=$(dirname "$(realpath "$0")")

echo "Running acestep-cpp backend tests..."

# The test requires:
# - ACESTEP_MODEL_DIR: path to directory containing GGUF model files
# - ACESTEP_BINARY: path to the acestep-cpp binary (defaults to ./acestep-cpp)
#
# Tests that require the model will be skipped if ACESTEP_MODEL_DIR is not set
# or the directory does not contain the required model files.

cd "$CURDIR"

# Only auto-download models when ACESTEP_MODEL_DIR is not explicitly set
if [ -z "$ACESTEP_MODEL_DIR" ]; then
    export ACESTEP_MODEL_DIR="./acestep-models"
    if [ ! -d "$ACESTEP_MODEL_DIR" ]; then
        echo "Creating acestep-models directory for tests..."
        mkdir -p "$ACESTEP_MODEL_DIR"
        REPO_ID="Serveurperso/ACE-Step-1.5-GGUF"
        echo "Repository: ${REPO_ID}"
        echo ""
        # Files to download (smallest quantizations for testing)
        FILES=(
            "acestep-5Hz-lm-0.6B-Q8_0.gguf"
            "Qwen3-Embedding-0.6B-Q8_0.gguf"
            "acestep-v15-turbo-Q8_0.gguf"
            "vae-BF16.gguf"
        )
        BASE_URL="https://huggingface.co/${REPO_ID}/resolve/main"
        for file in "${FILES[@]}"; do
            dest="${ACESTEP_MODEL_DIR}/${file}"
            if [ -f "${dest}" ]; then
                echo "  [skip] ${file} (already exists)"
            else
                echo "  [download] ${file}..."
                # -f: fail on HTTP errors instead of saving the error page as
                # a "model" that later runs would wrongly [skip]. On failure
                # remove the partial file so a rerun retries the download.
                if ! curl -fL --retry 3 -o "${dest}" "${BASE_URL}/${file}" --progress-bar; then
                    rm -f "${dest}"
                    echo "  [error] failed to download ${file}" >&2
                    exit 1
                fi
                echo "  [done] ${file}"
            fi
        done
    fi
fi

# Run Go tests
go test -v -timeout 600s .
echo "All acestep-cpp tests passed."

View File

@@ -364,6 +364,29 @@
default: "cpu-ace-step-development"
nvidia-cuda-13: "cuda13-ace-step-development"
nvidia-cuda-12: "cuda12-ace-step-development"
- &acestepcpp
name: "acestep-cpp"
description: |
ACE-Step 1.5 C++ backend using GGML. Native C++ implementation of ACE-Step music generation with GPU support through GGML backends.
Generates stereo 48kHz audio from text descriptions and optional lyrics via a two-stage pipeline: text-to-code (ace-qwen3 LLM) + code-to-audio (DiT-VAE).
urls:
- https://github.com/ace-step/acestep.cpp
tags:
- music-generation
- sound-generation
alias: "acestep-cpp"
capabilities:
default: "cpu-acestep-cpp"
nvidia: "cuda12-acestep-cpp"
nvidia-cuda-13: "cuda13-acestep-cpp"
nvidia-cuda-12: "cuda12-acestep-cpp"
intel: "intel-sycl-f16-acestep-cpp"
metal: "metal-acestep-cpp"
amd: "rocm-acestep-cpp"
vulkan: "vulkan-acestep-cpp"
nvidia-l4t: "nvidia-l4t-arm64-acestep-cpp"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-acestep-cpp"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-acestep-cpp"
- &faster-whisper
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
description: |
@@ -1845,6 +1868,107 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-ace-step"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-ace-step
## acestep-cpp
- !!merge <<: *acestepcpp
name: "nvidia-l4t-arm64-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-acestep-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-acestep-cpp
- !!merge <<: *acestepcpp
name: "nvidia-l4t-arm64-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-acestep-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-acestep-cpp
- !!merge <<: *acestepcpp
name: "cuda13-nvidia-l4t-arm64-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-acestep-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-acestep-cpp
- !!merge <<: *acestepcpp
name: "cuda13-nvidia-l4t-arm64-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-acestep-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-acestep-cpp
- !!merge <<: *acestepcpp
name: "cpu-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-acestep-cpp"
mirrors:
- localai/localai-backends:latest-cpu-acestep-cpp
- !!merge <<: *acestepcpp
name: "metal-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-acestep-cpp"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-acestep-cpp
- !!merge <<: *acestepcpp
name: "metal-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-acestep-cpp"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-acestep-cpp
- !!merge <<: *acestepcpp
name: "cpu-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-acestep-cpp"
mirrors:
- localai/localai-backends:master-cpu-acestep-cpp
- !!merge <<: *acestepcpp
name: "cuda12-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-acestep-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-acestep-cpp
- !!merge <<: *acestepcpp
name: "rocm-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-acestep-cpp"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-acestep-cpp
- !!merge <<: *acestepcpp
name: "intel-sycl-f32-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-acestep-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f32-acestep-cpp
- !!merge <<: *acestepcpp
name: "intel-sycl-f16-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-acestep-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-acestep-cpp
- !!merge <<: *acestepcpp
name: "vulkan-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-acestep-cpp"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-acestep-cpp
- !!merge <<: *acestepcpp
name: "vulkan-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-acestep-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-acestep-cpp
- !!merge <<: *acestepcpp
name: "cuda12-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-acestep-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-acestep-cpp
- !!merge <<: *acestepcpp
name: "rocm-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-acestep-cpp"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-acestep-cpp
- !!merge <<: *acestepcpp
name: "intel-sycl-f32-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-acestep-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-acestep-cpp
- !!merge <<: *acestepcpp
name: "intel-sycl-f16-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-acestep-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-acestep-cpp
- !!merge <<: *acestepcpp
name: "cuda13-acestep-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-acestep-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-acestep-cpp
- !!merge <<: *acestepcpp
name: "cuda13-acestep-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-acestep-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-acestep-cpp
## kokoro
- !!merge <<: *kokoro
name: "kokoro-development"

View File

@@ -675,7 +675,7 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
}
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
soundGenBackends := []string{"transformers-musicgen", "ace-step", "mock-backend"}
soundGenBackends := []string{"transformers-musicgen", "ace-step", "acestep-cpp", "mock-backend"}
if !slices.Contains(soundGenBackends, c.Backend) {
return false
}

View File

@@ -725,6 +725,83 @@
- "inference_steps:8"
- "guidance_scale:7.0"
- "batch_size:1"
- name: "acestep-cpp-turbo"
license: mit
tags:
- music
- audio
- music-generation
- sound-generation
- acestep-cpp
- ace-step-1.5
- gguf
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF
- https://github.com/ace-step/acestep.cpp
description: |
ACE-Step 1.5 Turbo (C++ / GGML) — native C++ music generation from text descriptions and lyrics.
Two-stage pipeline: text-to-code (Qwen3 LM) + code-to-audio (DiT-VAE). Stereo 48kHz output.
Uses Q8_0 quantized models for a good balance of quality and speed.
overrides:
name: acestep-cpp-turbo
backend: acestep-cpp
parameters:
model: acestep-cpp/acestep-5Hz-lm-0.6B-Q8_0.gguf
known_usecases:
- sound_generation
- tts
options:
- "text_encoder_model:acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf"
- "dit_model:acestep-cpp/acestep-v15-turbo-Q8_0.gguf"
- "vae_model:acestep-cpp/vae-BF16.gguf"
files:
- filename: acestep-cpp/acestep-5Hz-lm-0.6B-Q8_0.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-5Hz-lm-0.6B-Q8_0.gguf
- filename: acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf
- filename: acestep-cpp/acestep-v15-turbo-Q8_0.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-v15-turbo-Q8_0.gguf
- filename: acestep-cpp/vae-BF16.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/vae-BF16.gguf
- name: "acestep-cpp-turbo-4b"
license: mit
tags:
- music
- audio
- music-generation
- sound-generation
- acestep-cpp
- ace-step-1.5
- gguf
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF
- https://github.com/ace-step/acestep.cpp
description: |
ACE-Step 1.5 Turbo (C++ / GGML) with 4B LM — higher quality music generation from text and lyrics.
Uses the larger 4B parameter LM for better metadata/code generation. Stereo 48kHz output.
overrides:
name: acestep-cpp-turbo-4b
backend: acestep-cpp
parameters:
model: acestep-cpp/acestep-5Hz-lm-4B-Q8_0.gguf
known_usecases:
- sound_generation
- tts
options:
- "text_encoder_model:acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf"
- "dit_model:acestep-cpp/acestep-v15-turbo-Q8_0.gguf"
- "vae_model:acestep-cpp/vae-BF16.gguf"
files:
- filename: acestep-cpp/acestep-5Hz-lm-4B-Q8_0.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-5Hz-lm-4B-Q8_0.gguf
- filename: acestep-cpp/Qwen3-Embedding-0.6B-Q8_0.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf
- filename: acestep-cpp/acestep-v15-turbo-Q8_0.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/acestep-v15-turbo-Q8_0.gguf
- filename: acestep-cpp/vae-BF16.gguf
uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/vae-BF16.gguf
- name: "qwen3-coder-next-mxfp4_moe"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls: