mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 21:07:33 -04:00
fix(backends): enable ROCm/HIP GPU offload for ggml audio backends (#10666)
qwen3-tts-cpp, omnivoice-cpp, acestep-cpp and vibevoice-cpp shipped rocm-* variants that silently ran on CPU ([Load] backend: CPU). Two coupled defects: - The Makefiles passed -DGGML_HIPBLAS=ON, but the vendored ggml only understands -DGGML_HIP=ON (GGML_HIPBLAS was removed upstream), so the ggml-hip backend target was never created and no GPU code was built. - The CMake foreach that links the ggml GPU backends into the module listed blas/cuda/metal/vulkan but not hip, so even a built ggml-hip would not have been linked and its static backend registration would never run. CUDA users were unaffected because cublas passes the correct GGML_CUDA=ON and the foreach already links cuda. Mirror the proven llama-cpp hipblas block (ROCm clang CC/CXX + AMDGPU_TARGETS) and add hip to each foreach. Upstream picks the best device via ggml_backend_init_best(), so no runtime flag is needed once HIP is compiled and linked. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8[1m] [Claude Code]
This commit is contained in:
@@ -25,7 +25,7 @@ target_include_directories(goacestepcpp PRIVATE ${ACESTEP_DIR}/src ${ACESTEP_DIR
|
||||
target_include_directories(goacestepcpp SYSTEM PRIVATE ${ACESTEP_DIR}/ggml/include)
|
||||
|
||||
# Link GPU backends if available (mirrors link_ggml_backends macro)
|
||||
foreach(backend blas cuda metal vulkan)
|
||||
foreach(backend blas cuda hip metal vulkan)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(goacestepcpp PRIVATE ggml-${backend})
|
||||
string(TOUPPER ${backend} BACKEND_UPPER)
|
||||
|
||||
@@ -24,7 +24,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -30,7 +30,7 @@ target_include_directories(gomnivoicecpp PRIVATE ${OMNIVOICE_DIR}/src)
|
||||
target_include_directories(gomnivoicecpp SYSTEM PRIVATE ${OMNIVOICE_DIR}/ggml/include)
|
||||
|
||||
# Link GPU backends if the upstream ggml created them.
|
||||
foreach(backend blas cuda metal vulkan sycl)
|
||||
foreach(backend blas cuda hip metal vulkan sycl)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(gomnivoicecpp PRIVATE ggml-${backend})
|
||||
if(backend STREQUAL "cuda")
|
||||
|
||||
@@ -24,7 +24,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -30,7 +30,7 @@ target_include_directories(goqwen3ttscpp PRIVATE ${QWENTTS_DIR}/src)
|
||||
target_include_directories(goqwen3ttscpp SYSTEM PRIVATE ${QWENTTS_DIR}/ggml/include)
|
||||
|
||||
# Link GPU backends if the upstream ggml created them.
|
||||
foreach(backend blas cuda metal vulkan sycl)
|
||||
foreach(backend blas cuda hip metal vulkan sycl)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(goqwen3ttscpp PRIVATE ggml-${backend})
|
||||
if(backend STREQUAL "cuda")
|
||||
|
||||
@@ -24,7 +24,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -50,7 +50,7 @@ target_include_directories(govibevoicecpp SYSTEM PRIVATE ${VIBEVOICE_DIR}/third_
|
||||
# Link GPU backends if available — vibevoice's own CMake already links
|
||||
# these to the libvibevoice STATIC library, but we re-link them on the
|
||||
# MODULE so resolved symbols include all backend kernels.
|
||||
foreach(backend blas cuda metal vulkan)
|
||||
foreach(backend blas cuda hip metal vulkan)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(govibevoicecpp PRIVATE ggml-${backend})
|
||||
string(TOUPPER ${backend} BACKEND_UPPER)
|
||||
|
||||
@@ -29,7 +29,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DVIBEVOICE_GGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DVIBEVOICE_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
Reference in New Issue
Block a user