mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-04 05:16:42 -04:00
Compare commits
4 Commits
dependabot
...
v4.6.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38350d363e | ||
|
|
817136c20e | ||
|
|
8396ce1388 | ||
|
|
348f3c87c0 |
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=fdb1db877c526ec90f668eca1b858da5dba85560
|
||||
LLAMA_VERSION?=d4cff114c0084f1fbc9b4c62717eca8fb2ae494a
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -36,6 +36,12 @@ else
|
||||
if [ -d "$CURDIR/lib/rocblas/library" ]; then
|
||||
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
|
||||
fi
|
||||
# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
|
||||
# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
|
||||
# the bundled data or it falls back to slow generic kernels (issue #10660).
|
||||
if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
|
||||
export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
|
||||
fi
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -34,6 +34,12 @@ else
|
||||
if [ -d "$CURDIR/lib/rocblas/library" ]; then
|
||||
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
|
||||
fi
|
||||
# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
|
||||
# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
|
||||
# the bundled data or it falls back to slow generic kernels (issue #10660).
|
||||
if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
|
||||
export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
|
||||
fi
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -25,7 +25,7 @@ target_include_directories(goacestepcpp PRIVATE ${ACESTEP_DIR}/src ${ACESTEP_DIR
|
||||
target_include_directories(goacestepcpp SYSTEM PRIVATE ${ACESTEP_DIR}/ggml/include)
|
||||
|
||||
# Link GPU backends if available (mirrors link_ggml_backends macro)
|
||||
foreach(backend blas cuda metal vulkan)
|
||||
foreach(backend blas cuda hip metal vulkan)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(goacestepcpp PRIVATE ggml-${backend})
|
||||
string(TOUPPER ${backend} BACKEND_UPPER)
|
||||
|
||||
@@ -24,7 +24,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=9a26976a8c8cf5af0afcdd04463cf8ba91e96a54
|
||||
CRISPASR_VERSION?=f35185b876fc482fcb2053a81a2697936ed5fcc0
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -30,7 +30,7 @@ target_include_directories(gomnivoicecpp PRIVATE ${OMNIVOICE_DIR}/src)
|
||||
target_include_directories(gomnivoicecpp SYSTEM PRIVATE ${OMNIVOICE_DIR}/ggml/include)
|
||||
|
||||
# Link GPU backends if the upstream ggml created them.
|
||||
foreach(backend blas cuda metal vulkan sycl)
|
||||
foreach(backend blas cuda hip metal vulkan sycl)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(gomnivoicecpp PRIVATE ggml-${backend})
|
||||
if(backend STREQUAL "cuda")
|
||||
|
||||
@@ -24,7 +24,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -30,7 +30,7 @@ target_include_directories(goqwen3ttscpp PRIVATE ${QWENTTS_DIR}/src)
|
||||
target_include_directories(goqwen3ttscpp SYSTEM PRIVATE ${QWENTTS_DIR}/ggml/include)
|
||||
|
||||
# Link GPU backends if the upstream ggml created them.
|
||||
foreach(backend blas cuda metal vulkan sycl)
|
||||
foreach(backend blas cuda hip metal vulkan sycl)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(goqwen3ttscpp PRIVATE ggml-${backend})
|
||||
if(backend STREQUAL "cuda")
|
||||
|
||||
@@ -24,7 +24,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -50,7 +50,7 @@ target_include_directories(govibevoicecpp SYSTEM PRIVATE ${VIBEVOICE_DIR}/third_
|
||||
# Link GPU backends if available — vibevoice's own CMake already links
|
||||
# these to the libvibevoice STATIC library, but we re-link them on the
|
||||
# MODULE so resolved symbols include all backend kernels.
|
||||
foreach(backend blas cuda metal vulkan)
|
||||
foreach(backend blas cuda hip metal vulkan)
|
||||
if(TARGET ggml-${backend})
|
||||
target_link_libraries(govibevoicecpp PRIVATE ggml-${backend})
|
||||
string(TOUPPER ${backend} BACKEND_UPPER)
|
||||
|
||||
@@ -29,7 +29,14 @@ else ifeq ($(BUILD_TYPE),openblas)
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DVIBEVOICE_GGML_HIPBLAS=ON
|
||||
# This ggml only understands GGML_HIP (GGML_HIPBLAS was removed upstream),
|
||||
# so passing GGML_HIPBLAS silently produced a CPU-only build (see #10666).
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS ?= gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DVIBEVOICE_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
57
scripts/build/package-gpu-libs-rocm-data_test.sh
Executable file
57
scripts/build/package-gpu-libs-rocm-data_test.sh
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/bin/bash
# Regression test: ROCm kernel-data bundling in scripts/build/package-gpu-libs.sh.
#
# Background (issue #10660): hipBLASLt (rocblaslt) looks up its
# TensileLibrary_lazy_gfx*.dat kernel data relative to the bundled
# libhipblaslt.so. The packager used to copy the rocblas/ data dir but not the
# hipblaslt/ one, so the bundled backend dropped to slow generic kernels and
# logged:
#   rocblaslt error: Cannot read "TensileLibrary_lazy_gfx1201.dat": No such file or directory
#
# Approach: build a throwaway ROCm tree holding both rocblas/ and hipblaslt/
# tensile data, hand it to the packager through ROCM_BASE_DIRS, and verify that
# BOTH data directories end up in the target lib dir.
set -euo pipefail

CURDIR=$(dirname "$(realpath "$0")")
SCRIPT="$CURDIR/package-gpu-libs.sh"

# Scratch area; removed on every exit path.
WORK=$(mktemp -d)
trap 'rm -rf "$WORK"' EXIT

# Fake ROCm install: one tensile data file per library.
FAKE_ROCM="$WORK/opt/rocm"
for fake_lib in rocblas hipblaslt; do
    mkdir -p "$FAKE_ROCM/lib/$fake_lib/library"
    echo "fake $fake_lib tensile" > "$FAKE_ROCM/lib/$fake_lib/library/TensileLibrary_lazy_gfx1201.dat"
done
unset fake_lib

TARGET="$WORK/target"
mkdir -p "$TARGET"

# Load the packager's functions; $TARGET is passed as its first argument.
# shellcheck source=/dev/null
source "$SCRIPT" "$TARGET"

# Redirect the data-dir lookup to the fabricated tree (not the real /opt/rocm)
# and invoke the real ROCm packager, so the check covers package_rocm_libs
# end to end rather than only the copy helper on its own.
export BUILD_TYPE=hipblas
export ROCM_BASE_DIRS="$FAKE_ROCM"
package_rocm_libs

fail=false

# Record a failure when the bundled file is missing.
#   $1 = path relative to $TARGET, $2 = message to print on failure
rocm_data_test_expect() {
    if [ ! -e "$TARGET/$1" ]; then
        echo "$2"
        fail=true
    fi
}

rocm_data_test_expect "rocblas/library/TensileLibrary_lazy_gfx1201.dat" \
    "FAIL: rocblas tensile data was NOT bundled"
rocm_data_test_expect "hipblaslt/library/TensileLibrary_lazy_gfx1201.dat" \
    "FAIL: hipblaslt tensile data was NOT bundled (regression of #10660)"

if [ "$fail" != true ]; then
    echo "PASS: rocblas and hipblaslt tensile data were both bundled"
    exit 0
fi

# Dump what was actually packaged to help diagnose the failure.
ls -R "$TARGET" || true
exit 1
|
||||
@@ -224,6 +224,50 @@ package_cuda_libs() {
|
||||
echo "CUDA libraries packaged successfully"
|
||||
}
|
||||
|
||||
# Copy a ROCm library data subdirectory (e.g. rocblas, hipblaslt) into the
# bundled lib/ dir. These directories hold the TensileLibrary_*.dat GPU kernel
# tuning files, which rocBLAS/hipBLASLt load at runtime *relative to their own
# .so*. Since backends ship their own copies of libhipblaslt.so/librocblas.so
# under lib/, the matching data dir must travel with them or the libs fall back
# to slow generic kernels (rocblaslt error: Cannot read TensileLibrary_lazy_gfx*.dat;
# see issue #10660).
#
# The ROCm search roots default to /opt/rocm{,-*} but can be overridden via the
# ROCM_BASE_DIRS env var (space-separated), which keeps the copy unit-testable
# without a real ROCm install.
#
# Globals read:
#   TARGET_LIB_DIR  destination lib dir; must be non-empty
#   ROCM_BASE_DIRS  optional override for the ROCm search roots
# Args: $1 = data subdir name found under <rocm-root>/lib{,64}/
# Returns: 0 when the search ran (missing data or a failed copy only warn),
#          1 when $1 or TARGET_LIB_DIR is empty.
copy_rocm_data_dir() {
    local data_name="${1:-}"
    # Refuse empty inputs: an empty name would copy the whole lib dir, and an
    # empty TARGET_LIB_DIR would write to "/<name>".
    if [ -z "$data_name" ]; then
        echo "ERROR: copy_rocm_data_dir: missing data subdir name" >&2
        return 1
    fi
    if [ -z "${TARGET_LIB_DIR:-}" ]; then
        echo "ERROR: copy_rocm_data_dir: TARGET_LIB_DIR is not set" >&2
        return 1
    fi

    # Single-line `local x=$(...)` on purpose: `local` masks the command
    # substitution's exit status, which is 1 when nullglob is unset and would
    # otherwise trip the script's `set -e`.
    local old_nullglob=$(shopt -p nullglob)
    shopt -s nullglob
    local -a rocm_dirs
    if [ -n "${ROCM_BASE_DIRS:-}" ]; then
        # shellcheck disable=SC2206 # intentional word-split of the override
        rocm_dirs=(${ROCM_BASE_DIRS})
    else
        rocm_dirs=(/opt/rocm /opt/rocm-*)
    fi
    eval "$old_nullglob"

    local found=false
    local rocm_base lib_subdir src_dir
    # The ${arr[@]+...} form keeps an empty list (e.g. a whitespace-only
    # override) from tripping `set -u` on bash older than 4.4.
    for rocm_base in ${rocm_dirs[@]+"${rocm_dirs[@]}"}; do
        for lib_subdir in lib lib64; do
            src_dir="$rocm_base/$lib_subdir/$data_name"
            if [ -d "$src_dir" ]; then
                echo "Found $data_name data at $src_dir"
                mkdir -p "$TARGET_LIB_DIR/$data_name"
                # Copy "<dir>/." rather than "<dir>/"*: nullglob is back to its
                # previous state here, so a glob on an empty dir would hand cp a
                # literal '*' and report a bogus failure; "/." also picks up
                # dotfiles.
                cp -arfL "$src_dir/." "$TARGET_LIB_DIR/$data_name/" || echo "WARNING: Failed to copy $data_name data from $src_dir"
                found=true
            fi
        done
    done

    if [ "$found" = false ]; then
        echo "WARNING: No $data_name library data found in ${ROCM_BASE_DIRS:-/opt/rocm*}/lib{,64}/$data_name"
    fi
}
|
||||
|
||||
# Package AMD ROCm/HIPBlas libraries
|
||||
package_rocm_libs() {
|
||||
echo "Packaging ROCm/HIPBlas libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
||||
@@ -267,27 +311,16 @@ package_rocm_libs() {
|
||||
fi
|
||||
done
|
||||
|
||||
# Copy rocblas library data (tuning files, TensileLibrary, etc.)
|
||||
local old_nullglob=$(shopt -p nullglob)
|
||||
shopt -s nullglob
|
||||
local rocm_dirs=(/opt/rocm /opt/rocm-*)
|
||||
eval "$old_nullglob"
|
||||
local rocblas_found=false
|
||||
for rocm_base in "${rocm_dirs[@]}"; do
|
||||
for lib_subdir in lib lib64; do
|
||||
if [ -d "$rocm_base/$lib_subdir/rocblas" ]; then
|
||||
echo "Found rocblas data at $rocm_base/$lib_subdir/rocblas"
|
||||
mkdir -p "$TARGET_LIB_DIR/rocblas"
|
||||
cp -arfL "$rocm_base/$lib_subdir/rocblas/"* "$TARGET_LIB_DIR/rocblas/" || echo "WARNING: Failed to copy rocblas data from $rocm_base/$lib_subdir/rocblas"
|
||||
rocblas_found=true
|
||||
fi
|
||||
done
|
||||
done
|
||||
if [ "$rocblas_found" = false ]; then
|
||||
echo "WARNING: No rocblas library data found in /opt/rocm*/lib{,64}/rocblas"
|
||||
fi
|
||||
# Copy rocBLAS and hipBLASLt kernel data (TensileLibrary_*.dat tuning files)
|
||||
# so the bundled libs find their per-arch kernels at runtime instead of
|
||||
# falling back to slow generic code (see copy_rocm_data_dir / issue #10660).
|
||||
copy_rocm_data_dir rocblas
|
||||
copy_rocm_data_dir hipblaslt
|
||||
|
||||
# Copy libomp from LLVM (required for ROCm)
|
||||
# Single-line `local x=$(...)` on purpose: masks shopt -p's nonzero exit
|
||||
# (nullglob unset) so it doesn't trip `set -e`.
|
||||
local old_nullglob=$(shopt -p nullglob)
|
||||
shopt -s nullglob
|
||||
local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*)
|
||||
eval "$old_nullglob"
|
||||
@@ -477,6 +510,7 @@ export -f copy_libs_glob
|
||||
export -f is_core_lib
|
||||
export -f copy_elf_deps
|
||||
export -f sweep_transitive_deps
|
||||
export -f copy_rocm_data_dir
|
||||
export -f package_cuda_libs
|
||||
export -f package_rocm_libs
|
||||
export -f package_intel_libs
|
||||
|
||||
Reference in New Issue
Block a user