mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-04 05:16:42 -04:00
The ROCm packager copied rocBLAS kernel data (rocblas/library/*.dat) into the
bundled lib/ dir and run.sh pointed ROCBLAS_TENSILE_LIBPATH at it, but the
parallel hipBLASLt data dir (hipblaslt/library/TensileLibrary_lazy_gfx*.dat)
was never packaged and no HIPBLASLT_TENSILE_LIBPATH was set. The bundled
libhipblaslt.so therefore resolved its per-arch kernel data relative to itself,
found nothing, and silently fell back to slow generic kernels, logging:
rocblaslt error: Cannot read "TensileLibrary_lazy_gfx1201.dat": No such file or directory
rocblaslt error: Could not load "TensileLibrary_lazy_gfx1201.dat"
Fix, mirroring the existing rocBLAS handling:
- package-gpu-libs.sh: extract the rocblas data-dir copy into a reusable
copy_rocm_data_dir helper and call it for both rocblas and hipblaslt.
- llama-cpp/turboquant run.sh: export HIPBLASLT_TENSILE_LIBPATH when the
bundled hipblaslt/library dir exists.
The helper takes an optional ROCM_BASE_DIRS override so the copy is unit
testable without a real ROCm install; add a regression test that runs
package_rocm_libs against a fabricated ROCm tree and asserts both data dirs
are bundled.
Note: this bundles whatever gfx*.dat the build image's ROCm provides. If a
given arch's tensile data is absent from the shipped ROCm, that arch still
needs a ROCm bump; the packaging gap itself is fixed for every supported arch.
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
58 lines
1.9 KiB
Bash
Executable File
58 lines
1.9 KiB
Bash
Executable File
#!/bin/bash
|
|
set -ex
|
|
|
|
# Get the absolute current dir where the script is located
|
|
CURDIR=$(dirname "$(realpath "$0")")
|
|
|
|
cd /
|
|
|
|
echo "CPU info:"
|
|
grep -e "model\sname" /proc/cpuinfo | head -1
|
|
grep -e "flags" /proc/cpuinfo | head -1
|
|
|
|
BINARY=llama-cpp-fallback
|
|
|
|
# CPU images (x86, arm64, darwin) ship a single llama-cpp-cpu-all built with ggml
|
|
# CPU_ALL_VARIANTS: ggml's backend registry dlopens the best libggml-cpu-*.so for this
|
|
# host, so no shell-side AVX probing. GPU images (cublas/sycl/vulkan/hipblas) ship only
|
|
# llama-cpp-fallback (the accelerator does the compute), so fall back to it when absent.
|
|
if [ -e "$CURDIR"/llama-cpp-cpu-all ]; then
|
|
BINARY=llama-cpp-cpu-all
|
|
fi
|
|
|
|
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
|
if [ -e "$CURDIR"/llama-cpp-grpc ]; then
|
|
BINARY=llama-cpp-grpc
|
|
fi
|
|
fi
|
|
|
|
# Extend ld library path with the dir where this script is located/lib
|
|
if [ "$(uname)" == "Darwin" ]; then
|
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
|
#export DYLD_FALLBACK_LIBRARY_PATH="$CURDIR"/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
|
else
|
|
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
|
# Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files)
|
|
if [ -d "$CURDIR/lib/rocblas/library" ]; then
|
|
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
|
|
fi
|
|
# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
|
|
# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
|
|
# the bundled data or it falls back to slow generic kernels (issue #10660).
|
|
if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
|
|
export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
|
|
fi
|
|
fi
|
|
|
|
# If there is a lib/ld.so, use it
|
|
if [ -f "$CURDIR"/lib/ld.so ]; then
|
|
echo "Using lib/ld.so"
|
|
echo "Using binary: $BINARY"
|
|
exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
|
|
fi
|
|
|
|
echo "Using binary: $BINARY"
|
|
exec "$CURDIR"/$BINARY "$@"
|
|
|
|
# We should never reach this point, however just in case we do, run fallback
|
|
exec "$CURDIR"/llama-cpp-fallback "$@" |