mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 21:07:33 -04:00
The ROCm packager copied rocBLAS kernel data (rocblas/library/*.dat) into the
bundled lib/ dir and run.sh pointed ROCBLAS_TENSILE_LIBPATH at it, but the
parallel hipBLASLt data dir (hipblaslt/library/TensileLibrary_lazy_gfx*.dat)
was never packaged and no HIPBLASLT_TENSILE_LIBPATH was set. The bundled
libhipblaslt.so therefore resolved its per-arch kernel data relative to itself,
found nothing, and silently fell back to slow generic kernels, logging:
rocblaslt error: Cannot read "TensileLibrary_lazy_gfx1201.dat": No such file or directory
rocblaslt error: Could not load "TensileLibrary_lazy_gfx1201.dat"
Fix, mirroring the existing rocBLAS handling:
- package-gpu-libs.sh: extract the rocblas data-dir copy into a reusable
copy_rocm_data_dir helper and call it for both rocblas and hipblaslt.
- llama-cpp/turboquant run.sh: export HIPBLASLT_TENSILE_LIBPATH when the
bundled hipblaslt/library dir exists.
The helper takes an optional ROCM_BASE_DIRS override so the copy is unit
testable without a real ROCm install; add a regression test that runs
package_rocm_libs against a fabricated ROCm tree and asserts both data dirs
are bundled.
Note: this bundles whatever gfx*.dat the build image's ROCm provides. If a
given arch's tensile data is absent from the shipped ROCm, that arch still
needs a ROCm bump; the packaging gap itself is fixed for every supported arch.
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
523 lines
18 KiB
Bash
Executable File
523 lines
18 KiB
Bash
Executable File
#!/bin/bash
|
|
# Script to package GPU libraries based on BUILD_TYPE
|
|
# This script copies GPU-specific runtime libraries to a target lib directory
|
|
# so backends can run in isolation with their own GPU libraries.
|
|
#
|
|
# Usage: source package-gpu-libs.sh TARGET_LIB_DIR
|
|
# package_gpu_libs
|
|
#
|
|
# Environment variables:
|
|
# BUILD_TYPE - The GPU build type (cublas, l4t, hipblas, sycl_f16, sycl_f32, intel, vulkan)
|
|
# CUDA_MAJOR_VERSION - CUDA major version (for cublas/l4t builds)
|
|
#
|
|
# This enables backends to be fully self-contained and run on a unified base image
|
|
# without requiring GPU drivers to be pre-installed in the host image.
|
|
|
|
set -e
|
|
|
|
TARGET_LIB_DIR="${1:-./lib}"
|
|
|
|
# Create target directory if it doesn't exist
|
|
mkdir -p "$TARGET_LIB_DIR"
|
|
|
|
# Associative array to track copied files by basename
|
|
# Note: We use basename for deduplication because the target is a flat directory.
|
|
# If the same library exists in multiple source paths, we only copy it once.
|
|
declare -A COPIED_FILES
|
|
|
|
# Helper function to copy library preserving symlinks structure
|
|
# Instead of following symlinks and duplicating files, this function:
|
|
# 1. Resolves symlinks to their real target
|
|
# 2. Copies the real file only once
|
|
# 3. Recreates symlinks pointing to the real file
|
|
copy_lib() {
|
|
local src="$1"
|
|
|
|
# Check if source exists (follows symlinks)
|
|
if [ ! -e "$src" ]; then
|
|
return
|
|
fi
|
|
|
|
local src_basename
|
|
src_basename=$(basename "$src")
|
|
|
|
# Skip if we've already processed this filename
|
|
if [[ -n "${COPIED_FILES[$src_basename]:-}" ]]; then
|
|
return
|
|
fi
|
|
|
|
if [ -L "$src" ]; then
|
|
# Source is a symbolic link
|
|
# Resolve the real file (following all symlinks)
|
|
local real_file
|
|
real_file=$(readlink -f "$src")
|
|
|
|
if [ ! -e "$real_file" ]; then
|
|
echo "Warning: symlink target does not exist: $src -> $real_file" >&2
|
|
return
|
|
fi
|
|
|
|
local real_basename
|
|
real_basename=$(basename "$real_file")
|
|
|
|
# Copy the real file if we haven't already
|
|
if [[ -z "${COPIED_FILES[$real_basename]:-}" ]]; then
|
|
cp -v "$real_file" "$TARGET_LIB_DIR/$real_basename" 2>/dev/null || true
|
|
COPIED_FILES[$real_basename]=1
|
|
fi
|
|
|
|
# Create the symlink if the source name differs from the real file name
|
|
if [ "$src_basename" != "$real_basename" ]; then
|
|
# Point directly to the real file for simplicity and reliability
|
|
ln -sfv "$real_basename" "$TARGET_LIB_DIR/$src_basename" 2>/dev/null || true
|
|
fi
|
|
COPIED_FILES[$src_basename]=1
|
|
else
|
|
# Source is a regular file - copy if not already copied
|
|
if [[ -z "${COPIED_FILES[$src_basename]:-}" ]]; then
|
|
cp -v "$src" "$TARGET_LIB_DIR/$src_basename" 2>/dev/null || true
|
|
fi
|
|
COPIED_FILES[$src_basename]=1
|
|
fi
|
|
}
|
|
|
|
# Helper function to copy all matching libraries from a glob pattern
|
|
# Files are sorted so that regular files are processed before symlinks
|
|
copy_libs_glob() {
|
|
local pattern="$1"
|
|
# Use nullglob option to handle non-matching patterns gracefully
|
|
local old_nullglob=$(shopt -p nullglob)
|
|
shopt -s nullglob
|
|
local matched=($pattern)
|
|
eval "$old_nullglob"
|
|
|
|
# Sort files: regular files first, then symlinks
|
|
# This ensures real files are copied before we try to create symlinks pointing to them
|
|
local regular_files=()
|
|
local symlinks=()
|
|
for file in "${matched[@]}"; do
|
|
if [ -L "$file" ]; then
|
|
symlinks+=("$file")
|
|
elif [ -e "$file" ]; then
|
|
regular_files+=("$file")
|
|
fi
|
|
done
|
|
|
|
# Process regular files first, then symlinks
|
|
for lib in "${regular_files[@]}" "${symlinks[@]}"; do
|
|
copy_lib "$lib"
|
|
done
|
|
}
|
|
|
|
# Returns success for the core runtime libs the base image and package.sh
|
|
# already provide. We must NOT bundle our own copies of these — a second libc
|
|
# or libstdc++ on LD_LIBRARY_PATH clashes with the loader and the rest of the
|
|
# process — so they're skipped when pulling in a driver's transitive deps.
|
|
is_core_lib() {
|
|
case "$1" in
|
|
ld-linux*|ld.so|libc.so.*|libm.so.*|libdl.so.*|libpthread.so.*|librt.so.*|\
|
|
libgcc_s.so.*|libstdc++.so.*|libresolv.so.*|libutil.so.*|linux-vdso.so.*)
|
|
return 0 ;;
|
|
esac
|
|
return 1
|
|
}
|
|
|
|
# Copy the shared-library dependencies of an ELF file into TARGET_LIB_DIR.
|
|
# Used to make a bundled GPU driver self-contained: e.g. the Mesa Vulkan ICDs
|
|
# pull in libdrm, libexpat and (for RADV/lavapipe) libLLVM, none of which the
|
|
# runtime base image is guaranteed to have. Core libc-family deps are skipped.
|
|
copy_elf_deps() {
|
|
local elf="$1"
|
|
[ -e "$elf" ] || return 0
|
|
command -v ldd >/dev/null 2>&1 || return 0
|
|
|
|
# ldd lines look like: "<TAB>libfoo.so.1 => /path/to/libfoo.so.1 (0x..)".
|
|
# Take the resolved absolute path (field 3) and skip vdso/static entries.
|
|
while read -r dep; do
|
|
if is_core_lib "$(basename "$dep")"; then
|
|
continue
|
|
fi
|
|
copy_lib "$dep"
|
|
done < <(ldd "$elf" 2>/dev/null | awk '/=>/ && $3 ~ /^\// {print $3}')
|
|
}
|
|
|
|
# Sweep the transitive shared-library dependencies of everything already
|
|
# bundled in a lib dir. The per-vendor packagers below copy an explicit
|
|
# allowlist of top-level runtime libs, but those libs pull in transitive deps
|
|
# that aren't in the list (e.g. ROCm's librocprofiler-register.so.0, libnuma,
|
|
# libdrm_amdgpu). Because backends run through the bundled lib/ld.so with
|
|
# LD_LIBRARY_PATH=lib (see run.sh), an unbundled transitive dep is a hard load
|
|
# failure (issue #10537: "librocprofiler-register.so.0: cannot open shared
|
|
# object file"). ldd resolves the full recursive closure, so a single pass over
|
|
# the already-bundled libs is enough; core libc-family deps are skipped via
|
|
# copy_elf_deps/is_core_lib so we never shadow the loader's own libc/libstdc++.
|
|
sweep_transitive_deps() {
|
|
local dir="${1:-$TARGET_LIB_DIR}"
|
|
command -v ldd >/dev/null 2>&1 || return 0
|
|
|
|
# Snapshot the current set first: copy_elf_deps adds files as it runs, and
|
|
# ldd already returns the full recursive closure, so we only need to sweep
|
|
# the libs that were present before the sweep started.
|
|
# `local x=$(...)` keeps set -e from tripping on shopt -p's nonzero exit.
|
|
local old_nullglob=$(shopt -p nullglob)
|
|
shopt -s nullglob
|
|
local libs=("$dir"/*.so*)
|
|
eval "$old_nullglob"
|
|
|
|
local lib
|
|
for lib in "${libs[@]}"; do
|
|
[ -e "$lib" ] || continue
|
|
# Skip symlinks: their real target is in the snapshot and gets swept.
|
|
[ -L "$lib" ] && continue
|
|
copy_elf_deps "$lib"
|
|
done
|
|
}
|
|
|
|
# Package NVIDIA CUDA libraries
|
|
package_cuda_libs() {
|
|
echo "Packaging CUDA libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
|
|
|
local cuda_lib_paths=(
|
|
"/usr/local/cuda/lib64"
|
|
"/usr/local/cuda-${CUDA_MAJOR_VERSION:-}/lib64"
|
|
"/usr/lib/x86_64-linux-gnu"
|
|
"/usr/lib/aarch64-linux-gnu"
|
|
)
|
|
|
|
# Core CUDA runtime libraries
|
|
local cuda_libs=(
|
|
"libcudart.so*"
|
|
"libcublas.so*"
|
|
"libcublasLt.so*"
|
|
"libcufft.so*"
|
|
"libcurand.so*"
|
|
"libcusparse.so*"
|
|
"libcusolver.so*"
|
|
"libnvrtc.so*"
|
|
"libnvrtc-builtins.so*"
|
|
"libcudnn.so*"
|
|
"libcudnn_ops.so*"
|
|
"libcudnn_cnn.so*"
|
|
"libnvJitLink.so*"
|
|
"libnvinfer.so*"
|
|
"libnvonnxparser.so*"
|
|
)
|
|
|
|
for lib_path in "${cuda_lib_paths[@]}"; do
|
|
if [ -d "$lib_path" ]; then
|
|
for lib_pattern in "${cuda_libs[@]}"; do
|
|
copy_libs_glob "${lib_path}/${lib_pattern}"
|
|
done
|
|
fi
|
|
done
|
|
|
|
# Copy CUDA target directory for runtime compilation support
|
|
# if [ -d "/usr/local/cuda/targets" ]; then
|
|
# mkdir -p "$TARGET_LIB_DIR/../cuda"
|
|
# cp -arfL /usr/local/cuda/targets "$TARGET_LIB_DIR/../cuda/" 2>/dev/null || true
|
|
# fi
|
|
|
|
# Pull in transitive deps the allowlist misses so the backend is
|
|
# self-contained (same class of failure as #10537).
|
|
sweep_transitive_deps "$TARGET_LIB_DIR"
|
|
|
|
echo "CUDA libraries packaged successfully"
|
|
}
|
|
|
|
# Copy a ROCm library data subdirectory (e.g. rocblas, hipblaslt) into the
|
|
# bundled lib/ dir. These directories hold the TensileLibrary_*.dat GPU kernel
|
|
# tuning files, which rocBLAS/hipBLASLt load at runtime *relative to their own
|
|
# .so*. Since backends ship their own copies of libhipblaslt.so/librocblas.so
|
|
# under lib/, the matching data dir must travel with them or the libs fall back
|
|
# to slow generic kernels (rocblaslt error: Cannot read TensileLibrary_lazy_gfx*.dat;
|
|
# see issue #10660).
|
|
#
|
|
# The ROCm search roots default to /opt/rocm{,-*} but can be overridden via the
|
|
# ROCM_BASE_DIRS env var (space-separated), which keeps the copy unit-testable
|
|
# without a real ROCm install.
|
|
# Args: $1 = data subdir name found under <rocm-root>/lib{,64}/
|
|
copy_rocm_data_dir() {
|
|
local data_name="$1"
|
|
# Single-line `local x=$(...)` on purpose: `local` masks the command
|
|
# substitution's exit status, which is 1 when nullglob is unset and would
|
|
# otherwise trip the script's `set -e`.
|
|
local old_nullglob=$(shopt -p nullglob)
|
|
shopt -s nullglob
|
|
local rocm_dirs
|
|
if [ -n "${ROCM_BASE_DIRS:-}" ]; then
|
|
# shellcheck disable=SC2206 # intentional word-split of the override
|
|
rocm_dirs=(${ROCM_BASE_DIRS})
|
|
else
|
|
rocm_dirs=(/opt/rocm /opt/rocm-*)
|
|
fi
|
|
eval "$old_nullglob"
|
|
local found=false
|
|
local rocm_base lib_subdir
|
|
for rocm_base in "${rocm_dirs[@]}"; do
|
|
for lib_subdir in lib lib64; do
|
|
if [ -d "$rocm_base/$lib_subdir/$data_name" ]; then
|
|
echo "Found $data_name data at $rocm_base/$lib_subdir/$data_name"
|
|
mkdir -p "$TARGET_LIB_DIR/$data_name"
|
|
cp -arfL "$rocm_base/$lib_subdir/$data_name/"* "$TARGET_LIB_DIR/$data_name/" || echo "WARNING: Failed to copy $data_name data from $rocm_base/$lib_subdir/$data_name"
|
|
found=true
|
|
fi
|
|
done
|
|
done
|
|
if [ "$found" = false ]; then
|
|
echo "WARNING: No $data_name library data found in ${ROCM_BASE_DIRS:-/opt/rocm*}/lib{,64}/$data_name"
|
|
fi
|
|
}
|
|
|
|
# Package AMD ROCm/HIPBlas libraries
|
|
package_rocm_libs() {
|
|
echo "Packaging ROCm/HIPBlas libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
|
|
|
local rocm_lib_paths=(
|
|
"/opt/rocm/lib"
|
|
"/opt/rocm/lib64"
|
|
"/opt/rocm/hip/lib"
|
|
)
|
|
|
|
# Find the actual ROCm versioned directory
|
|
for rocm_dir in /opt/rocm-*; do
|
|
if [ -d "$rocm_dir/lib" ]; then
|
|
rocm_lib_paths+=("$rocm_dir/lib")
|
|
fi
|
|
done
|
|
|
|
# Core ROCm/HIP runtime libraries
|
|
local rocm_libs=(
|
|
"libamdhip64.so*"
|
|
"libhipblas.so*"
|
|
"libhipblaslt.so*"
|
|
"librocblas.so*"
|
|
"librocrand.so*"
|
|
"librocsparse.so*"
|
|
"librocsolver.so*"
|
|
"librocfft.so*"
|
|
"libMIOpen.so*"
|
|
"libroctx64.so*"
|
|
"libhsa-runtime64.so*"
|
|
"libamd_comgr.so*"
|
|
"libhip_hcc.so*"
|
|
"libhiprtc.so*"
|
|
)
|
|
|
|
for lib_path in "${rocm_lib_paths[@]}"; do
|
|
if [ -d "$lib_path" ]; then
|
|
for lib_pattern in "${rocm_libs[@]}"; do
|
|
copy_libs_glob "${lib_path}/${lib_pattern}"
|
|
done
|
|
fi
|
|
done
|
|
|
|
# Copy rocBLAS and hipBLASLt kernel data (TensileLibrary_*.dat tuning files)
|
|
# so the bundled libs find their per-arch kernels at runtime instead of
|
|
# falling back to slow generic code (see copy_rocm_data_dir / issue #10660).
|
|
copy_rocm_data_dir rocblas
|
|
copy_rocm_data_dir hipblaslt
|
|
|
|
# Copy libomp from LLVM (required for ROCm)
|
|
# Single-line `local x=$(...)` on purpose: masks shopt -p's nonzero exit
|
|
# (nullglob unset) so it doesn't trip `set -e`.
|
|
local old_nullglob=$(shopt -p nullglob)
|
|
shopt -s nullglob
|
|
local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*)
|
|
eval "$old_nullglob"
|
|
for omp_path in "${omp_libs[@]}"; do
|
|
if [ -e "$omp_path" ]; then
|
|
copy_lib "$omp_path"
|
|
fi
|
|
done
|
|
|
|
# Pull in transitive deps the allowlist misses (librocprofiler-register.so.0,
|
|
# libnuma, libdrm_amdgpu, ...) so the backend is self-contained. See #10537.
|
|
sweep_transitive_deps "$TARGET_LIB_DIR"
|
|
|
|
echo "ROCm libraries packaged successfully"
|
|
}
|
|
|
|
# Package Intel oneAPI/SYCL libraries
|
|
package_intel_libs() {
|
|
echo "Packaging Intel oneAPI/SYCL libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
|
|
|
local intel_lib_paths=(
|
|
"/opt/intel/oneapi/compiler/latest/lib"
|
|
"/opt/intel/oneapi/mkl/latest/lib/intel64"
|
|
"/opt/intel/oneapi/tbb/latest/lib/intel64/gcc4.8"
|
|
)
|
|
|
|
# Core Intel oneAPI runtime libraries
|
|
local intel_libs=(
|
|
"libsycl.so*"
|
|
"libOpenCL.so*"
|
|
"libmkl_core.so*"
|
|
"libmkl_intel_lp64.so*"
|
|
"libmkl_intel_thread.so*"
|
|
"libmkl_sequential.so*"
|
|
"libmkl_sycl.so*"
|
|
"libiomp5.so*"
|
|
"libsvml.so*"
|
|
"libirng.so*"
|
|
"libimf.so*"
|
|
"libintlc.so*"
|
|
"libtbb.so*"
|
|
"libtbbmalloc.so*"
|
|
"libpi_level_zero.so*"
|
|
"libpi_opencl.so*"
|
|
"libze_loader.so*"
|
|
)
|
|
|
|
for lib_path in "${intel_lib_paths[@]}"; do
|
|
if [ -d "$lib_path" ]; then
|
|
for lib_pattern in "${intel_libs[@]}"; do
|
|
copy_libs_glob "${lib_path}/${lib_pattern}"
|
|
done
|
|
fi
|
|
done
|
|
|
|
# Pull in transitive deps the allowlist misses so the backend is
|
|
# self-contained (same class of failure as #10537).
|
|
sweep_transitive_deps "$TARGET_LIB_DIR"
|
|
|
|
echo "Intel oneAPI libraries packaged successfully"
|
|
}
|
|
|
|
# Package Vulkan libraries
|
|
package_vulkan_libs() {
|
|
echo "Packaging Vulkan libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
|
|
|
local vulkan_lib_paths=(
|
|
"/usr/lib/x86_64-linux-gnu"
|
|
"/usr/lib/aarch64-linux-gnu"
|
|
"/usr/local/lib"
|
|
)
|
|
|
|
# Core Vulkan runtime: the loader plus the shader tooling shipped by the SDK.
|
|
local vulkan_libs=(
|
|
"libvulkan.so*"
|
|
"libshaderc_shared.so*"
|
|
"libSPIRV.so*"
|
|
"libSPIRV-Tools.so*"
|
|
"libglslang.so*"
|
|
)
|
|
|
|
for lib_path in "${vulkan_lib_paths[@]}"; do
|
|
if [ -d "$lib_path" ]; then
|
|
for lib_pattern in "${vulkan_libs[@]}"; do
|
|
copy_libs_glob "${lib_path}/${lib_pattern}"
|
|
done
|
|
fi
|
|
done
|
|
|
|
# Bundle the ICD drivers. Rather than hard-code Mesa's (platform- and
|
|
# version-dependent) driver sonames, treat each installed ICD manifest as
|
|
# the source of truth: every /usr/share/vulkan/icd.d/*.json names the exact
|
|
# driver .so it needs in its "library_path". So we copy whatever drivers
|
|
# the manifests reference (libvulkan_intel/radeon/lvp/... on amd64, the SoC
|
|
# drivers on arm64, ...) plus each driver's transitive deps, and skip any
|
|
# manifest whose driver isn't actually installed. The loader picks the
|
|
# right driver for the GPU at runtime.
|
|
if [ -d "/usr/share/vulkan/icd.d" ]; then
|
|
local icd_dest="$TARGET_LIB_DIR/../vulkan/icd.d"
|
|
mkdir -p "$icd_dest"
|
|
|
|
local manifest driver driver_base resolved lib_path
|
|
for manifest in /usr/share/vulkan/icd.d/*.json; do
|
|
[ -e "$manifest" ] || continue
|
|
|
|
# Pull the driver path out of "library_path": "<path-or-soname>".
|
|
driver=$(sed -nE 's/.*"library_path"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/p' "$manifest" | head -n1)
|
|
[ -n "$driver" ] || continue
|
|
driver_base=$(basename "$driver")
|
|
|
|
# Resolve to an absolute path: honour an absolute library_path,
|
|
# else look in the standard lib dirs, else fall back to ldconfig.
|
|
resolved=""
|
|
case "$driver" in
|
|
/*) [ -e "$driver" ] && resolved="$driver" ;;
|
|
esac
|
|
if [ -z "$resolved" ]; then
|
|
for lib_path in "${vulkan_lib_paths[@]}"; do
|
|
if [ -e "${lib_path}/${driver_base}" ]; then
|
|
resolved="${lib_path}/${driver_base}"
|
|
break
|
|
fi
|
|
done
|
|
fi
|
|
if [ -z "$resolved" ] && command -v ldconfig >/dev/null 2>&1; then
|
|
resolved=$(ldconfig -p | awk -v n="$driver_base" '$1 == n { print $NF; exit }')
|
|
fi
|
|
|
|
if [ -z "$resolved" ] || [ ! -e "$resolved" ]; then
|
|
echo "Vulkan ICD: driver '$driver_base' for $(basename "$manifest") not installed; skipping its manifest" >&2
|
|
continue
|
|
fi
|
|
|
|
# Bundle the driver + its transitive deps (libdrm, libexpat, and
|
|
# libLLVM for RADV/lavapipe, ...) so the backend is self-contained
|
|
# on a runtime base image without Mesa.
|
|
copy_lib "$resolved"
|
|
copy_elf_deps "$resolved"
|
|
|
|
# Copy the manifest and rewrite its library_path to a bare soname
|
|
# so the loader resolves our bundled driver via LD_LIBRARY_PATH
|
|
# (run.sh adds lib/ to it) instead of a host path that won't exist
|
|
# on the runtime image.
|
|
cp -arfL "$manifest" "$icd_dest/" 2>/dev/null || true
|
|
sed -i -E 's#("library_path"[[:space:]]*:[[:space:]]*")[^"]*/#\1#' "$icd_dest/$(basename "$manifest")"
|
|
done
|
|
fi
|
|
|
|
echo "Vulkan libraries packaged successfully"
|
|
}
|
|
|
|
# Main function to package GPU libraries based on BUILD_TYPE
|
|
package_gpu_libs() {
|
|
local build_type="${BUILD_TYPE:-}"
|
|
|
|
echo "Packaging GPU libraries for BUILD_TYPE=${build_type}..."
|
|
|
|
case "$build_type" in
|
|
cublas|l4t)
|
|
package_cuda_libs
|
|
;;
|
|
hipblas)
|
|
package_rocm_libs
|
|
;;
|
|
sycl_f16|sycl_f32|intel)
|
|
package_intel_libs
|
|
;;
|
|
vulkan)
|
|
package_vulkan_libs
|
|
;;
|
|
""|cpu)
|
|
echo "No GPU libraries to package for BUILD_TYPE=${build_type}"
|
|
;;
|
|
*)
|
|
echo "Unknown BUILD_TYPE: ${build_type}, skipping GPU library packaging"
|
|
;;
|
|
esac
|
|
|
|
echo "GPU library packaging complete. Contents of ${TARGET_LIB_DIR}:"
|
|
ls -la "$TARGET_LIB_DIR/" 2>/dev/null || echo " (empty or not created)"
|
|
}
|
|
|
|
# Export the function so it can be sourced and called
|
|
export -f package_gpu_libs
|
|
export -f copy_lib
|
|
export -f copy_libs_glob
|
|
export -f is_core_lib
|
|
export -f copy_elf_deps
|
|
export -f sweep_transitive_deps
|
|
export -f copy_rocm_data_dir
|
|
export -f package_cuda_libs
|
|
export -f package_rocm_libs
|
|
export -f package_intel_libs
|
|
export -f package_vulkan_libs
|
|
|
|
# If script is run directly (not sourced), execute the packaging
|
|
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
|
package_gpu_libs
|
|
fi
|