mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-19 14:17:21 -04:00
feat: package GPU libraries inside backend containers for unified base image (#7891)
* Initial plan * Add GPU library packaging for isolated backend environments - Create scripts/build/package-gpu-libs.sh for packaging CUDA, ROCm, SYCL, and Vulkan libraries - Update llama-cpp, whisper, stablediffusion-ggml package.sh to include GPU libraries - Update Dockerfile.python to package GPU libraries into Python backends - Update libbackend.sh to set LD_LIBRARY_PATH for GPU library loading Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Address code review feedback: fix variable consistency and quoting Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Fix code review issues: improve glob handling and remove redundant variable Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> * Simplify main Dockerfile and workflow to use unified base image - Remove GPU-specific driver installation from Dockerfile (CUDA, ROCm, Vulkan, Intel) - Simplify image.yml workflow to build single unified base image for linux/amd64 and linux/arm64 - GPU libraries are now packaged in individual backend containers Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
138
.github/workflows/image.yml
vendored
138
.github/workflows/image.yml
vendored
@@ -13,42 +13,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
hipblas-jobs:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
aio: ${{ matrix.aio }}
|
||||
makeflags: ${{ matrix.makeflags }}
|
||||
ubuntu-version: ${{ matrix.ubuntu-version }}
|
||||
ubuntu-codename: ${{ matrix.ubuntu-codename }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-hipblas'
|
||||
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
||||
grpc-base-image: "ubuntu:24.04"
|
||||
runs-on: 'ubuntu-latest'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
aio: "-aio-gpu-hipblas"
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
|
||||
# Unified base image build - GPU drivers are now packaged in individual backends
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
@@ -72,9 +37,10 @@ jobs:
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
#max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
matrix:
|
||||
include:
|
||||
# Unified base image for all platforms
|
||||
# GPU-specific backends will be pulled at runtime and contain their own GPU libraries
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
@@ -86,101 +52,3 @@ jobs:
|
||||
skip-drivers: 'false'
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "9"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
aio: "-aio-gpu-nvidia-cuda-12"
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
aio: "-aio-gpu-nvidia-cuda-13"
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
aio: "-aio-gpu-vulkan"
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
- build-type: 'intel'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
grpc-base-image: "ubuntu:24.04"
|
||||
tag-suffix: '-gpu-intel'
|
||||
runs-on: 'ubuntu-latest'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
aio: "-aio-gpu-intel"
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
|
||||
gh-runner:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
aio: ${{ matrix.aio }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
makeflags: ${{ matrix.makeflags }}
|
||||
skip-drivers: ${{ matrix.skip-drivers }}
|
||||
ubuntu-version: ${{ matrix.ubuntu-version }}
|
||||
ubuntu-codename: ${{ matrix.ubuntu-codename }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "9"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64'
|
||||
base-image: "ubuntu:24.04"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'true'
|
||||
ubuntu-version: "2404"
|
||||
ubuntu-codename: 'noble'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-cuda-13'
|
||||
base-image: "ubuntu:24.04"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'false'
|
||||
ubuntu-version: '2404'
|
||||
ubuntu-codename: 'noble'
|
||||
|
||||
Reference in New Issue
Block a user