diff --git a/.agents/adding-backends.md b/.agents/adding-backends.md index 46233469b..e775a4492 100644 --- a/.agents/adding-backends.md +++ b/.agents/adding-backends.md @@ -28,7 +28,7 @@ Add build matrix entries for each platform/GPU type you want to support. Look at - CUDA 13 builds: Add after other CUDA 13 builds (e.g., after `gpu-nvidia-cuda-13-chatterbox`) **Additional build types you may need:** -- ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:6.4.4"` +- ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:7.2.1"` - Intel/SYCL: Use `build-type: 'intel'` or `build-type: 'sycl_f16'`/`sycl_f32` with `base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"` - L4T (ARM): Use `build-type: 'l4t'` with `platforms: 'linux/arm64'` and `runs-on: 'ubuntu-24.04-arm'` diff --git a/.agents/building-and-testing.md b/.agents/building-and-testing.md index 2f5488fa4..9c6b59f1a 100644 --- a/.agents/building-and-testing.md +++ b/.agents/building-and-testing.md @@ -10,7 +10,7 @@ Let's say the user wants to build a particular backend for a given platform. 
For - At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args - Use .github/workflows/backend.yml as a reference it lists the needed args in the `include` job strategy matrix - l4t and cublas also requires the CUDA major and minor version -- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-coqui` +- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:7.2.1 make docker-build-coqui` - Unless the user specifies that they want you to run the command, then just print it because not all agent frontends handle long running jobs well and the output may overflow your context - The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL or NVIDIA insted of l4t or cublas. Ask for confirmation if there is ambiguity. - Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly. 
diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 978b49121..e88495d0b 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -1298,7 +1298,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" @@ -1311,7 +1311,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -1324,7 +1324,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" @@ -1337,7 +1337,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm-omni' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "vllm-omni" dockerfile: "./backend/Dockerfile.python" @@ -1350,7 +1350,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" @@ -1363,7 +1363,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" @@ -1376,7 +1376,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-ace-step' 
runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "ace-step" dockerfile: "./backend/Dockerfile.python" @@ -1390,7 +1390,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" @@ -1403,7 +1403,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vibevoice' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" @@ -1416,7 +1416,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-qwen-asr' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "qwen-asr" dockerfile: "./backend/Dockerfile.python" @@ -1429,7 +1429,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-nemo' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "nemo" dockerfile: "./backend/Dockerfile.python" @@ -1442,7 +1442,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-qwen-tts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "qwen-tts" dockerfile: "./backend/Dockerfile.python" @@ -1455,7 +1455,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-fish-speech' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "fish-speech" dockerfile: "./backend/Dockerfile.python" @@ -1468,7 +1468,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-voxcpm' 
runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "voxcpm" dockerfile: "./backend/Dockerfile.python" @@ -1481,7 +1481,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-pocket-tts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "pocket-tts" dockerfile: "./backend/Dockerfile.python" @@ -1494,7 +1494,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'bigger-runner' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" @@ -1507,7 +1507,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-whisperx' runs-on: 'bigger-runner' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "whisperx" dockerfile: "./backend/Dockerfile.python" @@ -1520,7 +1520,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'bigger-runner' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" @@ -2175,7 +2175,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-whisper' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "whisper" @@ -2254,7 +2254,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-acestep-cpp' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "acestep-cpp" @@ -2456,7 +2456,7 @@ jobs: # platforms: 'linux/amd64' # tag-latest: 'auto' # tag-suffix: '-gpu-hipblas-rfdetr' 
- # base-image: "rocm/dev-ubuntu-24.04:6.4.4" + # base-image: "rocm/dev-ubuntu-24.04:7.2.1" # runs-on: 'ubuntu-latest' # skip-drivers: 'false' # backend: "rfdetr" @@ -2497,7 +2497,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-neutts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index f4ec247ae..028bc9453 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -59,7 +59,7 @@ platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-hipblas' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8b672e897..55edb94c3 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -41,7 +41,7 @@ platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" + base-image: "rocm/dev-ubuntu-24.04:7.2.1" grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 3930d04d4..d7be7941c 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -209,7 +209,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then rm -rf /var/lib/apt/lists/* && \ # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able # to locate the libraries. 
We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ + ldconfig && \ + # Log which GPU architectures have rocBLAS kernel support + echo "rocBLAS library data architectures:" && \ + ( (ls /opt/rocm*/lib/rocblas/library/Kernels* 2>/dev/null || ls /opt/rocm*/lib64/rocblas/library/Kernels* 2>/dev/null) | grep -oP 'gfx[0-9a-z+-]+' | sort -u | grep . \ + || echo "WARNING: No rocBLAS kernel data found" ) \ ; fi RUN echo "TARGETARCH: $TARGETARCH" diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 3c64918ab..afe429eae 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -33,7 +33,7 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=1 diff --git a/backend/cpp/llama-cpp/run.sh b/backend/cpp/llama-cpp/run.sh index 2f1ff13cf..553faeb27 100755 --- a/backend/cpp/llama-cpp/run.sh +++ b/backend/cpp/llama-cpp/run.sh @@ -46,6 +46,10 @@ if [ "$(uname)" == "Darwin" ]; then #export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH else export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH + # Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files) + if [ -d "$CURDIR/lib/rocblas/library" ]; then + export ROCBLAS_TENSILE_LIBPATH=$CURDIR/lib/rocblas/library + fi fi # If there is a lib/ld.so, use it diff --git a/backend/go/sam3-cpp/Makefile b/backend/go/sam3-cpp/Makefile index 3983b596e..ed0aa3c6e 100644 --- a/backend/go/sam3-cpp/Makefile +++ b/backend/go/sam3-cpp/Makefile @@ -26,7 +26,7 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export
CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=ON diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile index 11c2737d4..780b7b856 100644 --- a/backend/go/stablediffusion-ggml/Makefile +++ b/backend/go/stablediffusion-ggml/Makefile @@ -32,7 +32,7 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON diff --git a/backend/python/ace-step/requirements-hipblas.txt b/backend/python/ace-step/requirements-hipblas.txt index e1c3c0cc2..459792279 100644 --- a/backend/python/ace-step/requirements-hipblas.txt +++ b/backend/python/ace-step/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 torchaudio torchvision diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index ed30fb824..21705a460 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.9.1+rocm6.4 
-torchaudio==2.9.1+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchaudio==2.10.0+rocm7.0 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt index b733ec7b1..ee867992d 100644 --- a/backend/python/common/template/requirements-hipblas.txt +++ b/backend/python/common/template/requirements-hipblas.txt @@ -1,2 +1,2 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 8e7d03459..5a3967d34 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchaudio==2.8.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchaudio==2.10.0+rocm7.0 transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index b1f8b3e04..712510827 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchvision==0.23.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchvision==0.25.0+rocm7.0 git+https://github.com/huggingface/diffusers opencv-python transformers diff --git a/backend/python/faster-whisper/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt index da9c9123c..286df8ff8 100644 --- 
a/backend/python/faster-whisper/requirements-hipblas.txt +++ b/backend/python/faster-whisper/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 torch faster-whisper \ No newline at end of file diff --git a/backend/python/fish-speech/requirements-hipblas.txt b/backend/python/fish-speech/requirements-hipblas.txt index 81a30d412..ead3ec07a 100644 --- a/backend/python/fish-speech/requirements-hipblas.txt +++ b/backend/python/fish-speech/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchaudio==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchaudio==2.10.0+rocm7.0 diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt index 74262df5c..0ca135228 100644 --- a/backend/python/kokoro/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchaudio==2.8.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchaudio==2.10.0+rocm7.0 transformers accelerate kokoro diff --git a/backend/python/nemo/requirements-hipblas.txt b/backend/python/nemo/requirements-hipblas.txt index 21a5d9635..355bed861 100644 --- a/backend/python/nemo/requirements-hipblas.txt +++ b/backend/python/nemo/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 torch nemo_toolkit[asr] diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt index 72d11e059..47999c40d 100644 --- a/backend/python/neutts/requirements-hipblas.txt +++ b/backend/python/neutts/requirements-hipblas.txt @@ -1,5 +1,5 @@ 
---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 transformers==4.56.1 accelerate librosa==0.11.0 diff --git a/backend/python/outetts/requirements-hipblas.txt b/backend/python/outetts/requirements-hipblas.txt index 1cd505db9..086931625 100644 --- a/backend/python/outetts/requirements-hipblas.txt +++ b/backend/python/outetts/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 accelerate llvmlite==0.43.0 numba==0.60.0 diff --git a/backend/python/pocket-tts/requirements-hipblas.txt b/backend/python/pocket-tts/requirements-hipblas.txt index b6f9d2fb6..d5c0b1f0e 100644 --- a/backend/python/pocket-tts/requirements-hipblas.txt +++ b/backend/python/pocket-tts/requirements-hipblas.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 pocket-tts scipy -torch==2.7.1+rocm6.3 +torch==2.10.0+rocm7.0 diff --git a/backend/python/qwen-asr/requirements-hipblas.txt b/backend/python/qwen-asr/requirements-hipblas.txt index 6871f93f5..83fd16be5 100644 --- a/backend/python/qwen-asr/requirements-hipblas.txt +++ b/backend/python/qwen-asr/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 qwen-asr diff --git a/backend/python/qwen-tts/requirements-hipblas.txt b/backend/python/qwen-tts/requirements-hipblas.txt index 7c8b8273a..63f47eb7e 100644 --- a/backend/python/qwen-tts/requirements-hipblas.txt +++ b/backend/python/qwen-tts/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchaudio==2.7.1+rocm6.3 +--extra-index-url 
https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchaudio==2.10.0+rocm7.0 qwen-tts sox diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 7a72b3d06..3f4a21d9a 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 transformers accelerate -torch==2.8.0+rocm6.4 +torch==2.10.0+rocm7.0 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt index 884cfba7b..b8978d1bf 100644 --- a/backend/python/rfdetr/requirements-hipblas.txt +++ b/backend/python/rfdetr/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchvision==0.23.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchvision==0.25.0+rocm7.0 rfdetr opencv-python accelerate diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 5460b10b7..a5a6ffa30 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 accelerate transformers>=5.0.0 llvmlite==0.43.0 diff --git a/backend/python/vibevoice/requirements-hipblas.txt b/backend/python/vibevoice/requirements-hipblas.txt index 931dd1e0a..ed54961c1 100644 --- a/backend/python/vibevoice/requirements-hipblas.txt +++ b/backend/python/vibevoice/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 
-torch==2.7.1+rocm6.3 -torchvision==0.22.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 +torchvision==0.25.0+rocm7.0 git+https://github.com/huggingface/diffusers opencv-python transformers>=4.51.3,<5.0.0 diff --git a/backend/python/vllm-omni/requirements-hipblas.txt b/backend/python/vllm-omni/requirements-hipblas.txt index 426494ec0..7233fb441 100644 --- a/backend/python/vllm-omni/requirements-hipblas.txt +++ b/backend/python/vllm-omni/requirements-hipblas.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4 +--extra-index-url https://download.pytorch.org/whl/nightly/rocm7.0 accelerate torch transformers diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index db732bc86..1163f1b64 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4 +--extra-index-url https://download.pytorch.org/whl/nightly/rocm7.0 accelerate torch transformers diff --git a/backend/python/voxcpm/requirements-hipblas.txt b/backend/python/voxcpm/requirements-hipblas.txt index 7541c8149..2067df0af 100644 --- a/backend/python/voxcpm/requirements-hipblas.txt +++ b/backend/python/voxcpm/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 soundfile numpy voxcpm diff --git a/backend/python/whisperx/requirements-hipblas.txt b/backend/python/whisperx/requirements-hipblas.txt index 160040026..1931a382e 100644 --- a/backend/python/whisperx/requirements-hipblas.txt +++ b/backend/python/whisperx/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 +--extra-index-url https://download.pytorch.org/whl/rocm7.0 +torch==2.10.0+rocm7.0 
whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/docs/content/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md index aedc47516..fc2ecd545 100644 --- a/docs/content/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -151,15 +151,15 @@ llama_init_from_file: kv self size = 512.00 MB ## ROCM(AMD) acceleration -There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation. +There are a limited number of tested configurations for ROCm systems however most newer dedicated GPU consumer grade devices seem to be supported under the current ROCm 7 implementation. Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "installation/build#Acceleration" %}}) documentation. ### Requirements -- `ROCm 6.x.x` compatible GPU/accelerator -- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4) -- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation. +- `ROCm 7.x.x` compatible GPU/accelerator +- OS: `Ubuntu` (24.04, 22.04), `RHEL` (9.x), `SLES` (15.x) +- Installed to host: `amdgpu-dkms` and `rocm` >=7.0.0 as per ROCm documentation. ### Recommendations @@ -171,24 +171,23 @@ Due to the nature of ROCm it is best to run all implementations in containers as Ongoing verification testing of ROCm compatibility with integrated backends. Please note the following list of verified backends and devices. 
-LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 +LocalAI hipblas images are built against the following targets: gfx908, gfx90a, gfx942, gfx950, gfx1030, gfx1100, gfx1101, gfx1102, gfx1200, gfx1201 -If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below. +**Note:** Starting with ROCm 6.4, AMD removed rocBLAS kernel support for older architectures (gfx803, gfx900, gfx906). Since llama.cpp and other backends depend on rocBLAS for matrix operations, these GPUs (e.g. Radeon VII) are no longer supported in pre-built images. + +If your device is not one of the above targets, you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. However, rebuilding will not help for architectures that lack rocBLAS kernel support in your ROCm version. ### Verified -The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0` +The devices in the following list have been tested with `hipblas` images. | Backend | Verified | Devices | | ---- | ---- | ---- | -| llama.cpp | yes | Radeon VII (gfx906) | -| diffusers | yes | Radeon VII (gfx906) | -| piper | yes | Radeon VII (gfx906) | +| llama.cpp | yes | MI100 (gfx908), MI210/250 (gfx90a) | +| diffusers | yes | MI100 (gfx908), MI210/250 (gfx90a) | | whisper | no | none | | coqui | no | none | | transformers | no | none | -| sentencetransformers | no | none | -| transformers-musicgen | no | none | | vllm | no | none | **You can help by expanding this list.** @@ -196,8 +195,8 @@ The devices in the following list have been tested with `hipblas` images running ### System Prep 1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html). -2. 
Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)) -3. Install you chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html)) +2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the [ROCm compatibility matrix](https://rocm.docs.amd.com/en/latest/compatibility/compatibility-matrix.html). +3. Install your chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the [ROCm installation documentation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html). 4. Deploy. Yes it's that easy. #### Setup Example (Docker/containerd) @@ -212,7 +211,7 @@ The following are examples of the ROCm specific configuration elements required. 
# If your gpu is not already included in the current list of default targets the following build details are required. - REBUILD=true - BUILD_TYPE=hipblas - - GPU_TARGETS=gfx906 # Example for Radeon VII + - GPU_TARGETS=gfx1100 # Example for RX 7900 XTX devices: # AMD GPU only require the following devices be passed through to the container for offloading to occur. - /dev/dri @@ -226,7 +225,7 @@ docker run \ -e DEBUG=true \ -e REBUILD=true \ -e BUILD_TYPE=hipblas \ - -e GPU_TARGETS=gfx906 \ + -e GPU_TARGETS=gfx1100 \ --device /dev/dri \ --device /dev/kfd \ quay.io/go-skynet/local-ai:master-gpu-hipblas diff --git a/docs/content/getting-started/troubleshooting.md b/docs/content/getting-started/troubleshooting.md index dc6ae6668..16dde84da 100644 --- a/docs/content/getting-started/troubleshooting.md +++ b/docs/content/getting-started/troubleshooting.md @@ -163,7 +163,7 @@ rocminfo docker run --device=/dev/kfd --device=/dev/dri --group-add=video ... ``` -If your GPU is not in the default target list, open up an Issue. Supported targets include: gfx900, gfx906, gfx908, gfx90a, gfx940, gfx941, gfx942, gfx1030, gfx1031, gfx1100, gfx1101. +If your GPU is not in the default target list, open up an Issue. Supported targets include: gfx908, gfx90a, gfx942, gfx950, gfx1030, gfx1100, gfx1101, gfx1102, gfx1200, gfx1201. **Intel (SYCL):** diff --git a/scripts/build/package-gpu-libs.sh b/scripts/build/package-gpu-libs.sh index 8fc2a59c8..f1c248b82 100755 --- a/scripts/build/package-gpu-libs.sh +++ b/scripts/build/package-gpu-libs.sh @@ -198,17 +198,25 @@ package_rocm_libs() { fi done - # Copy rocblas library data (tuning files, etc.) + # Copy rocblas library data (tuning files, TensileLibrary, etc.) 
local old_nullglob=$(shopt -p nullglob) shopt -s nullglob local rocm_dirs=(/opt/rocm /opt/rocm-*) eval "$old_nullglob" + local rocblas_found=false for rocm_base in "${rocm_dirs[@]}"; do - if [ -d "$rocm_base/lib/rocblas" ]; then - mkdir -p "$TARGET_LIB_DIR/rocblas" - cp -arfL "$rocm_base/lib/rocblas/"* "$TARGET_LIB_DIR/rocblas/" 2>/dev/null || true - fi + for lib_subdir in lib lib64; do + if [ -d "$rocm_base/$lib_subdir/rocblas" ]; then + echo "Found rocblas data at $rocm_base/$lib_subdir/rocblas" + mkdir -p "$TARGET_LIB_DIR/rocblas" + cp -arfL "$rocm_base/$lib_subdir/rocblas/"* "$TARGET_LIB_DIR/rocblas/" || echo "WARNING: Failed to copy rocblas data from $rocm_base/$lib_subdir/rocblas" + rocblas_found=true + fi + done done + if [ "$rocblas_found" = false ]; then + echo "WARNING: No rocblas library data found in /opt/rocm*/lib{,64}/rocblas" + fi # Copy libomp from LLVM (required for ROCm) shopt -s nullglob