Compare commits

...

92 Commits

Author SHA1 Message Date
Ettore Di Giacinto
057d5c25f1 test CI (remove me)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-04 18:49:15 +02:00
Ettore Di Giacinto
cdf70db0d6 chore(deps): switch to ubuntu 24.04
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-04 18:48:53 +02:00
Ettore Di Giacinto
c35dd0a7b8 chore(model gallery): add zerofata_ms3.2-paintedfantasy-visage-33b (#5793)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-04 18:42:01 +02:00
Ettore Di Giacinto
2f5af6b246 chore(model gallery): add agentica-org_deepswe-preview (#5792)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-04 18:39:36 +02:00
Ettore Di Giacinto
00cf2e0e0a chore(model gallery): add helpingai_dhanishtha-2.0-preview (#5791)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-04 18:38:18 +02:00
LocalAI [bot]
c7a1d9c089 chore: ⬆️ Update ggml-org/llama.cpp to bee28421be25fd447f61cb6db64d556cbfce32ec (#5788)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-04 08:46:56 +02:00
LocalAI [bot]
ad7ba52166 chore: ⬆️ Update PABannier/bark.cpp to 5d5be84f089ab9ea53b7a793f088d3fbf7247495 (#4786)
⬆️ Update PABannier/bark.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-03 22:44:53 +00:00
Ettore Di Giacinto
c5b9f45166 chore(cli): add backends CLI to manipulate and install backends (#5787)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-03 19:31:27 +02:00
Ettore Di Giacinto
61b64a65ab chore(bark-cpp): generalize and move to bark-cpp (#5786)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-03 19:31:10 +02:00
Ettore Di Giacinto
8276952920 feat(system): detect and allow to override capabilities (#5785)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-03 19:30:52 +02:00
Ettore Di Giacinto
b7cd5bfaec feat(backends): add metas in the gallery (#5784)
* chore(backends): add metas in the gallery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: correctly handle aliases and metas with same names

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-03 18:01:55 +02:00
LocalAI [bot]
da4312e4d3 chore: ⬆️ Update ggml-org/llama.cpp to e75ba4c0434eb759eb7ff74e034ebe729053e575 (#5783)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-03 10:01:17 +02:00
LocalAI [bot]
7d507c54ed chore: ⬆️ Update ggml-org/whisper.cpp to d9999d54c868b8bfcd376aa26067e787d53e679e (#5782)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-03 09:57:36 +02:00
LocalAI [bot]
df7ed49889 docs: ⬆️ update docs version mudler/LocalAI (#5781)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-02 22:45:21 +00:00
Ettore Di Giacinto
bfdc29d316 fix(gallery): correctly show status for downloading OCI images (#5774)
We can't use the bytes written by mutate.Extract as the current status, as that
count will be bigger than the compressed image size: image manifests give no
guarantee about the type of artifact (compressed or not) when reporting the
layer size.

Split the extraction process into two parts, downloading and extracting as a
flattened filesystem, so that we can report the status of downloading and
extracting separately (a sketch follows this entry).

This change also makes detecting installed backends more consistent: a backend
now counts as installed if a metadata.json and/or a path with a `run.sh` file
is present.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-02 08:25:48 +02:00
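A minimal sketch of the two-phase status reporting described above, assuming go-containerregistry (the library that provides mutate.Extract); the names pullWithStatus, report, and isBackendInstalled are illustrative only and not LocalAI's actual API:

```go
package gallery

import (
	"io"
	"os"
	"path/filepath"

	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/mutate"
)

// pullWithStatus reports "downloading" against the compressed layer sizes
// taken from the manifest, then reports "extracting" on its own scale,
// instead of comparing extracted bytes to the compressed image size.
func pullWithStatus(img v1.Image, report func(phase string, done, total int64)) error {
	layers, err := img.Layers()
	if err != nil {
		return err
	}
	var total int64
	for _, l := range layers {
		sz, err := l.Size() // compressed size, as listed in the manifest
		if err != nil {
			return err
		}
		total += sz
	}
	var done int64
	for _, l := range layers {
		rc, err := l.Compressed()
		if err != nil {
			return err
		}
		n, copyErr := io.Copy(io.Discard, rc) // placeholder sink; a real pull caches the layer
		rc.Close()
		if copyErr != nil {
			return copyErr
		}
		done += n
		report("downloading", done, total)
	}
	// Extraction phase: mutate.Extract streams the flattened filesystem as a tar.
	rc := mutate.Extract(img)
	defer rc.Close()
	report("extracting", 0, 0)
	// ... untar rc into the backend directory, updating the "extracting" status ...
	return nil
}

// isBackendInstalled mirrors the detection described above: a backend counts
// as installed if its directory contains a metadata.json and/or a run.sh.
func isBackendInstalled(dir string) bool {
	for _, f := range []string{"metadata.json", "run.sh"} {
		if _, err := os.Stat(filepath.Join(dir, f)); err == nil {
			return true
		}
	}
	return false
}
```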
LocalAI [bot]
7fdc006071 chore: ⬆️ Update ggml-org/llama.cpp to de569441470332ff922c23fb0413cc957be75b25 (#5777)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-02 08:25:29 +02:00
LocalAI [bot]
615830245b chore: ⬆️ Update ggml-org/whisper.cpp to bca021c9740b267c2973fba56555be052006023a (#5776)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-02 08:24:58 +02:00
LocalAI [bot]
61376c0fa7 docs: ⬆️ update docs version mudler/LocalAI (#5775)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-01 22:45:24 +00:00
Ettore Di Giacinto
d0fb23514f Revert "fix(gallery): correctly show status for downloading OCI images"
This reverts commit 780d034ac9.
2025-07-01 21:32:04 +02:00
Ettore Di Giacinto
780d034ac9 fix(gallery): correctly show status for downloading OCI images
We can't use the bytes written by mutate.Extract as the current status, as that
count will be bigger than the compressed image size: image manifests give no
guarantee about the type of artifact (compressed or not) when reporting the
layer size.

Split the extraction process into two parts, downloading and extracting as a
flattened filesystem, so that we can report the status of downloading and
extracting separately.

This change also makes detecting installed backends more consistent: a backend
now counts as installed if a metadata.json and/or a path with a `run.sh` file
is present.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-01 19:56:28 +02:00
Ettore Di Giacinto
ec2a044c7e chore(model gallery): add pinkpixel_crystal-think-v2 (#5773)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-01 16:18:19 +02:00
Ettore Di Giacinto
ad6fdd21fd chore(model gallery): add steelskull_l3.3-shakudo-70b (#5772)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-01 16:15:22 +02:00
Ettore Di Giacinto
cd94e6b352 chore(model gallery): add thedrummer_anubis-70b-v1.1 (#5771)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-01 13:54:29 +02:00
Richard Palethorpe
b37cef3718 fix: Diffusers and XPU fixes (#5737)
* fix(README): Add device flags for Intel/XPU

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(diffusers/xpu): Set device to XPU and ignore CUDA request when on Intel

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-07-01 12:36:17 +02:00
Dedy F. Setyawan
9f957d547d fix(docs): Improve Header Responsiveness - Hide "Star us on GitHub!" on Mobile (#5770) 2025-07-01 12:15:16 +02:00
LocalAI [bot]
f0d9f0c5d8 chore: ⬆️ Update ggml-org/llama.cpp to 0a5a3b5cdfd887cf0f8e09d9ff89dee130cfcdde (#5759)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-30 22:46:55 +00:00
LocalAI [bot]
d33e1c72a3 chore: ⬆️ Update ggml-org/llama.cpp to caf5681fcb47dfe9bafee94ef9aa8f669ac986c7 (#5758)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-29 22:49:47 +00:00
Ettore Di Giacinto
33f9ee06c9 fix(gallery): automatically install model from name (#5757)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-29 17:42:58 +02:00
Ettore Di Giacinto
c54677402d chore(model gallery): add qwen3-33b-a3b-stranger-thoughts-abliterated-uncensored (#5755)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-29 10:04:33 +02:00
LocalAI [bot]
3fe3a7b23d chore: ⬆️ Update ggml-org/llama.cpp to 27208bf657cfe7262791df473927225e48efe482 (#5753)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-29 09:06:37 +02:00
LocalAI [bot]
f8ff6fa1fd docs: ⬆️ update docs version mudler/LocalAI (#5752)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-28 22:17:49 +02:00
Ettore Di Giacinto
dfadc3696e feat(llama.cpp): allow to set kv-overrides (#5745)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 21:26:07 +02:00
Ettore Di Giacinto
dbcf5fb4fc chore(model gallery): add gemma-3-4b-it-max-horror-uncensored-dbl-x-imatrix (#5751)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 18:18:14 +02:00
Ettore Di Giacinto
2633137a17 chore(model gallery): add qwen3-22b-a3b-the-harley-quinn (#5750)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 18:17:10 +02:00
Ettore Di Giacinto
d9c17dd23b chore(model gallery): add mistral-small-3.2-46b-the-brilliant-raconteur-ii-instruct-2506 (#5749)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 18:15:25 +02:00
Ettore Di Giacinto
d8b7bd4860 chore(model gallery): add qwen3-42b-a3b-stranger-thoughts-deep20x-abliterated-uncensored-i1 (#5748)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 18:12:17 +02:00
Ettore Di Giacinto
a611cbc0f4 chore(model gallery): add qwen3-55b-a3b-total-recall-deep-40x (#5747)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 17:54:32 +02:00
Ettore Di Giacinto
850b525159 chore(model gallery): add qwen3-55b-a3b-total-recall-v1.3-i1 (#5746)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-28 17:47:46 +02:00
Ettore Di Giacinto
35b3426a2a Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-28 09:16:25 +02:00
LocalAI [bot]
cd2b0c0e7c chore: ⬆️ Update ggml-org/llama.cpp to 72babea5dea56c8a8e8420ccf731b12a5cf37854 (#5743)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-27 23:46:27 +02:00
LocalAI [bot]
73d80c43a8 chore: ⬆️ Update ggml-org/whisper.cpp to c88ffbf9baeaae8c2cc0a4f496618314bb2ee9e0 (#5742)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-27 23:45:57 +02:00
LocalAI [bot]
665562b850 docs: ⬆️ update docs version mudler/LocalAI (#5741)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-27 22:23:43 +02:00
Ettore Di Giacinto
7a78e4f482 fix(backends gallery): meta packages do not have URIs (#5740)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 22:23:14 +02:00
Ettore Di Giacinto
6f41a6f934 fix(backends gallery): correctly identify gpu vendor (#5739)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 22:22:58 +02:00
Ettore Di Giacinto
bb54f2da2b feat(gallery): automatically install missing backends along models (#5736)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 18:25:44 +02:00
Ettore Di Giacinto
e1cc7ee107 fix(ci): enable tag-latest to auto (#5738)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 18:17:01 +02:00
Ettore Di Giacinto
cfc9dfa3d5 fix(ci): better handling of latest images for backends (#5735)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 10:32:58 +02:00
LocalAI [bot]
6a650e68cb chore: ⬆️ Update ggml-org/whisper.cpp to 32cf4e2aba799aff069011f37ca025401433cf9f (#5733)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 22:09:26 +02:00
LocalAI [bot]
5e1373877a chore: ⬆️ Update ggml-org/llama.cpp to 8846aace4934ad29651ea61b8c7e3f6b0556e3d2 (#5734)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 22:09:03 +02:00
Ettore Di Giacinto
b5b0ab26e7 fix(ci): remove non-existant input from build matrix
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 21:42:27 +02:00
Ettore Di Giacinto
9725bb4bbd chore(model gallery): add gemma-3n-e4b-it (#5731)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 19:36:50 +02:00
Ettore Di Giacinto
33b4275bbc chore(model gallery): add gemma-3n-e2b-it (#5730)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 19:35:49 +02:00
Ettore Di Giacinto
6644af10c6 feat: ⚠️ reduce images size and stop bundling sources (#5721)
feat: reduce images size and stop bundling sources

Do not copy sources anymore, and reduce the packages in the base images by
no longer using builder images.

If a rebuild is needed, just build the container image from scratch by
following the docs. We will slowly try to migrate all backends to the
gallery to keep the core small.

This PR is a breaking change: it also sets the base folders to /models
and /backends instead of /build/models and /build/backends.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-26 18:41:38 +02:00
Ettore Di Giacinto
7c4a2e9b85 chore(ci): ⚠️ fix latest tag by using docker meta action (#5722)
chore(ci): fix latest tag by using docker meta action

Also makes the tagging names uniform

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 18:40:25 +02:00
Ettore Di Giacinto
bcccee3909 fix(backends gallery): delete dangling dirs if installation failed (#5729)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 17:38:03 +02:00
Ettore Di Giacinto
c6f50ddd0c Revert "chore: ⬆️ Update leejet/stable-diffusion.cpp to 10c6501bd05a697e014f1bee3a84e5664290c489" (#5727)
Revert "chore: ⬆️ Update leejet/stable-diffusion.cpp to `10c6501bd05a…"

This reverts commit 30600dd5cb.
2025-06-26 13:25:25 +02:00
LocalAI [bot]
6613373b1b chore: ⬆️ Update ggml-org/whisper.cpp to 4daf7050ca2bf17f5166f45ac6da651c4e33f293 (#5725)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 13:00:20 +02:00
LocalAI [bot]
1659b3f795 chore: ⬆️ Update ggml-org/llama.cpp to 2bf9d539dd158345e3a3b096e16474af535265b4 (#5724)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 12:59:57 +02:00
LocalAI [bot]
30600dd5cb chore: ⬆️ Update leejet/stable-diffusion.cpp to 10c6501bd05a697e014f1bee3a84e5664290c489 (#4925)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-25 22:45:15 +00:00
Ettore Di Giacinto
179fcf5541 chore(model gallery): add menlo_jan-nano-128k (#5723)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-25 12:14:11 +02:00
LocalAI [bot]
9cb75086bb chore: ⬆️ Update ggml-org/whisper.cpp to 0083335ba0e9d6becbe0958903b0a27fc2ebaeed (#5718)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-25 09:47:33 +02:00
LocalAI [bot]
594bb462ab chore: ⬆️ Update ggml-org/llama.cpp to 73e53dc834c0a2336cd104473af6897197b96277 (#5719)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-24 22:47:48 +00:00
Ettore Di Giacinto
aa730a7b96 chore(model gallery): add delta-vector_austral-24b-winton (#5717)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 18:37:28 +02:00
Ettore Di Giacinto
0a454c527a chore(model gallery): add astrosage-70b (#5716)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 18:34:37 +02:00
Ettore Di Giacinto
cf86bcb984 chore(model gallery): add skywork_skywork-swe-32b (#5715)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 18:29:36 +02:00
Ettore Di Giacinto
a6d9988e84 feat(backend gallery): add meta packages (#5696)
* feat(backend gallery): add meta packages

This lets us define meta packages such as "vllm" that automatically install
the corresponding concrete package for the GPU currently detected in the
system (see the sketch after this entry).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: use a metadata file

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 17:08:27 +02:00
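A hedged sketch of how such a meta package could resolve to a concrete backend for the detected GPU vendor; MetaBackend, Resolve, and the example mappings below are invented for illustration and do not mirror LocalAI's actual types or package names:

```go
package gallery

import "fmt"

// MetaBackend describes a meta package (e.g. "vllm") whose metadata file maps
// detected GPU vendors to concrete backend packages.
type MetaBackend struct {
	Name     string            // e.g. "vllm"
	ByVendor map[string]string // e.g. "nvidia" -> "cuda12-vllm"
}

// Resolve picks the concrete package to install for the detected vendor.
func (m MetaBackend) Resolve(vendor string) (string, error) {
	if pkg, ok := m.ByVendor[vendor]; ok {
		return pkg, nil
	}
	return "", fmt.Errorf("meta package %q has no candidate for GPU vendor %q", m.Name, vendor)
}

func Example() {
	vllm := MetaBackend{
		Name: "vllm",
		ByVendor: map[string]string{ // hypothetical package names
			"nvidia": "cuda12-vllm",
			"amd":    "rocm-hipblas-vllm",
			"intel":  "sycl-f16-vllm",
		},
	}
	pkg, err := vllm.Resolve("amd") // vendor string as detected on the host
	fmt.Println(pkg, err)           // rocm-hipblas-vllm <nil>
}
```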
Ettore Di Giacinto
f3a114342e chore(model gallery): add mistralai_mistral-small-3.2-24b-instruct-2506 (#5714)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 13:59:14 +02:00
LocalAI [bot]
0d275ccc03 chore: ⬆️ Update ggml-org/llama.cpp to ce82bd0117bd3598300b3a089d13d401b90279c7 (#5712)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-24 08:37:32 +02:00
LocalAI [bot]
58dba3f01c chore: ⬆️ Update ggml-org/whisper.cpp to a422176937c5bb20eb58d969995765f90d3c1a9b (#5713)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-23 22:45:31 +00:00
kilavvy
b68d6e8088 Docs: Fix typos (#5709)
* Update GPU-acceleration.md

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>

* Update image-generation.md

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>

---------

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>
2025-06-23 18:15:06 +02:00
LocalAI [bot]
2352cec7e6 chore: ⬆️ Update ggml-org/llama.cpp to 238005c2dc67426cf678baa2d54c881701693288 (#5710)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-22 22:43:41 +00:00
Ettore Di Giacinto
de72ae79b5 chore(model gallery): add ds-r1-qwen3-8b-arliai-rpr-v4-small-iq-imatrix (#5708)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 09:05:55 +02:00
Ettore Di Giacinto
884c07d5f9 chore(model gallery): add allura-org_q3-8b-kintsugi (#5707)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 09:04:49 +02:00
Ettore Di Giacinto
cca7cbef1e chore(model gallery): add qwen3-the-xiaolong-omega-directive-22b-uncensored-abliterated-i1 (#5706)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 09:01:08 +02:00
Ettore Di Giacinto
32cd0d03d4 chore(model gallery): add menlo_jan-nano (#5705)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 08:57:33 +02:00
Ettore Di Giacinto
ee4d9e83d0 Update stalebot.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-22 08:51:13 +02:00
LocalAI [bot]
5547e08a30 chore: ⬆️ Update ggml-org/llama.cpp to aa0ef5c578eef4c2adc7be1282f21bab5f3e8d26 (#5703)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-21 23:54:53 +02:00
LocalAI [bot]
ca7385c303 chore: ⬆️ Update ggml-org/whisper.cpp to e6c10cf3d5d60dc647eb6cd5e73d3c347149f746 (#5702)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-21 23:54:28 +02:00
Ettore Di Giacinto
28759e79d3 chore(model gallery): add qwen3-the-josiefied-omega-directive-22b-uncensored-abliterated-i1 (#5704)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-21 23:54:05 +02:00
Ettore Di Giacinto
40249b6b84 Update stalebot.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-21 22:38:23 +02:00
Ettore Di Giacinto
e09e47bada chore(ci): add stale bot (#5700)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-21 20:12:08 +02:00
Ettore Di Giacinto
3796558aeb Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-21 20:11:57 +02:00
LocalAI [bot]
cca4f010f8 chore: ⬆️ Update ggml-org/llama.cpp to 06cbedfca1587473df9b537f1dd4d6bfa2e3de13 (#5697)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-20 22:44:39 +00:00
Ettore Di Giacinto
be3ff482d0 chore(ci): try to optimize disk space when tagging latest (#5695)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-20 15:54:14 +02:00
LocalAI [bot]
af255cd0be chore: ⬆️ Update ggml-org/llama.cpp to 8f71d0f3e86ccbba059350058af8758cafed73e6 (#5692)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-20 15:53:55 +02:00
LocalAI [bot]
8000228d1b chore: ⬆️ Update ggml-org/whisper.cpp to 3e65f518ddf840b13b74794158aa95a2c8aa30cc (#5691)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-20 15:53:38 +02:00
Ettore Di Giacinto
79abe0ad77 Drop latest references to extras images 2025-06-20 15:51:16 +02:00
Ettore Di Giacinto
8131d11d1f Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-19 22:42:38 +02:00
LocalAI [bot]
beb01c91f3 docs: ⬆️ update docs version mudler/LocalAI (#5690)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-19 22:13:16 +02:00
Ettore Di Giacinto
1ccd64ff6a chore: drop extras references from docs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-19 22:04:28 +02:00
Ettore Di Giacinto
fc7681c68c Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-19 21:46:09 +02:00
Ettore Di Giacinto
49d026a229 Update backends.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-19 19:47:09 +02:00
75 changed files with 2530 additions and 681 deletions

View File

@@ -6,7 +6,6 @@ services:
target: devcontainer
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- GO_TAGS=p2p tts
env_file:
- ../.env

View File

@@ -7,7 +7,7 @@ on:
- master
tags:
- '*'
#pull_request:
pull_request:
concurrency:
group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -26,7 +26,6 @@ jobs:
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
backend: ${{ matrix.backend }}
latest-image: ${{ matrix.latest-image }}
dockerfile: ${{ matrix.dockerfile }}
context: ${{ matrix.context }}
secrets:
@@ -47,9 +46,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "rerankers"
latest-image: 'latest-gpu-nvidia-cuda-11-rerankers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -59,9 +57,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "vllm"
latest-image: 'latest-gpu-nvidia-cuda-11-vllm'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -71,9 +68,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "transformers"
latest-image: 'latest-gpu-nvidia-cuda-11-transformers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -83,9 +79,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "diffusers"
latest-image: 'latest-gpu-nvidia-cuda-11-diffusers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 11 additional backends
@@ -96,9 +91,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "kokoro"
latest-image: 'latest-gpu-nvidia-cuda-11-kokoro'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -108,9 +102,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "faster-whisper"
latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -120,9 +113,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "coqui"
latest-image: 'latest-gpu-nvidia-cuda-11-coqui'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -132,9 +124,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "bark"
latest-image: 'latest-gpu-nvidia-cuda-11-bark'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -144,9 +135,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "chatterbox"
latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 12 builds
@@ -157,9 +147,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "rerankers"
latest-image: 'latest-gpu-nvidia-cuda-12-rerankers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -169,9 +158,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "vllm"
latest-image: 'latest-gpu-nvidia-cuda-12-vllm'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -181,9 +169,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "transformers"
latest-image: 'latest-gpu-nvidia-cuda-12-transformers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -193,9 +180,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "diffusers"
latest-image: 'latest-gpu-nvidia-cuda-12-diffusers'
base-image: "ubuntu:24.04"
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 12 additional backends
@@ -206,9 +192,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "kokoro"
latest-image: 'latest-gpu-nvidia-cuda-12-kokoro'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -218,9 +203,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "faster-whisper"
latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -230,9 +214,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "coqui"
latest-image: 'latest-gpu-nvidia-cuda-12-coqui'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -242,9 +225,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "bark"
latest-image: 'latest-gpu-nvidia-cuda-12-bark'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
@@ -254,9 +236,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "chatterbox"
latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# hipblas builds
@@ -267,9 +248,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "rerankers"
latest-image: 'latest-gpu-rocm-hipblas-rerankers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
@@ -279,9 +259,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "vllm"
latest-image: 'latest-gpu-rocm-hipblas-vllm'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
@@ -291,9 +270,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "transformers"
latest-image: 'latest-gpu-rocm-hipblas-transformers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
@@ -303,9 +281,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "diffusers"
latest-image: 'latest-gpu-rocm-hipblas-diffusers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# ROCm additional backends
@@ -316,9 +293,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "kokoro"
latest-image: 'latest-gpu-rocm-hipblas-kokoro'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
@@ -328,9 +304,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "faster-whisper"
latest-image: 'latest-gpu-rocm-hipblas-faster-whisper'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
@@ -340,9 +315,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "coqui"
latest-image: 'latest-gpu-rocm-hipblas-coqui'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
@@ -352,9 +326,8 @@ jobs:
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
backend: "bark"
latest-image: 'latest-gpu-rocm-hipblas-bark'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# sycl builds
@@ -367,7 +340,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "rerankers"
latest-image: 'latest-gpu-intel-sycl-f32-rerankers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -379,7 +351,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "rerankers"
latest-image: 'latest-gpu-intel-sycl-f16-rerankers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
@@ -391,7 +362,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "vllm"
latest-image: 'latest-gpu-intel-sycl-f32-vllm'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -403,7 +373,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "vllm"
latest-image: 'latest-gpu-intel-sycl-f16-vllm'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
@@ -415,7 +384,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "transformers"
latest-image: 'latest-gpu-intel-sycl-f32-transformers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -427,7 +395,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "transformers"
latest-image: 'latest-gpu-intel-sycl-f16-transformers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
@@ -439,7 +406,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "diffusers"
latest-image: 'latest-gpu-intel-sycl-f32-diffusers'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# SYCL additional backends
@@ -452,7 +418,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "kokoro"
latest-image: 'latest-gpu-intel-sycl-f32-kokoro'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -464,7 +429,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "kokoro"
latest-image: 'latest-gpu-intel-sycl-f16-kokoro'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
@@ -476,7 +440,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "faster-whisper"
latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -488,7 +451,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "faster-whisper"
latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
@@ -500,7 +462,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "coqui"
latest-image: 'latest-gpu-intel-sycl-f32-coqui'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -512,7 +473,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "coqui"
latest-image: 'latest-gpu-intel-sycl-f16-coqui'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
@@ -524,7 +484,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "bark"
latest-image: 'latest-gpu-intel-sycl-f32-bark'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
@@ -536,7 +495,6 @@ jobs:
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "bark"
latest-image: 'latest-gpu-intel-sycl-f16-bark'
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# bark-cpp
@@ -547,8 +505,7 @@ jobs:
tag-latest: 'true'
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
backend: "bark"
latest-image: 'latest-bark-cpp'
dockerfile: "./backend/Dockerfile.go"
context: "./"

View File

@@ -28,10 +28,6 @@ on:
description: 'Tag latest'
default: ''
type: string
latest-image:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -153,7 +149,7 @@ jobs:
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta for PR
id: meta_pull_request
@@ -168,7 +164,7 @@ jobs:
type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
suffix=${{ inputs.tag-suffix }},onlatest=true
## End testing image
- name: Set up QEMU
uses: docker/setup-qemu-action@master
@@ -232,20 +228,7 @@ jobs:
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all
- name: Latest tag
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai-backends:${{ steps.meta.outputs.version }}
docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
docker push localai/localai-backends:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
- name: job summary
run: |

View File

@@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
include:
- grpc-base-image: ubuntu:22.04
- grpc-base-image: ubuntu:24.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64,linux/arm64'
runs-on: ${{matrix.runs-on}}

View File

@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}

View File

@@ -40,25 +40,25 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
grpc-base-image: "ubuntu:24.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
@@ -69,5 +69,5 @@ jobs:
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
makeflags: "--jobs=4 --output-sync=target"

View File

@@ -28,8 +28,6 @@ jobs:
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -40,16 +38,14 @@ jobs:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
ffmpeg: 'true'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
base-image: "rocm/dev-ubuntu-24.04:6.4.1"
grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-hipblas'
aio: "-aio-gpu-hipblas"
latest-image-aio: 'latest-aio-gpu-hipblas'
core-image-build:
uses: ./.github/workflows/image_build.yml
@@ -66,8 +62,6 @@ jobs:
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -83,77 +77,65 @@ jobs:
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: 'true'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda11'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
latest-image: 'latest-gpu-nvidia-cuda-11'
aio: "-aio-gpu-nvidia-cuda-11"
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda12'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-nvidia-cuda-12'
aio: "-aio-gpu-nvidia-cuda-12"
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-latest: 'auto'
tag-suffix: '-vulkan'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-vulkan'
aio: "-aio-gpu-vulkan"
latest-image-aio: 'latest-aio-gpu-vulkan'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16'
grpc-base-image: "ubuntu:24.04"
tag-suffix: '-gpu-intel-f16'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f16'
aio: "-aio-gpu-intel-f16"
latest-image-aio: 'latest-aio-gpu-intel-f16'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32'
grpc-base-image: "ubuntu:24.04"
tag-suffix: '-gpu-intel-f32'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f32'
aio: "-aio-gpu-intel-f32"
latest-image-aio: 'latest-aio-gpu-intel-f32'
gh-runner:
uses: ./.github/workflows/image_build.yml
@@ -170,8 +152,6 @@ jobs:
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -185,9 +165,8 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
latest-image: 'latest-nvidia-l4t-arm64'
ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'

View File

@@ -33,14 +33,6 @@ on:
description: 'Tag latest'
default: ''
type: string
latest-image:
description: 'Tag latest'
default: ''
type: string
latest-image-aio:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -164,7 +156,7 @@ jobs:
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'
@@ -191,7 +183,7 @@ jobs:
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
suffix=${{ inputs.aio }},onlatest=true
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
@@ -204,7 +196,8 @@ jobs:
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
suffix=${{ inputs.aio }}
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true
- name: Set up QEMU
uses: docker/setup-qemu-action@master
@@ -316,32 +309,6 @@ jobs:
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all
- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
docker push localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker push localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

.github/workflows/stalebot.yml (new file, 24 lines)
View File

@@ -0,0 +1,24 @@
name: 'Close stale issues and PRs'
permissions:
issues: write
pull-requests: write
on:
schedule:
- cron: '30 1 * * *'
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
with:
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
days-before-issue-stale: 90
days-before-pr-stale: 90
days-before-issue-close: 5
days-before-pr-close: 10
exempt-issue-labels: 'roadmap'
exempt-pr-labels: 'roadmap'

View File

@@ -1,11 +1,109 @@
ARG BASE_IMAGE=ubuntu:22.04
ARG BASE_IMAGE=ubuntu:24.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements
USER root
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates curl wget espeak-ng libgomp1 \
python3 python-is-python3 ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
FROM requirements AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
RUN mkdir -p /run/localai
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
echo "vulkan" > /run/localai/capability
fi
EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
echo "nvidia" > /run/localai/capability
fi
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
echo "amd" > /run/localai/capability && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
###################################
###################################
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM requirements-drivers AS build-requirements
ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
@@ -13,7 +111,6 @@ ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -52,18 +149,6 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
@@ -72,86 +157,19 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
WORKDIR /build
###################################
###################################
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
FROM requirements AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
###################################
###################################
@@ -218,13 +236,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
FROM requirements-drivers AS builder-base
FROM build-requirements AS builder-base
ARG GO_TAGS="tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
ARG TARGETARCH
ARG TARGETVARIANT
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
ENV MAKEFLAGS=${MAKEFLAGS}
@@ -259,6 +278,8 @@ EOT
# Compile backends first in a separate stage
FROM builder-base AS builder-backends
ARG TARGETARCH
ARG TARGETVARIANT
COPY --from=grpc /opt/grpc /usr/local
@@ -314,24 +335,13 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
FROM builder-base AS devcontainer
ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local
COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less wget
ssh less
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
RUN go install github.com/go-delve/delve/cmd/dlv@latest
@@ -345,40 +355,16 @@ RUN go install github.com/mikefarah/yq/v4@latest
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=12
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
WORKDIR /
WORKDIR /build
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
# https://github.com/go-skynet/LocalAI/pull/434
COPY . .
COPY --from=builder /build/sources ./sources/
COPY --from=grpc /opt/grpc /usr/local
COPY ./entrypoint.sh .
# Copy the binary
COPY --from=builder /build/local-ai ./
@@ -387,12 +373,12 @@ COPY --from=builder /build/local-ai ./
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# Make sure the models and backends directories exist
RUN mkdir -p /build/models /build/backends
RUN mkdir -p /models /backends
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /build/models /build/backends
VOLUME /models /backends
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]
ENTRYPOINT [ "/entrypoint.sh" ]

View File

@@ -1,4 +1,4 @@
ARG BASE_IMAGE=ubuntu:22.04
ARG BASE_IMAGE=ubuntu:24.04
FROM ${BASE_IMAGE}

View File

@@ -6,11 +6,11 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
CPPLLAMA_VERSION?=8d947136546773f6410756f37fcc5d3e65b8135d
CPPLLAMA_VERSION?=bee28421be25fd447f61cb6db64d556cbfce32ec
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=ecb8f3c2b4e282d5ef416516bcbfb92821f06bf6
WHISPER_CPP_VERSION?=d9999d54c868b8bfcd376aa26067e787d53e679e
# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
@@ -18,7 +18,7 @@ PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
# bark.cpp
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0
BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
@@ -265,8 +265,8 @@ sources/bark.cpp/build/libbark.a: sources/bark.cpp
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
$(MAKE) -C backend/go/bark libbark.a
backend/go/bark-cpp/libbark.a: sources/bark.cpp/build/libbark.a
$(MAKE) -C backend/go/bark-cpp libbark.a
## go-piper
sources/go-piper:
@@ -355,7 +355,7 @@ clean: ## Remove build related file
rm -rf release/
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/go/bark clean
$(MAKE) -C backend/go/bark-cpp clean
$(MAKE) -C backend/cpp/llama clean
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
rm -rf backend/cpp/llama-* || true
@@ -778,9 +778,9 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
backend-assets/grpc/bark-cpp: backend/go/bark-cpp/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark-cpp/ LIBRARY_PATH=$(CURDIR)/backend/go/bark-cpp/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark-cpp/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bark-cpp
endif
@@ -817,7 +817,7 @@ grpcs: prepare $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:22.04
BASE_IMAGE?=ubuntu:24.04
docker:
docker build \
@@ -852,7 +852,7 @@ docker-aio-all:
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -860,7 +860,7 @@ docker-image-intel:
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

View File

@@ -121,18 +121,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
### NVIDIA GPU Images:
```bash
# CUDA 12.0 with core features
# CUDA 12.0
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CUDA 12.0 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12-extras
# CUDA 11.7 with core features
# CUDA 11.7
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
# CUDA 11.7 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11-extras
# NVIDIA Jetson (L4T) ARM64
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
```
@@ -140,33 +134,22 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nv
### AMD GPU Images (ROCm):
```bash
# ROCm with core features
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
# ROCm with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas-extras
```
### Intel GPU Images (oneAPI):
```bash
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP16 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16-extras
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
# Intel GPU with FP32 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32-extras
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
```
### Vulkan GPU Images:
```bash
# Vulkan with core features
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
@@ -232,6 +215,7 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
## 🚀 [Features](https://localai.io/features/)
- 🧩 [Backend Gallery](https://localai.io/backends/): Install/remove backends on the fly, powered by OCI images — fully customizable and API-driven.
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)

View File

@@ -135,4 +135,4 @@ check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /build/entrypoint.sh "$@"
exec /entrypoint.sh "$@"

View File

@@ -1,4 +1,4 @@
ARG BASE_IMAGE=ubuntu:22.04
ARG BASE_IMAGE=ubuntu:24.04
FROM ${BASE_IMAGE} AS builder
ARG BACKEND=rerankers
@@ -123,9 +123,9 @@ EOT
COPY . /LocalAI
RUN cd /LocalAI && make backend-assets/grpc/bark-cpp
RUN cd /LocalAI && make backend-assets/grpc/${BACKEND}
FROM scratch
COPY --from=builder /LocalAI/backend-assets/grpc/bark-cpp ./
COPY --from=builder /LocalAI/backend/go/bark/run.sh ./
COPY --from=builder /LocalAI/backend-assets/grpc/${BACKEND} ./
COPY --from=builder /LocalAI/backend/go/${BACKEND}/run.sh ./

View File

@@ -1,4 +1,4 @@
ARG BASE_IMAGE=ubuntu:22.04
ARG BASE_IMAGE=ubuntu:24.04
FROM ${BASE_IMAGE} AS builder
ARG BACKEND=rerankers

View File

@@ -258,6 +258,8 @@ message ModelOptions {
repeated GrammarTrigger GrammarTriggers = 65;
bool Reranking = 71;
repeated string Overrides = 72;
}
message Result {

View File

@@ -12,6 +12,7 @@
#include "backend.pb.h"
#include "backend.grpc.pb.h"
#include "common.h"
#include <getopt.h>
#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <grpcpp/grpcpp.h>
@@ -260,6 +261,13 @@ static void params_parse(const backend::ModelOptions* request,
}
}
// Add kv_overrides
if (request->overrides_size() > 0) {
for (int i = 0; i < request->overrides_size(); i++) {
string_parse_kv_override(request->overrides(i).c_str(), params.kv_overrides);
}
}
// TODO: Add yarn
if (!request->tensorsplit().empty()) {

View File

@@ -1,8 +1,7 @@
---
## vLLM
- &vllm
name: "cuda11-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
name: "vllm"
license: apache-2.0
urls:
- https://github.com/vllm-project/vllm
@@ -29,6 +28,19 @@
Speculative decoding
Chunked prefill
alias: "vllm"
capabilities:
nvidia: "cuda12-vllm"
amd: "rocm-vllm"
intel: "intel-sycl-f16-vllm"
- !!merge <<: *vllm
name: "vllm-development"
capabilities:
nvidia: "cuda12-vllm-development"
amd: "rocm-vllm-development"
intel: "intel-sycl-f16-vllm-development"
- !!merge <<: *vllm
name: "cuda11-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
- !!merge <<: *vllm
name: "cuda12-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
@@ -57,43 +69,52 @@
name: "intel-sycl-f16-vllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
## Rerankers
- name: "cuda11-rerankers"
- &rerankers
name: "rerankers"
alias: "rerankers"
capabilities:
nvidia: "cuda12-rerankers"
intel: "intel-sycl-f16-rerankers"
amd: "rocm-rerankers"
- !!merge <<: *rerankers
name: "rerankers-development"
capabilities:
nvidia: "cuda12-rerankers-development"
intel: "intel-sycl-f16-rerankers-development"
amd: "rocm-rerankers-development"
- !!merge <<: *rerankers
name: "cuda11-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers"
alias: "cuda11-rerankers"
- name: "cuda12-rerankers"
- !!merge <<: *rerankers
name: "cuda12-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers"
alias: "cuda12-rerankers"
- name: "intel-sycl-f32-rerankers"
- !!merge <<: *rerankers
name: "intel-sycl-f32-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rerankers"
alias: "intel-sycl-f32-rerankers"
- name: "intel-sycl-f16-rerankers"
- !!merge <<: *rerankers
name: "intel-sycl-f16-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rerankers"
alias: "intel-sycl-f16-rerankers"
- name: "rocm-rerankers"
- !!merge <<: *rerankers
name: "rocm-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers"
alias: "rocm-rerankers"
- name: "cuda11-rerankers-development"
- !!merge <<: *rerankers
name: "cuda11-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers"
alias: "rerankers"
- name: "cuda12-rerankers-development"
- !!merge <<: *rerankers
name: "cuda12-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers"
alias: "rerankers"
- name: "rocm-rerankers-development"
- !!merge <<: *rerankers
name: "rocm-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers"
alias: "rerankers"
- name: "intel-sycl-f32-rerankers-development"
- !!merge <<: *rerankers
name: "intel-sycl-f32-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers"
alias: "rerankers"
- name: "intel-sycl-f16-rerankers-development"
- !!merge <<: *rerankers
name: "intel-sycl-f16-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
alias: "rerankers"
## Transformers
- &transformers
name: "cuda12-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
name: "transformers"
icon: https://camo.githubusercontent.com/26569a27b8a30a488dd345024b71dbc05da7ff1b2ba97bb6080c9f1ee0f26cc7/68747470733a2f2f68756767696e67666163652e636f2f64617461736574732f68756767696e67666163652f646f63756d656e746174696f6e2d696d616765732f7265736f6c76652f6d61696e2f7472616e73666f726d6572732f7472616e73666f726d6572735f61735f615f6d6f64656c5f646566696e6974696f6e2e706e67
alias: "transformers"
license: apache-2.0
@@ -105,6 +126,19 @@
tags:
- text-to-text
- multimodal
capabilities:
nvidia: "cuda12-transformers"
intel: "intel-sycl-f16-transformers"
amd: "rocm-transformers"
- !!merge <<: *transformers
name: "transformers-development"
capabilities:
nvidia: "cuda12-transformers-development"
intel: "intel-sycl-f16-transformers-development"
amd: "rocm-transformers-development"
- !!merge <<: *transformers
name: "cuda12-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
- !!merge <<: *transformers
name: "rocm-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers"
@@ -143,10 +177,21 @@
- image-generation
- video-generation
- diffusion-models
name: "cuda12-diffusers"
license: apache-2.0
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
alias: "diffusers"
capabilities:
nvidia: "cuda12-diffusers"
intel: "intel-sycl-f32-diffusers"
amd: "rocm-diffusers"
- !!merge <<: *diffusers
name: "diffusers-development"
capabilities:
nvidia: "cuda12-diffusers-development"
intel: "intel-sycl-f32-diffusers-development"
amd: "rocm-diffusers-development"
- !!merge <<: *diffusers
name: "cuda12-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
- !!merge <<: *diffusers
name: "rocm-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers"
@@ -170,6 +215,7 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
## exllama2
- &exllama2
name: "exllama2"
urls:
- https://github.com/turboderp-org/exllamav2
tags:
@@ -179,9 +225,20 @@
license: MIT
description: |
ExLlamaV2 is an inference library for running local LLMs on modern consumer GPUs.
alias: "exllama2"
capabilities:
nvidia: "cuda12-exllama2"
intel: "intel-sycl-f32-exllama2"
amd: "rocm-exllama2"
- !!merge <<: *exllama2
name: "exllama2-development"
capabilities:
nvidia: "cuda12-exllama2-development"
intel: "intel-sycl-f32-exllama2-development"
amd: "rocm-exllama2-development"
- !!merge <<: *exllama2
name: "cuda11-exllama2"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2"
alias: "exllama2"
- !!merge <<: *exllama2
name: "cuda12-exllama2"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2"
@@ -204,9 +261,21 @@
- TTS
- LLM
license: apache-2.0
alias: "kokoro"
name: "kokoro"
capabilities:
nvidia: "cuda12-kokoro"
intel: "intel-sycl-f32-kokoro"
amd: "rocm-kokoro"
- !!merge <<: *kokoro
name: "kokoro-development"
capabilities:
nvidia: "cuda12-kokoro-development"
intel: "intel-sycl-f32-kokoro-development"
amd: "rocm-kokoro-development"
- !!merge <<: *kokoro
name: "cuda11-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"
alias: "kokoro"
- !!merge <<: *kokoro
name: "cuda12-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
@@ -225,6 +294,15 @@
- !!merge <<: *kokoro
name: "sycl-f32-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-kokoro"
- !!merge <<: *kokoro
name: "cuda11-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro"
- !!merge <<: *kokoro
name: "cuda12-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoro"
- !!merge <<: *kokoro
name: "rocm-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro"
## faster-whisper
- &faster-whisper
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
@@ -237,9 +315,20 @@
- speech-to-text
- Whisper
license: MIT
name: "cuda11-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-faster-whisper"
alias: "faster-whisper"
name: "faster-whisper"
capabilities:
nvidia: "cuda12-faster-whisper"
intel: "intel-sycl-f32-faster-whisper"
amd: "rocm-faster-whisper"
- !!merge <<: *faster-whisper
name: "faster-whisper-development"
capabilities:
nvidia: "cuda12-faster-whisper-development"
intel: "intel-sycl-f32-faster-whisper-development"
amd: "rocm-faster-whisper-development"
- !!merge <<: *faster-whisper
name: "cuda11-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper"
- !!merge <<: *faster-whisper
name: "cuda12-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
@@ -274,10 +363,28 @@
- text-to-speech
- TTS
license: mpl-2.0
name: "coqui"
alias: "coqui"
capabilities:
nvidia: "cuda12-coqui"
intel: "intel-sycl-f32-coqui"
amd: "rocm-coqui"
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- !!merge <<: *coqui
name: "coqui-development"
capabilities:
nvidia: "cuda12-coqui-development"
intel: "intel-sycl-f32-coqui-development"
amd: "rocm-coqui-development"
- !!merge <<: *coqui
name: "cuda11-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-coqui"
- !!merge <<: *coqui
name: "cuda12-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui"
- !!merge <<: *coqui
name: "cuda11-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui"
alias: "coqui"
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- !!merge <<: *coqui
name: "cuda12-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui"
@@ -296,6 +403,9 @@
- !!merge <<: *coqui
name: "sycl-f16-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-coqui"
- !!merge <<: *coqui
name: "rocm-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
## bark
- &bark
urls:
@@ -306,13 +416,25 @@
- text-to-speech
- TTS
license: MIT
name: "cuda11-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
name: "bark"
alias: "bark"
capabilities:
cuda: "cuda12-bark"
intel: "intel-sycl-f32-bark"
rocm: "rocm-bark"
icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
- !!merge <<: *bark
name: "cuda12-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
name: "bark-development"
capabilities:
nvidia: "cuda12-bark-development"
intel: "intel-sycl-f32-bark-development"
amd: "rocm-bark-development"
- !!merge <<: *bark
name: "cuda11-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
- !!merge <<: *bark
name: "cuda11-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-bark"
- !!merge <<: *bark
name: "rocm-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark"
@@ -328,6 +450,15 @@
- !!merge <<: *bark
name: "sycl-f16-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-bark"
- !!merge <<: *bark
name: "cuda12-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-bark"
- !!merge <<: *bark
name: "rocm-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-bark"
- !!merge <<: *bark
name: "cuda12-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
- &barkcpp
urls:
- https://github.com/PABannier/bark.cpp
@@ -369,15 +500,22 @@
- TTS
license: MIT
icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTAxOTE0MDAsIm5iZiI6MTc1MDE5MTEwMCwicGF0aCI6Ii82NjAyMjQvNDQ4MTY2NjUzLWJkOGM1ZjAzLWU5MWQtNGVlNS1iNjgwLTU3MzU1ZGEyMDRkMS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNjE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDYxN1QyMDExNDBaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT1hMmI1NGY3OGFiZTlhNGFkNTVlYTY4NTIwMWEzODRiZGE4YzdhNGQ5MGNhNzE3MDYyYTA2NDIxYTkyYzhiODkwJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.mR9kM9xX0TdzPuSpuspCllHYQiq79dFQ2rtuNvjrl6w
name: "cuda11-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
alias: "chatterbox"
name: "chatterbox"
capabilities:
nvidia: "cuda12-chatterbox"
- !!merge <<: *chatterbox
name: "chatterbox-development"
capabilities:
nvidia: "cuda12-chatterbox-development"
- !!merge <<: *chatterbox
name: "cuda12-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"
- !!merge <<: *chatterbox
name: "cuda11-chatterbox"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox"
- !!merge <<: *chatterbox
name: "cuda11-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
- !!merge <<: *chatterbox
name: "cuda12-chatterbox"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox"

View File

@@ -38,9 +38,7 @@ DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES = os.environ.get("FRAMES", "64")
if XPU:
import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0))
print(torch.xpu.get_device_name(0))
# If MAX_WORKERS is specified in the environment, use it; otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
@@ -336,6 +334,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
device = "cpu" if not request.CUDA else "cuda"
if XPU:
device = "xpu"
self.device = device
if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
@@ -359,12 +359,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
if request.CUDA:
self.pipe.to('cuda')
if device != "cpu":
self.pipe.to(device)
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
self.controlnet.to(device)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service

View File

@@ -6,4 +6,10 @@ else
source $backend_dir/../common/libbackend.sh
fi
startBackend $@
if [ -d "/opt/intel" ]; then
# Assumes we are using the Intel oneAPI container image
# https://github.com/intel/intel-extension-for-pytorch/issues/538
export XPU=1
fi
startBackend $@

View File

@@ -57,7 +57,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
if err := pkgStartup.InstallModels(options.Galleries, options.BackendGalleries, options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}
@@ -86,13 +86,13 @@ func New(opts ...config.AppOption) (*Application, error) {
}
if options.PreloadJSONModels != "" {
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
if err := services.ApplyGalleryFromString(options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, options.Galleries, options.BackendGalleries, options.PreloadJSONModels); err != nil {
return nil, err
}
}
if options.PreloadModelsFromPath != "" {
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
if err := services.ApplyGalleryFromFile(options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, options.Galleries, options.BackendGalleries, options.PreloadModelsFromPath); err != nil {
return nil, err
}
}

View File

@@ -4,8 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
"os"
"regexp"
"slices"
"strings"
"sync"
"unicode/utf8"
@@ -14,6 +14,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/grpc/proto"
@@ -34,17 +35,22 @@ type TokenUsage struct {
TimingTokenGeneration float64
}
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, cl *config.BackendConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
// Check if the model is already known; if it isn't, try to install it from the gallery
if o.AutoloadGalleries { // experimental
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
modelNames, err := services.ListModels(cl, loader, nil, services.SKIP_ALWAYS)
if err != nil {
return nil, err
}
if !slices.Contains(modelNames, c.Name) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans)
err := gallery.InstallModelFromGallery(o.Galleries, o.BackendGalleries, c.Name, loader.ModelPath, o.BackendsPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans, o.AutoloadBackendGalleries)
if err != nil {
return nil, err
log.Error().Err(err).Msgf("failed to install model %q from gallery", modelFile)
//return nil, err
}
}
}
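With gallery autoloading enabled (now the default), a request for a model that is not yet installed triggers an install from the configured galleries before inference; if the install fails, the error is only logged. A hedged sketch against the OpenAI-compatible API; the model name is purely illustrative:
```bash
# The named model is looked up in the galleries and installed on first use.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3-4b", "messages": [{"role": "user", "content": "hello"}]}'
```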

View File

@@ -143,6 +143,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
Options: c.Options,
Overrides: c.Overrides,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
ContextSize: int32(ctxSize),

core/cli/backends.go (new file, 156 lines)
View File

@@ -0,0 +1,156 @@
package cli
import (
"encoding/json"
"fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/startup"
"github.com/rs/zerolog/log"
"github.com/schollz/progressbar/v3"
)
type BackendsCMDFlags struct {
BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"storage"`
}
type BackendsList struct {
BackendsCMDFlags `embed:""`
}
type BackendsInstallSingle struct {
InstallArgs []string `arg:"" optional:"" name:"backend" help:"Backend images to install"`
BackendsCMDFlags `embed:""`
}
type BackendsInstall struct {
BackendArgs []string `arg:"" optional:"" name:"backends" help:"Backend configuration URLs to load"`
BackendsCMDFlags `embed:""`
}
type BackendsUninstall struct {
BackendArgs []string `arg:"" name:"backends" help:"Backend names to uninstall"`
BackendsCMDFlags `embed:""`
}
type BackendsCMD struct {
List BackendsList `cmd:"" help:"List the backends available in your galleries" default:"withargs"`
Install BackendsInstall `cmd:"" help:"Install a backend from the gallery"`
InstallSingle BackendsInstallSingle `cmd:"" help:"Install a single backend from the gallery"`
Uninstall BackendsUninstall `cmd:"" help:"Uninstall a backend"`
}
func (bi *BackendsInstallSingle) Run(ctx *cliContext.Context) error {
for _, backend := range bi.InstallArgs {
progressBar := progressbar.NewOptions(
1000,
progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backend)),
progressbar.OptionShowBytes(false),
progressbar.OptionClearOnFinish(),
)
progressCallback := func(fileName string, current string, total string, percentage float64) {
v := int(percentage * 10)
err := progressBar.Set(v)
if err != nil {
log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
}
}
if err := gallery.InstallBackend(bi.BackendsPath, &gallery.GalleryBackend{
URI: backend,
}, progressCallback); err != nil {
return err
}
}
return nil
}
func (bl *BackendsList) Run(ctx *cliContext.Context) error {
var galleries []config.Gallery
if err := json.Unmarshal([]byte(bl.BackendGalleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
}
backends, err := gallery.AvailableBackends(galleries, bl.BackendsPath)
if err != nil {
return err
}
for _, backend := range backends {
if backend.Installed {
fmt.Printf(" * %s@%s (installed)\n", backend.Gallery.Name, backend.Name)
} else {
fmt.Printf(" - %s@%s\n", backend.Gallery.Name, backend.Name)
}
}
return nil
}
func (bi *BackendsInstall) Run(ctx *cliContext.Context) error {
var galleries []config.Gallery
if err := json.Unmarshal([]byte(bi.BackendGalleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
}
for _, backendName := range bi.BackendArgs {
progressBar := progressbar.NewOptions(
1000,
progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backendName)),
progressbar.OptionShowBytes(false),
progressbar.OptionClearOnFinish(),
)
progressCallback := func(fileName string, current string, total string, percentage float64) {
v := int(percentage * 10)
err := progressBar.Set(v)
if err != nil {
log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
}
}
backendURI := downloader.URI(backendName)
if !backendURI.LooksLikeOCI() {
backends, err := gallery.AvailableBackends(galleries, bi.BackendsPath)
if err != nil {
return err
}
backend := gallery.FindGalleryElement(backends, backendName, bi.BackendsPath)
if backend == nil {
log.Error().Str("backend", backendName).Msg("backend not found")
return fmt.Errorf("backend not found: %s", backendName)
}
log.Info().Str("backend", backendName).Str("license", backend.License).Msg("installing backend")
}
err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, backendName)
if err != nil {
return err
}
}
return nil
}
func (bu *BackendsUninstall) Run(ctx *cliContext.Context) error {
for _, backendName := range bu.BackendArgs {
log.Info().Str("backend", backendName).Msg("uninstalling backend")
err := gallery.DeleteBackendFromSystem(bu.BackendsPath, backendName)
if err != nil {
return err
}
fmt.Printf("Backend %s uninstalled successfully\n", backendName)
}
return nil
}
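The new subcommands mirror the existing `local-ai models` CLI. A usage sketch; the backend names are illustrative and come from whatever backend galleries are configured:
```bash
# List backends known to the configured galleries; installed ones are marked with '*'.
local-ai backends list

# Install a backend by gallery name, or remove it again.
local-ai backends install vllm
local-ai backends uninstall vllm

# Install directly from an OCI image reference, bypassing the gallery lookup.
local-ai backends install-single quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm

# Galleries and the target directory can be overridden through the documented env vars.
LOCALAI_BACKENDS_PATH=/data/backends local-ai backends list
```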

View File

@@ -11,6 +11,7 @@ var CLI struct {
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
Backends BackendsCMD `cmd:"" help:"Manage LocalAI backends and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
SoundGeneration SoundGenerationCMD `cmd:"" help:"Generates audio files from text or audio"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`

View File

@@ -16,8 +16,10 @@ import (
)
type ModelsCMDFlags struct {
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"storage"`
}
type ModelsList struct {
@@ -25,8 +27,9 @@ type ModelsList struct {
}
type ModelsInstall struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
AutoloadBackendGalleries bool `env:"LOCALAI_AUTOLOAD_BACKEND_GALLERIES" help:"If true, automatically loads backend galleries" group:"backends" default:"true"`
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
ModelsCMDFlags `embed:""`
}
@@ -62,6 +65,11 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
log.Error().Err(err).Msg("unable to load galleries")
}
var backendGalleries []config.Gallery
if err := json.Unmarshal([]byte(mi.BackendGalleries), &backendGalleries); err != nil {
log.Error().Err(err).Msg("unable to load backend galleries")
}
for _, modelName := range mi.ModelArgs {
progressBar := progressbar.NewOptions(
@@ -100,7 +108,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
}
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
err = startup.InstallModels(galleries, backendGalleries, mi.ModelsPath, mi.BackendsPath, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
if err != nil {
return err
}
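Because `InstallModels` now also receives the backend galleries and the autoload flag, installing a model from the CLI should pull in the backend it requires as well, unless backend-gallery autoloading is turned off. A hedged sketch with an illustrative model name:
```bash
# Installs the model and, presumably, the backend it depends on.
local-ai models install qwen3-4b

# Opt out of automatic backend installation.
LOCALAI_AUTOLOAD_BACKEND_GALLERIES=false local-ai models install qwen3-4b
```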

View File

@@ -30,13 +30,14 @@ type RunCMD struct {
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
// The alias on this option is there to preserve functionality with the old `--config-file` parameter
ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models" default:"true"`
AutoloadBackendGalleries bool `env:"LOCALAI_AUTOLOAD_BACKEND_GALLERIES,AUTOLOAD_BACKEND_GALLERIES" group:"backends" default:"true"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"`
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
@@ -192,6 +193,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.EnableGalleriesAutoload)
}
if r.AutoloadBackendGalleries {
opts = append(opts, config.EnableBackendGalleriesAutoload)
}
if r.PreloadBackendOnly {
_, err := application.New(opts...)
return err

View File

@@ -55,7 +55,7 @@ type ApplicationConfig struct {
ExternalGRPCBackends map[string]string
AutoloadGalleries bool
AutoloadGalleries, AutoloadBackendGalleries bool
SingleBackend bool
ParallelBackendRequests bool
@@ -192,6 +192,10 @@ var EnableGalleriesAutoload = func(o *ApplicationConfig) {
o.AutoloadGalleries = true
}
var EnableBackendGalleriesAutoload = func(o *ApplicationConfig) {
o.AutoloadBackendGalleries = true
}
func WithExternalBackend(name string, uri string) AppOption {
return func(o *ApplicationConfig) {
if o.ExternalGRPCBackends == nil {

View File

@@ -70,7 +70,8 @@ type BackendConfig struct {
Description string `yaml:"description"`
Usage string `yaml:"usage"`
Options []string `yaml:"options"`
Options []string `yaml:"options"`
Overrides []string `yaml:"overrides"`
}
// Pipeline defines other models to use for audio-to-audio

View File

@@ -2,10 +2,25 @@ package gallery
import "github.com/mudler/LocalAI/core/config"
// BackendMetadata represents the metadata stored in a JSON file for each installed backend
type BackendMetadata struct {
// Alias is an optional alternative name for the backend
Alias string `json:"alias,omitempty"`
// MetaBackendFor points to the concrete backend if this is a meta backend
MetaBackendFor string `json:"meta_backend_for,omitempty"`
// Name is the original name from the gallery
Name string `json:"name,omitempty"`
// GalleryURL is the URL of the gallery this backend came from
GalleryURL string `json:"gallery_url,omitempty"`
// InstalledAt is the timestamp when the backend was installed
InstalledAt string `json:"installed_at,omitempty"`
}
type GalleryBackend struct {
Metadata `json:",inline" yaml:",inline"`
Alias string `json:"alias,omitempty" yaml:"alias,omitempty"`
URI string `json:"uri,omitempty" yaml:"uri,omitempty"`
Metadata `json:",inline" yaml:",inline"`
Alias string `json:"alias,omitempty" yaml:"alias,omitempty"`
URI string `json:"uri,omitempty" yaml:"uri,omitempty"`
CapabilitiesMap map[string]string `json:"capabilities,omitempty" yaml:"capabilities,omitempty"`
}
type GalleryBackends []*GalleryBackend
@@ -14,6 +29,10 @@ func (m *GalleryBackend) SetGallery(gallery config.Gallery) {
m.Gallery = gallery
}
func (m *GalleryBackend) IsMeta() bool {
return len(m.CapabilitiesMap) > 0 && m.URI == ""
}
func (m *GalleryBackend) SetInstalled(installed bool) {
m.Installed = installed
}
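On disk, every installed backend directory now carries a `metadata.json` next to `run.sh`, and a meta backend directory contains only metadata pointing at the concrete backend it resolved to. An illustrative example; the gallery URL and timestamp are assumptions:
```bash
cat "$LOCALAI_BACKENDS_PATH/vllm/metadata.json"
# {
#   "meta_backend_for": "cuda12-vllm",
#   "name": "vllm",
#   "gallery_url": "https://example.com/backend-gallery.yaml",
#   "installed_at": "2025-07-04T18:49:15Z"
# }
```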

View File

@@ -1,17 +1,94 @@
package gallery
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/oci"
"github.com/rs/zerolog/log"
)
const (
metadataFile = "metadata.json"
runFile = "run.sh"
)
// readBackendMetadata reads the metadata JSON file for a backend
func readBackendMetadata(backendPath string) (*BackendMetadata, error) {
metadataPath := filepath.Join(backendPath, metadataFile)
// If metadata file doesn't exist, return nil (for backward compatibility)
if _, err := os.Stat(metadataPath); os.IsNotExist(err) {
return nil, nil
}
data, err := os.ReadFile(metadataPath)
if err != nil {
return nil, fmt.Errorf("failed to read metadata file %q: %v", metadataPath, err)
}
var metadata BackendMetadata
if err := json.Unmarshal(data, &metadata); err != nil {
return nil, fmt.Errorf("failed to unmarshal metadata file %q: %v", metadataPath, err)
}
return &metadata, nil
}
// writeBackendMetadata writes the metadata JSON file for a backend
func writeBackendMetadata(backendPath string, metadata *BackendMetadata) error {
metadataPath := filepath.Join(backendPath, metadataFile)
data, err := json.MarshalIndent(metadata, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal metadata: %v", err)
}
if err := os.WriteFile(metadataPath, data, 0644); err != nil {
return fmt.Errorf("failed to write metadata file %q: %v", metadataPath, err)
}
return nil
}
func findBestBackendFromMeta(backend *GalleryBackend, systemState *system.SystemState, backends GalleryElements[*GalleryBackend]) *GalleryBackend {
if systemState == nil {
return nil
}
realBackend := backend.CapabilitiesMap[systemState.Capability()]
if realBackend == "" {
return nil
}
return backends.FindByName(realBackend)
}
// Installs a backend from the gallery
func InstallBackendFromGallery(galleries []config.Gallery, name string, basePath string, downloadStatus func(string, string, string, float64)) error {
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, name string, basePath string, downloadStatus func(string, string, string, float64), force bool) error {
if !force {
// check if we already have the backend installed
backends, err := ListSystemBackends(basePath)
if err != nil {
return err
}
if _, ok := backends[name]; ok {
return nil
}
}
if name == "" {
return fmt.Errorf("backend name is empty")
}
log.Debug().Interface("galleries", galleries).Str("name", name).Msg("Installing backend from gallery")
backends, err := AvailableBackends(galleries, basePath)
if err != nil {
return err
@@ -19,7 +96,44 @@ func InstallBackendFromGallery(galleries []config.Gallery, name string, basePath
backend := FindGalleryElement(backends, name, basePath)
if backend == nil {
return fmt.Errorf("no model found with name %q", name)
return fmt.Errorf("no backend found with name %q", name)
}
if backend.IsMeta() {
log.Debug().Interface("systemState", systemState).Str("name", name).Msg("Backend is a meta backend")
// Then, let's try to find the best backend based on the capabilities map
bestBackend := findBestBackendFromMeta(backend, systemState, backends)
if bestBackend == nil {
return fmt.Errorf("no backend found with capabilities %q", backend.CapabilitiesMap)
}
log.Debug().Str("name", name).Str("bestBackend", bestBackend.Name).Msg("Installing backend from meta backend")
// Then, let's install the best backend
if err := InstallBackend(basePath, bestBackend, downloadStatus); err != nil {
return err
}
// We also need to create a directory for the meta backend, recording which concrete backend it points to, so it can later be used to remove it
metaBackendPath := filepath.Join(basePath, name)
if err := os.MkdirAll(metaBackendPath, 0750); err != nil {
return fmt.Errorf("failed to create meta backend path %q: %v", metaBackendPath, err)
}
// Create metadata for the meta backend
metaMetadata := &BackendMetadata{
MetaBackendFor: bestBackend.Name,
Name: name,
GalleryURL: backend.Gallery.URL,
InstalledAt: time.Now().Format(time.RFC3339),
}
if err := writeBackendMetadata(metaBackendPath, metaMetadata); err != nil {
return fmt.Errorf("failed to write metadata for meta backend %q: %v", name, err)
}
return nil
}
return InstallBackend(basePath, backend, downloadStatus)
@@ -32,6 +146,10 @@ func InstallBackend(basePath string, config *GalleryBackend, downloadStatus func
return fmt.Errorf("failed to create base path: %v", err)
}
if config.IsMeta() {
return fmt.Errorf("meta backends cannot be installed directly")
}
name := config.Name
img, err := oci.GetImage(config.URI, "", nil, nil)
@@ -44,25 +162,77 @@ func InstallBackend(basePath string, config *GalleryBackend, downloadStatus func
return fmt.Errorf("failed to create backend path %q: %v", backendPath, err)
}
if err := oci.ExtractOCIImage(img, backendPath, downloadStatus); err != nil {
if err := oci.ExtractOCIImage(img, config.URI, backendPath, downloadStatus); err != nil {
return fmt.Errorf("failed to extract image %q: %v", config.URI, err)
}
// Create metadata for the backend
metadata := &BackendMetadata{
Name: name,
GalleryURL: config.Gallery.URL,
InstalledAt: time.Now().Format(time.RFC3339),
}
if config.Alias != "" {
// Write an alias file inside
aliasFile := filepath.Join(backendPath, "alias")
if err := os.WriteFile(aliasFile, []byte(config.Alias), 0644); err != nil {
return fmt.Errorf("failed to write alias file %q: %v", aliasFile, err)
}
metadata.Alias = config.Alias
}
if err := writeBackendMetadata(backendPath, metadata); err != nil {
return fmt.Errorf("failed to write metadata for backend %q: %v", name, err)
}
return nil
}
func DeleteBackendFromSystem(basePath string, name string) error {
backendFile := filepath.Join(basePath, name)
backendDirectory := filepath.Join(basePath, name)
return os.RemoveAll(backendFile)
// check if the backend dir exists
if _, err := os.Stat(backendDirectory); os.IsNotExist(err) {
// if doesn't exist, it might be an alias, so we need to check if we have a matching alias in
// all the backends in the basePath
backends, err := os.ReadDir(basePath)
if err != nil {
return err
}
foundBackend := false
for _, backend := range backends {
if backend.IsDir() {
metadata, err := readBackendMetadata(filepath.Join(basePath, backend.Name()))
if err != nil {
return err
}
if metadata != nil && metadata.Alias == name {
backendDirectory = filepath.Join(basePath, backend.Name())
foundBackend = true
break
}
}
}
// If no matching backend or alias was found, return an error
if !foundBackend {
return fmt.Errorf("no backend found with name %q", name)
}
}
// If it's a meta backend, delete also associated backend
metadata, err := readBackendMetadata(backendDirectory)
if err != nil {
return err
}
if metadata != nil && metadata.MetaBackendFor != "" {
metaBackendDirectory := filepath.Join(basePath, metadata.MetaBackendFor)
log.Debug().Str("backendDirectory", metaBackendDirectory).Msg("Deleting meta backend")
if _, err := os.Stat(metaBackendDirectory); os.IsNotExist(err) {
return fmt.Errorf("meta backend %q not found", metadata.MetaBackendFor)
}
os.RemoveAll(metaBackendDirectory)
}
return os.RemoveAll(backendDirectory)
}
func ListSystemBackends(basePath string) (map[string]string, error) {
@@ -75,17 +245,34 @@ func ListSystemBackends(basePath string) (map[string]string, error) {
for _, backend := range backends {
if backend.IsDir() {
runFile := filepath.Join(basePath, backend.Name(), "run.sh")
backendsNames[backend.Name()] = runFile
runFile := filepath.Join(basePath, backend.Name(), runFile)
// Skip if the metadata file doesn't exist
metadataFilePath := filepath.Join(basePath, backend.Name(), metadataFile)
if _, err := os.Stat(metadataFilePath); os.IsNotExist(err) {
continue
}
aliasFile := filepath.Join(basePath, backend.Name(), "alias")
if _, err := os.Stat(aliasFile); err == nil {
// read the alias file, and use it as key
alias, err := os.ReadFile(aliasFile)
if err != nil {
return nil, err
// Check for alias in metadata
metadata, err := readBackendMetadata(filepath.Join(basePath, backend.Name()))
if err != nil {
return nil, err
}
if metadata == nil {
continue
}
if _, exists := backendsNames[backend.Name()]; !exists {
// Don't override an entry that is already set (e.g. by an alias or a meta backend)
if _, err := os.Stat(runFile); err == nil {
backendsNames[backend.Name()] = runFile
} else {
backendsNames[backend.Name()] = ""
}
backendsNames[string(alias)] = runFile
}
if metadata.Alias != "" {
backendsNames[metadata.Alias] = runFile
}
}
}
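Taken together with the CLI above: installing a meta backend leaves two directories under the backends path, and deleting it by the meta name removes both. A hedged walkthrough, assuming an NVIDIA host and the `vllm` meta entry from the gallery:
```bash
local-ai backends install vllm

ls "$LOCALAI_BACKENDS_PATH"
# cuda12-vllm  vllm
# vllm/ holds only metadata.json with meta_backend_for=cuda12-vllm;
# cuda12-vllm/ holds the extracted OCI image, including run.sh.

local-ai backends uninstall vllm
# Removes vllm/ and, via meta_backend_for, cuda12-vllm/ as well.
```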

View File

@@ -1,12 +1,19 @@
package gallery
import (
"encoding/json"
"os"
"path/filepath"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v2"
)
const (
testImage = "quay.io/mudler/tests:localai-backend-test"
)
var _ = Describe("Gallery Backends", func() {
@@ -35,18 +42,349 @@ var _ = Describe("Gallery Backends", func() {
Describe("InstallBackendFromGallery", func() {
It("should return error when backend is not found", func() {
err := InstallBackendFromGallery(galleries, "non-existent", tempDir, nil)
err := InstallBackendFromGallery(galleries, nil, "non-existent", tempDir, nil, true)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("no model found with name"))
Expect(err.Error()).To(ContainSubstring("no backend found with name \"non-existent\""))
})
It("should install backend from gallery", func() {
err := InstallBackendFromGallery(galleries, "test-backend", tempDir, nil)
err := InstallBackendFromGallery(galleries, nil, "test-backend", tempDir, nil, true)
Expect(err).ToNot(HaveOccurred())
Expect(filepath.Join(tempDir, "test-backend", "run.sh")).To(BeARegularFile())
})
})
Describe("Meta Backends", func() {
It("should identify meta backends correctly", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}
Expect(metaBackend.IsMeta()).To(BeTrue())
regularBackend := &GalleryBackend{
Metadata: Metadata{
Name: "regular-backend",
},
URI: testImage,
}
Expect(regularBackend.IsMeta()).To(BeFalse())
emptyMetaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "empty-meta-backend",
},
CapabilitiesMap: map[string]string{},
}
Expect(emptyMetaBackend.IsMeta()).To(BeFalse())
nilMetaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nil-meta-backend",
},
CapabilitiesMap: nil,
}
Expect(nilMetaBackend.IsMeta()).To(BeFalse())
})
It("should find best backend from meta based on system capabilities", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}
nvidiaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nvidia-backend",
},
URI: testImage,
}
amdBackend := &GalleryBackend{
Metadata: Metadata{
Name: "amd-backend",
},
URI: testImage,
}
backends := GalleryElements[*GalleryBackend]{nvidiaBackend, amdBackend}
// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
bestBackend := findBestBackendFromMeta(metaBackend, nvidiaSystemState, backends)
Expect(bestBackend).To(Equal(nvidiaBackend))
// Test with AMD system state
amdSystemState := &system.SystemState{GPUVendor: "amd"}
bestBackend = findBestBackendFromMeta(metaBackend, amdSystemState, backends)
Expect(bestBackend).To(Equal(amdBackend))
// Test with unsupported GPU vendor
unsupportedSystemState := &system.SystemState{GPUVendor: "unsupported"}
bestBackend = findBestBackendFromMeta(metaBackend, unsupportedSystemState, backends)
Expect(bestBackend).To(BeNil())
})
It("should handle meta backend deletion correctly", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}
nvidiaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nvidia-backend",
},
URI: testImage,
}
amdBackend := &GalleryBackend{
Metadata: Metadata{
Name: "amd-backend",
},
URI: testImage,
}
gallery := config.Gallery{
Name: "test-gallery",
URL: "file://" + filepath.Join(tempDir, "backend-gallery.yaml"),
}
galleryBackend := GalleryBackends{amdBackend, nvidiaBackend, metaBackend}
dat, err := yaml.Marshal(galleryBackend)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, "backend-gallery.yaml"), dat, 0644)
Expect(err).NotTo(HaveOccurred())
// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil, true)
Expect(err).NotTo(HaveOccurred())
metaBackendPath := filepath.Join(tempDir, "meta-backend")
Expect(metaBackendPath).To(BeADirectory())
concreteBackendPath := filepath.Join(tempDir, "nvidia-backend")
Expect(concreteBackendPath).To(BeADirectory())
allBackends, err := ListSystemBackends(tempDir)
Expect(err).NotTo(HaveOccurred())
Expect(allBackends).To(HaveKey("meta-backend"))
Expect(allBackends).To(HaveKey("nvidia-backend"))
// Delete meta backend by name
err = DeleteBackendFromSystem(tempDir, "meta-backend")
Expect(err).NotTo(HaveOccurred())
// Verify meta backend directory is deleted
Expect(metaBackendPath).NotTo(BeADirectory())
// Verify concrete backend directory is deleted
Expect(concreteBackendPath).NotTo(BeADirectory())
})
It("should handle meta backend deletion correctly with aliases", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
Alias: "backend-alias",
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}
nvidiaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nvidia-backend",
},
Alias: "backend-alias",
URI: testImage,
}
amdBackend := &GalleryBackend{
Metadata: Metadata{
Name: "amd-backend",
},
Alias: "backend-alias",
URI: testImage,
}
gallery := config.Gallery{
Name: "test-gallery",
URL: "file://" + filepath.Join(tempDir, "backend-gallery.yaml"),
}
galleryBackend := GalleryBackends{amdBackend, nvidiaBackend, metaBackend}
dat, err := yaml.Marshal(galleryBackend)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, "backend-gallery.yaml"), dat, 0644)
Expect(err).NotTo(HaveOccurred())
// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil, true)
Expect(err).NotTo(HaveOccurred())
metaBackendPath := filepath.Join(tempDir, "meta-backend")
Expect(metaBackendPath).To(BeADirectory())
concreteBackendPath := filepath.Join(tempDir, "nvidia-backend")
Expect(concreteBackendPath).To(BeADirectory())
allBackends, err := ListSystemBackends(tempDir)
Expect(err).NotTo(HaveOccurred())
Expect(allBackends).To(HaveKey("meta-backend"))
Expect(allBackends).To(HaveKey("nvidia-backend"))
Expect(allBackends["meta-backend"]).To(BeEmpty())
// Delete meta backend by name
err = DeleteBackendFromSystem(tempDir, "meta-backend")
Expect(err).NotTo(HaveOccurred())
// Verify meta backend directory is deleted
Expect(metaBackendPath).NotTo(BeADirectory())
// Verify concrete backend directory is deleted
Expect(concreteBackendPath).NotTo(BeADirectory())
})
It("should handle meta backend deletion correctly with aliases pointing to the same backend", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
Alias: "meta-backend",
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}
nvidiaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nvidia-backend",
},
Alias: "meta-backend",
URI: testImage,
}
amdBackend := &GalleryBackend{
Metadata: Metadata{
Name: "amd-backend",
},
Alias: "meta-backend",
URI: testImage,
}
gallery := config.Gallery{
Name: "test-gallery",
URL: "file://" + filepath.Join(tempDir, "backend-gallery.yaml"),
}
galleryBackend := GalleryBackends{amdBackend, nvidiaBackend, metaBackend}
dat, err := yaml.Marshal(galleryBackend)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, "backend-gallery.yaml"), dat, 0644)
Expect(err).NotTo(HaveOccurred())
// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil, true)
Expect(err).NotTo(HaveOccurred())
metaBackendPath := filepath.Join(tempDir, "meta-backend")
Expect(metaBackendPath).To(BeADirectory())
concreteBackendPath := filepath.Join(tempDir, "nvidia-backend")
Expect(concreteBackendPath).To(BeADirectory())
allBackends, err := ListSystemBackends(tempDir)
Expect(err).NotTo(HaveOccurred())
Expect(allBackends).To(HaveKey("meta-backend"))
Expect(allBackends).To(HaveKey("nvidia-backend"))
Expect(allBackends["meta-backend"]).To(Equal(filepath.Join(tempDir, "nvidia-backend", "run.sh")))
// Delete meta backend by name
err = DeleteBackendFromSystem(tempDir, "meta-backend")
Expect(err).NotTo(HaveOccurred())
// Verify meta backend directory is deleted
Expect(metaBackendPath).NotTo(BeADirectory())
// Verify concrete backend directory is deleted
Expect(concreteBackendPath).NotTo(BeADirectory())
})
It("should list meta backends correctly in system backends", func() {
// Create a meta backend directory with metadata
metaBackendPath := filepath.Join(tempDir, "meta-backend")
err := os.MkdirAll(metaBackendPath, 0750)
Expect(err).NotTo(HaveOccurred())
// Create metadata file pointing to concrete backend
metadata := &BackendMetadata{
MetaBackendFor: "concrete-backend",
Name: "meta-backend",
InstalledAt: "2023-01-01T00:00:00Z",
}
metadataData, err := json.Marshal(metadata)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(metaBackendPath, "metadata.json"), metadataData, 0644)
Expect(err).NotTo(HaveOccurred())
// Create the concrete backend directory with run.sh
concreteBackendPath := filepath.Join(tempDir, "concrete-backend")
err = os.MkdirAll(concreteBackendPath, 0750)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(concreteBackendPath, "metadata.json"), []byte("{}"), 0755)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(concreteBackendPath, "run.sh"), []byte(""), 0755)
Expect(err).NotTo(HaveOccurred())
// List system backends
backends, err := ListSystemBackends(tempDir)
Expect(err).NotTo(HaveOccurred())
// Should include both the meta backend name and concrete backend name
Expect(backends).To(HaveKey("meta-backend"))
Expect(backends).To(HaveKey("concrete-backend"))
// meta-backend should be empty
Expect(backends["meta-backend"]).To(BeEmpty())
// concrete-backend should point to its own run.sh
Expect(backends["concrete-backend"]).To(Equal(filepath.Join(tempDir, "concrete-backend", "run.sh")))
})
})
Describe("InstallBackend", func() {
It("should create base path if it doesn't exist", func() {
newPath := filepath.Join(tempDir, "new-path")
@@ -73,10 +411,17 @@ var _ = Describe("Gallery Backends", func() {
err := InstallBackend(tempDir, &backend, nil)
Expect(err).ToNot(HaveOccurred())
Expect(filepath.Join(tempDir, "test-backend", "alias")).To(BeARegularFile())
content, err := os.ReadFile(filepath.Join(tempDir, "test-backend", "alias"))
Expect(filepath.Join(tempDir, "test-backend", "metadata.json")).To(BeARegularFile())
// Read and verify metadata
metadataData, err := os.ReadFile(filepath.Join(tempDir, "test-backend", "metadata.json"))
Expect(err).ToNot(HaveOccurred())
Expect(string(content)).To(ContainSubstring("test-alias"))
var metadata BackendMetadata
err = json.Unmarshal(metadataData, &metadata)
Expect(err).ToNot(HaveOccurred())
Expect(metadata.Alias).To(Equal("test-alias"))
Expect(metadata.Name).To(Equal("test-backend"))
Expect(filepath.Join(tempDir, "test-backend", "run.sh")).To(BeARegularFile())
// Check that the alias was recognized
@@ -103,7 +448,7 @@ var _ = Describe("Gallery Backends", func() {
It("should not error when backend doesn't exist", func() {
err := DeleteBackendFromSystem(tempDir, "non-existent")
Expect(err).NotTo(HaveOccurred())
Expect(err).To(HaveOccurred())
})
})
@@ -114,6 +459,10 @@ var _ = Describe("Gallery Backends", func() {
for _, name := range backendNames {
err := os.MkdirAll(filepath.Join(tempDir, name), 0750)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, name, "metadata.json"), []byte("{}"), 0755)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, name, "run.sh"), []byte(""), 0755)
Expect(err).NotTo(HaveOccurred())
}
backends, err := ListSystemBackends(tempDir)
@@ -134,8 +483,17 @@ var _ = Describe("Gallery Backends", func() {
err := os.MkdirAll(backendPath, 0750)
Expect(err).NotTo(HaveOccurred())
// Create alias file
err = os.WriteFile(filepath.Join(backendPath, "alias"), []byte(alias), 0644)
// Create metadata file with alias
metadata := &BackendMetadata{
Alias: alias,
Name: backendName,
InstalledAt: "2023-01-01T00:00:00Z",
}
metadataData, err := json.Marshal(metadata)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(backendPath, "metadata.json"), metadataData, 0644)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(backendPath, "run.sh"), []byte(""), 0755)
Expect(err).NotTo(HaveOccurred())
backends, err := ListSystemBackends(tempDir)

View File

@@ -121,7 +121,12 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) (Galler
// Get models from galleries
for _, gallery := range galleries {
galleryModels, err := getGalleryElements[*GalleryModel](gallery, basePath)
galleryModels, err := getGalleryElements[*GalleryModel](gallery, basePath, func(model *GalleryModel) bool {
if _, err := os.Stat(filepath.Join(basePath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil {
return true
}
return false
})
if err != nil {
return nil, err
}
@@ -137,7 +142,14 @@ func AvailableBackends(galleries []config.Gallery, basePath string) (GalleryElem
// Get models from galleries
for _, gallery := range galleries {
galleryModels, err := getGalleryElements[*GalleryBackend](gallery, basePath)
galleryModels, err := getGalleryElements[*GalleryBackend](gallery, basePath, func(backend *GalleryBackend) bool {
backends, err := ListSystemBackends(basePath)
if err != nil {
return false
}
_, exists := backends[backend.GetName()]
return exists
})
if err != nil {
return nil, err
}
@@ -162,7 +174,7 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error)
return refFile, err
}
func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string) ([]T, error) {
func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string, isInstalledCallback func(T) bool) ([]T, error) {
var models []T = []T{}
if strings.HasSuffix(gallery.URL, ".ref") {
@@ -187,15 +199,7 @@ func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath strin
// Add gallery to models
for _, model := range models {
model.SetGallery(gallery)
// we check if the model was already installed by checking if the config file exists
// TODO: (what to do if the model doesn't install a config file?)
// TODO: This is sub-optimal now that the gallery handles both backends and models - we need to abstract this away
if _, err := os.Stat(filepath.Join(basePath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil {
model.SetInstalled(true)
}
if _, err := os.Stat(filepath.Join(basePath, model.GetName())); err == nil {
model.SetInstalled(true)
}
model.SetInstalled(isInstalledCallback(model))
}
return models, nil
}

View File

@@ -10,6 +10,7 @@ import (
"dario.cat/mergo"
"github.com/mudler/LocalAI/core/config"
lconfig "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils"
@@ -69,7 +70,9 @@ type PromptTemplate struct {
}
// Installs a model from the gallery
func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func InstallModelFromGallery(
modelGalleries, backendGalleries []config.Gallery,
name string, basePath, backendBasePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan, automaticallyInstallBackend bool) error {
applyModel := func(model *GalleryModel) error {
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
@@ -119,14 +122,26 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
return err
}
if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil {
installedModel, err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan)
if err != nil {
return err
}
if automaticallyInstallBackend && installedModel.Backend != "" {
systemState, err := system.GetSystemState()
if err != nil {
return err
}
if err := InstallBackendFromGallery(backendGalleries, systemState, installedModel.Backend, backendBasePath, downloadStatus, false); err != nil {
return err
}
}
return nil
}
models, err := AvailableGalleryModels(galleries, basePath)
models, err := AvailableGalleryModels(modelGalleries, basePath)
if err != nil {
return err
}
@@ -139,11 +154,11 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
return applyModel(model)
}
func InstallModel(basePath, nameOverride string, config *ModelConfig, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func InstallModel(basePath, nameOverride string, config *ModelConfig, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) (*lconfig.BackendConfig, error) {
// Create base path if it doesn't exist
err := os.MkdirAll(basePath, 0750)
if err != nil {
return fmt.Errorf("failed to create base path: %v", err)
return nil, fmt.Errorf("failed to create base path: %v", err)
}
if len(configOverrides) > 0 {
@@ -155,7 +170,7 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
return err
return nil, err
}
// Create file path
@@ -165,19 +180,19 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI))
if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
return nil, err
}
}
uri := downloader.URI(file.URI)
if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
return err
return nil, err
}
}
// Write prompt template contents to separate files
for _, template := range config.PromptTemplates {
if err := utils.VerifyPath(template.Name+".tmpl", basePath); err != nil {
return err
return nil, err
}
// Create file path
filePath := filepath.Join(basePath, template.Name+".tmpl")
@@ -185,12 +200,12 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
// Create parent directory
err := os.MkdirAll(filepath.Dir(filePath), 0750)
if err != nil {
return fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err)
return nil, fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err)
}
// Create and write file content
err = os.WriteFile(filePath, []byte(template.Content), 0600)
if err != nil {
return fmt.Errorf("failed to write prompt template %q: %v", template.Name, err)
return nil, fmt.Errorf("failed to write prompt template %q: %v", template.Name, err)
}
log.Debug().Msgf("Prompt template %q written", template.Name)
@@ -202,9 +217,11 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
}
if err := utils.VerifyPath(name+".yaml", basePath); err != nil {
return err
return nil, err
}
backendConfig := lconfig.BackendConfig{}
// write config file
if len(configOverrides) != 0 || len(config.ConfigFile) != 0 {
configFilePath := filepath.Join(basePath, name+".yaml")
@@ -213,33 +230,33 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
configMap := make(map[string]interface{})
err = yaml.Unmarshal([]byte(config.ConfigFile), &configMap)
if err != nil {
return fmt.Errorf("failed to unmarshal config YAML: %v", err)
return nil, fmt.Errorf("failed to unmarshal config YAML: %v", err)
}
configMap["name"] = name
if err := mergo.Merge(&configMap, configOverrides, mergo.WithOverride); err != nil {
return err
return nil, err
}
// Write updated config file
updatedConfigYAML, err := yaml.Marshal(configMap)
if err != nil {
return fmt.Errorf("failed to marshal updated config YAML: %v", err)
return nil, fmt.Errorf("failed to marshal updated config YAML: %v", err)
}
backendConfig := lconfig.BackendConfig{}
err = yaml.Unmarshal(updatedConfigYAML, &backendConfig)
if err != nil {
return fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
return nil, fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
}
if !backendConfig.Validate() {
return fmt.Errorf("failed to validate updated config YAML")
return nil, fmt.Errorf("failed to validate updated config YAML")
}
err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
if err != nil {
return fmt.Errorf("failed to write updated config file: %v", err)
return nil, fmt.Errorf("failed to write updated config file: %v", err)
}
log.Debug().Msgf("Written config file %s", configFilePath)
@@ -249,14 +266,12 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
modelFile := filepath.Join(basePath, galleryFileName(name))
data, err := yaml.Marshal(config)
if err != nil {
return err
return nil, err
}
log.Debug().Msgf("Written gallery file %s", modelFile)
return os.WriteFile(modelFile, data, 0600)
//return nil
return &backendConfig, os.WriteFile(modelFile, data, 0600)
}
func galleryFileName(name string) string {

View File

@@ -29,7 +29,7 @@ var _ = Describe("Model test", func() {
defer os.RemoveAll(tempdir)
c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
_, err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
Expect(err).ToNot(HaveOccurred())
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {
@@ -79,7 +79,7 @@ var _ = Describe("Model test", func() {
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
Expect(models[0].Installed).To(BeFalse())
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
err = InstallModelFromGallery(galleries, []config.Gallery{}, "test@bert", tempdir, "", GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true, true)
Expect(err).ToNot(HaveOccurred())
dat, err := os.ReadFile(filepath.Join(tempdir, "bert.yaml"))
@@ -116,7 +116,7 @@ var _ = Describe("Model test", func() {
c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
_, err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
Expect(err).ToNot(HaveOccurred())
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -132,7 +132,7 @@ var _ = Describe("Model test", func() {
c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
_, err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
Expect(err).ToNot(HaveOccurred())
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -158,7 +158,7 @@ var _ = Describe("Model test", func() {
c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
_, err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
Expect(err).To(HaveOccurred())
})
})

View File

@@ -205,7 +205,10 @@ func API(application *application.Application) (*fiber.App, error) {
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
galleryService := services.NewGalleryService(application.ApplicationConfig(), application.ModelLoader())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
err = galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
if err != nil {
return nil, err
}
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())

View File

@@ -41,7 +41,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
}
responses <- initialMessage
ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
usage := schema.OpenAIUsage{
PromptTokens: tokenUsage.Prompt,
CompletionTokens: tokenUsage.Completion,
@@ -68,7 +68,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
}
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
result := ""
_, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
_, tokenUsage, _ := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
result += s
// TODO: Change generated BNF grammar to be compliant with the schema so we can
// stream the result token by token here.
@@ -92,7 +92,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
}
responses <- initialMessage
result, err := handleQuestion(config, req, ml, startupOptions, functionResults, result, prompt)
result, err := handleQuestion(config, cl, req, ml, startupOptions, functionResults, result, prompt)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -383,7 +383,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
// no streaming mode
default:
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
tokenCallback := func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
@@ -403,7 +404,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
switch {
case noActionsToRun:
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
result, err := handleQuestion(config, cl, input, ml, startupOptions, results, s, predInput)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -458,7 +459,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
}
}
}, nil)
}
result, tokenUsage, err := ComputeChoices(
input,
predInput,
config,
cl,
startupOptions,
ml,
tokenCallback,
nil,
)
if err != nil {
return err
}
@@ -489,7 +501,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
}
}
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, result, prompt string) (string, error) {
func handleQuestion(config *config.BackendConfig, cl *config.BackendConfigLoader, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, result, prompt string) (string, error) {
if len(funcResults) == 0 && result != "" {
log.Debug().Msgf("nothing function results but we had a message from the LLM")
@@ -538,7 +550,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
audios = append(audios, m.StringAudios...)
}
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, o, nil)
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, cl, o, nil)
if err != nil {
log.Error().Err(err).Msg("model inference failed")
return "", err

View File

@@ -31,7 +31,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
created := int(time.Now().Unix())
process := func(id string, s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
tokenCallback := func(s string, tokenUsage backend.TokenUsage) bool {
usage := schema.OpenAIUsage{
PromptTokens: tokenUsage.Prompt,
CompletionTokens: tokenUsage.Completion,
@@ -58,7 +58,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
responses <- resp
return true
})
}
ComputeChoices(req, s, config, cl, appConfig, loader, func(s string, c *[]schema.Choice) {}, tokenCallback)
close(responses)
}
@@ -168,7 +169,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
}
r, tokenUsage, err := ComputeChoices(
input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
}, nil)
if err != nil {

View File

@@ -56,7 +56,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
r, tokenUsage, err := ComputeChoices(input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s})
}, nil)
if err != nil {

View File

@@ -12,6 +12,7 @@ func ComputeChoices(
req *schema.OpenAIRequest,
predInput string,
config *config.BackendConfig,
bcl *config.BackendConfigLoader,
o *config.ApplicationConfig,
loader *model.ModelLoader,
cb func(string, *[]schema.Choice),
@@ -37,7 +38,7 @@ func ComputeChoices(
}
// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, o, tokenCallback)
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, bcl, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}

View File

@@ -223,7 +223,7 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC
return c.SendString(elements.ProgressBar("0"))
}
if status.Progress == 100 {
if status.Progress == 100 && status.Processed && status.Message == "completed" {
c.Set("HX-Trigger", "done") // this triggers /browse/backend/job/:uid
return c.SendString(elements.ProgressBar("100"))
}

View File

@@ -243,7 +243,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
return c.SendString(elements.ProgressBar("0"))
}
if status.Progress == 100 {
if status.Progress == 100 && status.Processed && status.Message == "completed" {
c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done)
return c.SendString(elements.ProgressBar("100"))
}

View File

@@ -2,12 +2,13 @@ package services
import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend]) error {
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
@@ -23,13 +24,17 @@ func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend]) e
g.modelLoader.DeleteExternalBackend(op.GalleryElementName)
} else {
log.Warn().Msgf("installing backend %s", op.GalleryElementName)
err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback)
err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, systemState, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback, true)
if err == nil {
err = gallery.RegisterBackends(g.appConfig.BackendsPath, g.modelLoader)
}
}
if err != nil {
log.Error().Err(err).Msgf("error installing backend %s", op.GalleryElementName)
if !op.Delete {
// If we didn't install the backend, we need to make sure we don't have a leftover directory
gallery.DeleteBackendFromSystem(g.appConfig.BackendsPath, op.GalleryElementName)
}
return err
}

View File

@@ -7,7 +7,9 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
type GalleryService struct {
@@ -50,7 +52,7 @@ func (g *GalleryService) GetAllStatus() map[string]*GalleryOpStatus {
return g.statuses
}
func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) {
func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) error {
// updates the status with an error
var updateError func(id string, e error)
if !g.appConfig.OpaqueErrors {
@@ -63,13 +65,18 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
}
}
systemState, err := system.GetSystemState()
if err != nil {
log.Error().Err(err).Msg("failed to get system state")
}
go func() {
for {
select {
case <-c.Done():
return
case op := <-g.BackendGalleryChannel:
err := g.backendHandler(&op)
err := g.backendHandler(&op, systemState)
if err != nil {
updateError(op.ID, err)
}
@@ -82,4 +89,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
}
}
}()
return nil
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/utils"
"gopkg.in/yaml.v2"
)
@@ -22,7 +23,7 @@ func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *c
utils.DisplayDownloadFunction(fileName, current, total, percentage)
}
err := processModelOperation(op, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback)
err := processModelOperation(op, g.appConfig.ModelPath, g.appConfig.BackendsPath, g.appConfig.EnforcePredownloadScans, g.appConfig.AutoloadBackendGalleries, progressCallback)
if err != nil {
return err
}
@@ -49,7 +50,7 @@ func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *c
return nil
}
func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func installModelFromRemoteConfig(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan, automaticallyInstallBackend bool, backendGalleries []config.Gallery, backendBasePath string) error {
config, err := gallery.GetGalleryConfigFromURL[gallery.ModelConfig](req.URL, modelPath)
if err != nil {
return err
@@ -57,7 +58,23 @@ func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus fun
config.Files = append(config.Files, req.AdditionalFiles...)
return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan)
installedModel, err := gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan)
if err != nil {
return err
}
if automaticallyInstallBackend && installedModel.Backend != "" {
systemState, err := system.GetSystemState()
if err != nil {
return err
}
if err := gallery.InstallBackendFromGallery(backendGalleries, systemState, installedModel.Backend, backendBasePath, downloadStatus, false); err != nil {
return err
}
}
return nil
}
type galleryModel struct {
@@ -65,22 +82,22 @@ type galleryModel struct {
ID string `json:"id"`
}
func processRequests(modelPath string, enforceScan bool, galleries []config.Gallery, requests []galleryModel) error {
func processRequests(modelPath, backendBasePath string, enforceScan, automaticallyInstallBackend bool, galleries []config.Gallery, backendGalleries []config.Gallery, requests []galleryModel) error {
var err error
for _, r := range requests {
utils.ResetDownloadTimers()
if r.ID == "" {
err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan)
err = installModelFromRemoteConfig(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan, automaticallyInstallBackend, backendGalleries, backendBasePath)
} else {
err = gallery.InstallModelFromGallery(
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan)
galleries, backendGalleries, r.ID, modelPath, backendBasePath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan, automaticallyInstallBackend)
}
}
return err
}
func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []config.Gallery) error {
func ApplyGalleryFromFile(modelPath, backendBasePath string, enforceScan, automaticallyInstallBackend bool, galleries []config.Gallery, backendGalleries []config.Gallery, s string) error {
dat, err := os.ReadFile(s)
if err != nil {
return err
@@ -91,24 +108,26 @@ func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []con
return err
}
return processRequests(modelPath, enforceScan, galleries, requests)
return processRequests(modelPath, backendBasePath, enforceScan, automaticallyInstallBackend, galleries, backendGalleries, requests)
}
func ApplyGalleryFromString(modelPath, s string, enforceScan bool, galleries []config.Gallery) error {
func ApplyGalleryFromString(modelPath, backendBasePath string, enforceScan, automaticallyInstallBackend bool, galleries []config.Gallery, backendGalleries []config.Gallery, s string) error {
var requests []galleryModel
err := json.Unmarshal([]byte(s), &requests)
if err != nil {
return err
}
return processRequests(modelPath, enforceScan, galleries, requests)
return processRequests(modelPath, backendBasePath, enforceScan, automaticallyInstallBackend, galleries, backendGalleries, requests)
}
// processModelOperation handles the installation or deletion of a model
func processModelOperation(
op *GalleryOp[gallery.GalleryModel],
modelPath string,
backendBasePath string,
enforcePredownloadScans bool,
automaticallyInstallBackend bool,
progressCallback func(string, string, string, float64),
) error {
// delete a model
@@ -140,7 +159,7 @@ func processModelOperation(
// if the request contains a gallery name, we apply the gallery from the gallery list
if op.GalleryElementName != "" {
return gallery.InstallModelFromGallery(op.Galleries, op.GalleryElementName, modelPath, op.Req, progressCallback, enforcePredownloadScans)
return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, op.GalleryElementName, modelPath, backendBasePath, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
// } else if op.ConfigURL != "" {
// err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
// if err != nil {
@@ -148,6 +167,6 @@ func processModelOperation(
// }
// return cl.Preload(modelPath)
} else {
return prepareModel(modelPath, op.Req, progressCallback, enforcePredownloadScans)
return installModelFromRemoteConfig(modelPath, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries, backendBasePath)
}
}

View File

@@ -10,8 +10,9 @@ type GalleryOp[T any] struct {
GalleryElementName string
Delete bool
Req T
Galleries []config.Gallery
Req T
Galleries []config.Gallery
BackendGalleries []config.Gallery
}
type GalleryOpStatus struct {

View File

@@ -0,0 +1,73 @@
package system
import (
"os"
"strings"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
)
type SystemState struct {
GPUVendor string
}
func (s *SystemState) Capability() string {
if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY") != "" {
return os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY")
}
capabilityRunFile := "/run/localai/capability"
if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE") != "" {
capabilityRunFile = os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE")
}
// Check if /run/localai/capability exists and use it
// This might be used by e.g. container images to specify which
// backends to pull in automatically when installing meta backends.
if _, err := os.Stat(capabilityRunFile); err == nil {
capability, err := os.ReadFile(capabilityRunFile)
if err == nil {
return string(capability)
}
}
return s.GPUVendor
}
func GetSystemState() (*SystemState, error) {
gpuVendor, _ := detectGPUVendor()
log.Debug().Str("gpuVendor", gpuVendor).Msg("GPU vendor")
return &SystemState{
GPUVendor: gpuVendor,
}, nil
}
func detectGPUVendor() (string, error) {
gpus, err := xsysinfo.GPUs()
if err != nil {
return "", err
}
for _, gpu := range gpus {
if gpu.DeviceInfo != nil {
if gpu.DeviceInfo.Vendor != nil {
gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name)
if strings.Contains(gpuVendorName, "NVIDIA") {
return "nvidia", nil
}
if strings.Contains(gpuVendorName, "AMD") {
return "amd", nil
}
if strings.Contains(gpuVendorName, "INTEL") {
return "intel", nil
}
return "nvidia", nil
}
}
}
return "", nil
}
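The capability resolution above can be steered from the outside. A minimal sketch of the three override paths, assuming the standard `local-ai run` entrypoint and the vendor strings the detector returns (`nvidia`, `amd`, `intel`); the values shown are illustrative only:

```bash
# 1. Force the capability directly via the environment:
LOCALAI_FORCE_META_BACKEND_CAPABILITY=intel local-ai run

# 2. Bake a capability into an image via the run file that Capability() checks.
#    Write it without a trailing newline, since the raw file contents are returned as-is:
mkdir -p /run/localai
echo -n "amd" > /run/localai/capability

# 3. Point LocalAI at an alternative run file location:
LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE=/tmp/capability local-ai run
```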

View File

@@ -11,7 +11,7 @@ services:
dockerfile: Dockerfile
args:
- IMAGE_TYPE=core
- BASE_IMAGE=ubuntu:22.04
- BASE_IMAGE=ubuntu:24.04
ports:
- 8080:8080
env_file:

View File

@@ -23,7 +23,6 @@ List of the Environment Variables:
|----------------------|--------------------------------------------------------------|
| **DOCKER_INSTALL** | Set to "true" to enable the installation of Docker images. |
| **USE_AIO** | Set to "true" to use the all-in-one LocalAI Docker image. |
| **USE_EXTRAS** | Set to "true" to use images with extra Python dependencies. |
| **USE_VULKAN** | Set to "true" to use Vulkan GPU support. |
| **API_KEY** | Specify an API key for accessing LocalAI, if required. |
| **PORT** | Specifies the port on which LocalAI will run (default is 8080). |
@@ -39,7 +38,6 @@ List of the Environment Variables:
The installer will automatically detect your GPU and select the appropriate image. By default, it uses the standard images without extra Python dependencies. You can customize the image selection using the following environment variables:
- `USE_EXTRAS=true`: Use images with extra Python dependencies (larger images, ~17GB)
- `USE_AIO=true`: Use all-in-one images that include all dependencies
- `USE_VULKAN=true`: Use Vulkan GPU support instead of vendor-specific GPU support
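As an illustration of the variables above, they can be passed inline when piping the installer; a minimal sketch (the values are examples only, adjust them to your setup):

```bash
# Use the all-in-one image and expose LocalAI on a custom port:
curl https://localai.io/install.sh | USE_AIO=true PORT=9090 sh

# Prefer the Vulkan images over vendor-specific GPU images:
curl https://localai.io/install.sh | USE_VULKAN=true sh
```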

View File

@@ -71,15 +71,15 @@ To use CUDA, use the images with the `cublas` tag, for example.
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
- CUDA `11` tags: `master-cublas-cuda11`, `v1.40.0-cublas-cuda11`, ...
- CUDA `12` tags: `master-cublas-cuda12`, `v1.40.0-cublas-cuda12`, ...
- CUDA `11` + FFmpeg tags: `master-cublas-cuda11-ffmpeg`, `v1.40.0-cublas-cuda11-ffmpeg`, ...
- CUDA `12` + FFmpeg tags: `master-cublas-cuda12-ffmpeg`, `v1.40.0-cublas-cuda12-ffmpeg`, ...
- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...
In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
```bash
docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-cublas-cuda12
docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-gpu-nvidia-cuda12
```
If the GPU inferencing is working, you should be able to see something like:
@@ -232,8 +232,8 @@ spec:
- env:
- name: HIP_VISIBLE_DEVICES
value: '0'
# This variable indicates the devices availible to container (0:device1 1:device2 2:device3) etc.
# For multiple devices (say device 1 and 3) the value would be equivelant to HIP_VISIBLE_DEVICES="0,2"
# This variable indicates the devices available to container (0:device1 1:device2 2:device3) etc.
# For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
# Please take note of this when an iGPU is present in the host system, as compatibility is not assured.
...
resources:
@@ -259,7 +259,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/
### Container images
To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...
To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
@@ -268,7 +268,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
```bash
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
```
### Notes
@@ -276,7 +276,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8
In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
```bash
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
```
Note also that SYCL has a known issue where it can hang with `mmap: true`. You have to disable it in the model configuration if it is explicitly enabled.
@@ -296,7 +296,7 @@ To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}
To run LocalAI with Docker and Vulkan, you can use the following command as an example:
```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models localai/localai:latest-vulkan-ffmpeg-core
```
### Notes
@@ -308,7 +308,7 @@ These flags are the same as the sections above, depending on the hardware, for [
If you have mixed hardware, you can pass flags for multiple GPUs, for example:
```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models \
--gpus=all \ # nvidia passthrough
--device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
localai/localai:latest-vulkan-ffmpeg-core

View File

@@ -91,6 +91,13 @@ Your backend container should:
5. Have a top level `run.sh` file that will be used to run the backend
6. Pushed to a registry so it can be used in a gallery
### Getting started
To get started, see the available backends in LocalAI here: https://github.com/mudler/LocalAI/tree/master/backend .
- For Python-based backends, there is a template that can be used as a starting point: https://github.com/mudler/LocalAI/tree/master/backend/python/common/template .
- For Golang-based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark
- For C++-based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama
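To make requirements 5 and 6 above concrete, here is a hedged sketch of packaging a backend; the binary name, the Dockerfile, and the registry path are placeholders, not an official template:

```bash
# A top-level run.sh is the entrypoint LocalAI invokes for the installed backend:
cat > run.sh <<'EOF'
#!/bin/bash
# Start the (hypothetical) backend binary shipped alongside this script
exec ./my-backend-binary "$@"
EOF
chmod +x run.sh

# Build an image that copies run.sh and the backend into it (a Dockerfile is assumed),
# then push it to a registry so a gallery entry can reference it:
docker build -t quay.io/example/my-backend:latest .
docker push quay.io/example/my-backend:latest
```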
### Publishing Your Backend
@@ -116,4 +123,4 @@ LocalAI supports various types of backends:
- **LLM Backends**: For running language models
- **Diffusion Backends**: For image generation
- **TTS Backends**: For text-to-speech conversion
- **Whisper Backends**: For speech-to-text conversion
- **Whisper Backends**: For speech-to-text conversion

View File

@@ -40,7 +40,7 @@ curl http://localhost:8080/v1/images/generations -H "Content-Type: application/j
### stablediffusion-ggml
This backend is based on [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp). Every model supported by that backend is suppoerted indeed with LocalAI.
This backend is based on [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp). Every model supported by that backend is also supported by LocalAI.
#### Setup
@@ -327,4 +327,4 @@ diffusers:
```bash
(echo -n '{"prompt": "spiderman surfing","size": "512x512","model":"txt2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
```
```

View File

@@ -127,13 +127,6 @@ docker build -t localai .
docker run localai
```
There are some build arguments that can be used to customize the build:
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `IMAGE_TYPE` | `extras` | Build type. Available: `core`, `extras` |
### Example: Build on mac
Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`.

View File

@@ -92,7 +92,7 @@ services:
- DEBUG=true
# ...
volumes:
- ./models:/build/models:cached
- ./models:/models:cached
# decomment the following piece if running with Nvidia GPUs
# deploy:
# resources:
@@ -105,21 +105,21 @@ services:
{{% alert icon="💡" %}}
**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).
If you want to use a named model or a local directory, you can mount it as a volume to `/build/models`:
If you want to use a named model or a local directory, you can mount it as a volume to `/models`:
```bash
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
```
or associate a volume:
```bash
docker volume create localai-models
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
```
{{% /alert %}}
@@ -150,10 +150,6 @@ The AIO Images are inheriting the same environment variables as the base images
Standard container images do not have pre-installed models.
Images are available with and without python dependencies (images with the `extras` suffix). Note that images with python dependencies are bigger (in order of 17GB).
Images with `core` in the tag are smaller and do not contain any python dependencies.
{{< tabs tabTotal="8" >}}
{{% tab tabName="Vanilla / CPU Images" %}}
@@ -169,10 +165,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda11` | `localai/localai:master-gpu-nvidia-cuda11` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11-extras` | `localai/localai:latest-gpu-nvidia-cuda-11-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda11` |
{{% /tab %}}
@@ -180,10 +175,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12` | `localai/localai:master-gpu-nvidia-cuda12` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12-extras` | `localai/localai:latest-gpu-nvidia-cuda-12-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda12` |
{{% /tab %}}
@@ -191,10 +185,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f16` | `localai/localai:master-gpu-intel-f16` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16-extras` | `localai/localai:latest-gpu-intel-f16-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16` | `localai/localai:{{< version >}}-gpu-intel-f16` |
{{% /tab %}}
@@ -202,10 +195,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f32` | `localai/localai:master-gpu-intel-f32` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32-extras` | `localai/localai:latest-gpu-intel-f32-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f32` | `localai/localai:{{< version >}}-gpu-intel-f32` |
{{% /tab %}}
@@ -213,10 +205,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-hipblas` | `localai/localai:master-hipblas` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-hipblas` | `localai/localai:master-gpu-hipblas` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-hipblas` | `localai/localai:latest-gpu-hipblas` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-hipblas-extras` | `localai/localai:latest-gpu-hipblas-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas` | `localai/localai:{{< version >}}-hipblas` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-hipblas` | `localai/localai:{{< version >}}-gpu-hipblas` |
{{% /tab %}}

View File

@@ -27,19 +27,68 @@ curl https://localai.io/install.sh | sh
See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options
### Run with docker:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
### Run with docker
# Nvidia GPU:
#### CPU only image:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
```
#### NVIDIA GPU Images:
```bash
# CUDA 12.0
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# CUDA 11.7
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
# NVIDIA Jetson (L4T) ARM64
# First, you need to have installed the nvidia container toolkit: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-ap
docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64
```
#### AMD GPU Images (ROCm):
```bash
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
```
#### Intel GPU Images (oneAPI):
```bash
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
```
#### Vulkan GPU Images:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
#### AIO Images (pre-downloaded models):
```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# NVIDIA CUDA 11 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```
### Load models:

View File

@@ -35,7 +35,7 @@ docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:
```bash
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
```
Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.

View File

@@ -288,8 +288,8 @@ From this release the default behavior of images has changed. Compilation is not
### Container images
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.20.0`
- FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda12-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda12-ffmpeg`
### Updates
@@ -339,8 +339,8 @@ You can check the full changelog in [Github](https://github.com/go-skynet/LocalA
Container images:
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.19.2`
- FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda12-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda12-ffmpeg`
---

View File

@@ -1,3 +1,3 @@
{
"version": "v2.29.0"
"version": "v3.1.1"
}

View File

@@ -44,7 +44,7 @@
{{ end }}
{{ end -}}
</div>
<div class="d-flex align-items-center m-1">
<div class="d-none d-md-flex d-flex align-items-center m-1">
<h5>Star us on GitHub !&nbsp;</h5>
<script async defer src="https://buttons.github.io/buttons.js"></script>
<a class="github-button" href="https://github.com/mudler/LocalAI" data-color-scheme="no-preference: light; light: light; dark: dark;" data-icon="octicon-star" data-size="large" data-show-count="true" aria-label="Star mudler/LocalAI on GitHub">Star</a>

View File

@@ -16,7 +16,6 @@
# Environment Variables:
# DOCKER_INSTALL - Set to "true" to install Docker images (default: auto-detected)
# USE_AIO - Set to "true" to use the all-in-one LocalAI image (default: false)
# USE_EXTRAS - Set to "true" to use images with extra Python dependencies (default: false)
# USE_VULKAN - Set to "true" to use Vulkan GPU support (default: false)
# API_KEY - API key for securing LocalAI access (default: none)
# PORT - Port to run LocalAI on (default: 8080)
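For reference, a hedged sketch of combining these variables with the documented installer one-liner (the values shown are illustrative; the AIO image pulls several gigabytes of pre-downloaded models):
```bash
# Docker-based install of the all-in-one image on a custom port, protected by an API key
curl -sSL https://localai.io/install.sh | \
  DOCKER_INSTALL=true USE_AIO=true PORT=9090 API_KEY=my-secret-key sh
```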
@@ -160,7 +159,6 @@ uninstall_localai() {
# DOCKER_INSTALL - set to "true" to install Docker images
# USE_AIO - set to "true" to install the all-in-one LocalAI image
# USE_EXTRAS - set to "true" to use images with extra Python dependencies
# USE_VULKAN - set to "true" to use Vulkan GPU support
PORT=${PORT:-8080}
@@ -175,7 +173,6 @@ fi
DOCKER_INSTALL=${DOCKER_INSTALL:-$docker_found}
USE_AIO=${USE_AIO:-false}
USE_EXTRAS=${USE_EXTRAS:-false}
USE_VULKAN=${USE_VULKAN:-false}
API_KEY=${API_KEY:-}
CORE_IMAGES=${CORE_IMAGES:-false}
@@ -666,7 +663,7 @@ install_docker() {
IMAGE_TAG=${LOCALAI_VERSION}-vulkan
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \
$SUDO docker run -v local-ai-data:/models \
--device /dev/dri \
--restart=always \
-e API_KEY=$API_KEY \
@@ -675,11 +672,7 @@ install_docker() {
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_CUDA" ]; then
# Default to CUDA 12
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12
# EXTRAS
if [ "$USE_EXTRAS" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12-extras
fi
IMAGE_TAG=${LOCALAI_VERSION}-gpu-nvidia-cuda12
# AIO
if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12
@@ -697,7 +690,7 @@ install_docker() {
fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \
$SUDO docker run -v local-ai-data:/models \
--gpus all \
--restart=always \
-e API_KEY=$API_KEY \
@@ -705,18 +698,14 @@ install_docker() {
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_AMD" ]; then
IMAGE_TAG=${LOCALAI_VERSION}-hipblas
# EXTRAS
if [ "$USE_EXTRAS" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-hipblas-extras
fi
IMAGE_TAG=${LOCALAI_VERSION}-gpu-hipblas
# AIO
if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-hipblas
fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \
$SUDO docker run -v local-ai-data:/models \
--device /dev/dri \
--device /dev/kfd \
--group-add=video \
@@ -727,18 +716,14 @@ install_docker() {
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_INTEL" ]; then
# Default to FP32 for better compatibility
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32
# EXTRAS
if [ "$USE_EXTRAS" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32-extras
fi
IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel-f32
# AIO
if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32
fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \
$SUDO docker run -v local-ai-data:/models \
--device /dev/dri \
--restart=always \
-e API_KEY=$API_KEY \


@@ -1,7 +1,7 @@
#!/bin/bash
set -e
cd /build
cd /
# If we have set EXTRA_BACKENDS, then we need to prepare the backends
if [ -n "$EXTRA_BACKENDS" ]; then
@@ -13,38 +13,23 @@ if [ -n "$EXTRA_BACKENDS" ]; then
done
fi
if [ "$REBUILD" != "false" ]; then
rm -rf ./local-ai
make build -j${BUILD_PARALLELISM:-1}
echo "CPU info:"
grep -e "model\sname" /proc/cpuinfo | head -1
grep -e "flags" /proc/cpuinfo | head -1
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
else
echo "@@@@@"
echo "Skipping rebuild"
echo "@@@@@"
echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"'
echo "see the documentation at: https://localai.io/basics/build/index.html"
echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
echo "@@@@@"
echo "CPU info:"
grep -e "model\sname" /proc/cpuinfo | head -1
grep -e "flags" /proc/cpuinfo | head -1
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
else
echo "CPU: no AVX found"
fi
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
echo "CPU: AVX2 found OK"
else
echo "CPU: no AVX2 found"
fi
if grep -q -e "\savx512" /proc/cpuinfo ; then
echo "CPU: AVX512 found OK"
else
echo "CPU: no AVX512 found"
fi
echo "@@@@@"
echo "CPU: no AVX found"
fi
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
echo "CPU: AVX2 found OK"
else
echo "CPU: no AVX2 found"
fi
if grep -q -e "\savx512" /proc/cpuinfo ; then
echo "CPU: AVX512 found OK"
else
echo "CPU: no AVX512 found"
fi
exec ./local-ai "$@"
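On the host, a rough equivalent of the capability check above can be run up front to see which SIMD instruction sets the CPU advertises before picking an image (a simple sketch using the same `/proc/cpuinfo` flags the script greps for):
```bash
# Show which AVX variants the host CPU exposes
grep -o -e '\bavx512[a-z]*\b' -e '\bavx2\b' -e '\bavx\b' /proc/cpuinfo | sort -u
```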


@@ -1170,6 +1170,446 @@
- filename: Yanfei-v2-Qwen3-32B.Q4_K_M.gguf
sha256: b9c87f5816a66e9036b4af013e3d658f8a11f5e987c44e6d4cb6c4f91e82d3df
uri: huggingface://mradermacher/Yanfei-v2-Qwen3-32B-GGUF/Yanfei-v2-Qwen3-32B.Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-the-josiefied-omega-directive-22b-uncensored-abliterated-i1"
icon: https://huggingface.co/DavidAU/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated/resolve/main/omega.jpg
urls:
- https://huggingface.co/DavidAU/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated
- https://huggingface.co/mradermacher/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated-i1-GGUF
description: |
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
A massive 22B, 62 layer merge of the fantastic "The-Omega-Directive-Qwen3-14B-v1.1" and off the scale "Goekdeniz-Guelmez/Josiefied-Qwen3-14B-abliterated-v3" in Qwen3, with full reasoning (can be turned on or off) and the model is completely uncensored/abliterated too.
overrides:
parameters:
model: Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
files:
- filename: Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
sha256: 3d43e00b685004688b05f75d77f756a84eaa24e042d536e12e3ce1faa71f8c64
uri: huggingface://mradermacher/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated-i1-GGUF/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "menlo_jan-nano"
icon: https://cdn-uploads.huggingface.co/production/uploads/65713d70f56f9538679e5a56/wC7Xtolp7HOFIdKTOJhVt.png
urls:
- https://huggingface.co/Menlo/Jan-nano
- https://huggingface.co/bartowski/Menlo_Jan-nano-GGUF
description: |
Jan-Nano is a compact 4-billion parameter language model specifically designed and trained for deep research tasks. This model has been optimized to work seamlessly with Model Context Protocol (MCP) servers, enabling efficient integration with various research tools and data sources.
overrides:
parameters:
model: Menlo_Jan-nano-Q4_K_M.gguf
files:
- filename: Menlo_Jan-nano-Q4_K_M.gguf
sha256: b90a30f226e6bce26ef9e0db444cb12530edf90b0eea0defc15b0e361fc698eb
uri: huggingface://bartowski/Menlo_Jan-nano-GGUF/Menlo_Jan-nano-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-the-xiaolong-omega-directive-22b-uncensored-abliterated-i1"
icon: https://huggingface.co/DavidAU/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated/resolve/main/little-dragon-moon.jpg
urls:
- https://huggingface.co/DavidAU/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated
- https://huggingface.co/mradermacher/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated-i1-GGUF
description: |
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
A massive 22B, 62 layer merge of the fantastic "The-Omega-Directive-Qwen3-14B-v1.1" (by ReadyArt) and off the scale "Xiaolong-Qwen3-14B" (by nbeerbower) in Qwen3, with full reasoning (can be turned on or off) and the model is completely uncensored/abliterated too.
overrides:
parameters:
model: Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
files:
- filename: Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
sha256: ecee2813ab0b9cc6f555aff81dfbfe380f7bdaf15cef475c8ff402462f4ddd41
uri: huggingface://mradermacher/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated-i1-GGUF/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "allura-org_q3-8b-kintsugi"
icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/o_fhP0riFrKh-5XyPxQyk.png
urls:
- https://huggingface.co/allura-org/Q3-8B-Kintsugi
- https://huggingface.co/allura-quants/allura-org_Q3-8B-Kintsugi-GGUF
description: |
Q3-8B-Kintsugi is a roleplaying model finetuned from Qwen3-8B-Base.
During testing, Kintsugi punched well above its weight class in terms of parameters, especially for 1-on-1 roleplaying and general storywriting.
overrides:
parameters:
model: Q3-8B-Kintsugi-Q4_K_M.GGUF
files:
- filename: Q3-8B-Kintsugi-Q4_K_M.GGUF
sha256: 2eecf44c709ef02794346d84f7d69ee30059c2a71186e4d18a0861958a4a52db
uri: huggingface://allura-quants/allura-org_Q3-8B-Kintsugi-GGUF/Q3-8B-Kintsugi-Q4_K_M.GGUF
- !!merge <<: *qwen3
name: "ds-r1-qwen3-8b-arliai-rpr-v4-small-iq-imatrix"
icon: https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/hIZ2ZcaDyfYLT9Yd4pfOs.jpeg
urls:
- https://huggingface.co/ArliAI/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small
- https://huggingface.co/Lewdiculous/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-GGUF-IQ-Imatrix
description: |
The best RP/creative model series from ArliAI yet again. This time based on DS-R1-0528-Qwen3-8B-Fast for a smaller memory footprint.
Reduced repetitions and impersonation
To add to the creativity and out-of-the-box thinking of RpR v3, a more advanced filtering method was used in order to remove examples where the LLM repeated similar phrases or talked for the user. Any repetition or impersonation that does occur will be due to how the base QwQ model was trained, and not because of the RpR dataset.
Increased training sequence length
The training sequence length was increased to 16K in order to help awareness and memory even on longer chats.
overrides:
parameters:
model: DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-Q4_K_M-imat.gguf
files:
- filename: DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-Q4_K_M-imat.gguf
sha256: b40be91d3d2f2497efa849e69f0bb303956b54e658f57bc39c41dba424018d71
uri: huggingface://Lewdiculous/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-GGUF-IQ-Imatrix/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-Q4_K_M-imat.gguf
- !!merge <<: *qwen3
name: "menlo_jan-nano-128k"
icon: https://cdn-uploads.huggingface.co/production/uploads/65713d70f56f9538679e5a56/NP7CvcjOtLX8mST0t7eAM.png
urls:
- https://huggingface.co/Menlo/Jan-nano-128k
- https://huggingface.co/bartowski/Menlo_Jan-nano-128k-GGUF
description: |
Jan-Nano-128k represents a significant advancement in compact language models for research applications. Building upon the success of Jan-Nano, this enhanced version features a native 128k context window that enables deeper, more comprehensive research capabilities without the performance degradation typically associated with context extension methods.
Key Improvements:
🔍 Research Deeper: Extended context allows for processing entire research papers, lengthy documents, and complex multi-turn conversations
⚡ Native 128k Window: Built from the ground up to handle long contexts efficiently, maintaining performance across the full context range
📈 Enhanced Performance: Unlike traditional context extension methods, Jan-Nano-128k shows improved performance with longer contexts
This model maintains full compatibility with Model Context Protocol (MCP) servers while dramatically expanding the scope of research tasks it can handle in a single session.
overrides:
parameters:
model: Menlo_Jan-nano-128k-Q4_K_M.gguf
files:
- filename: Menlo_Jan-nano-128k-Q4_K_M.gguf
sha256: a864031a138288da427ca176afd61d7fe2b03fd19a84a656b2691aa1f7a12921
uri: huggingface://bartowski/Menlo_Jan-nano-128k-GGUF/Menlo_Jan-nano-128k-Q4_K_M.gguf
- !!merge <<: *qwen3
icon: https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-V1.3/resolve/main/qwen3-total-recall.gif
name: "qwen3-55b-a3b-total-recall-v1.3-i1"
urls:
- https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-V1.3
- https://huggingface.co/mradermacher/Qwen3-55B-A3B-TOTAL-RECALL-V1.3-i1-GGUF
description: |
WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
This model is for all use cases, but excels in creative use cases specifically.
This model is based on Qwen3-30B-A3B (MOE, 128 experts, 8 activated), with Brainstorm 40X (by DavidAU - details at bottom of this page).
This is the refined version -V1.3- from this project (see this repo for all settings, details, system prompts, example generations etc etc):
https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF/
This version -1.3- is slightly smaller, with further refinements to the Brainstorm adapter.
This will change generation and reasoning performance within the model.
overrides:
parameters:
model: Qwen3-55B-A3B-TOTAL-RECALL-V1.3.i1-Q4_K_M.gguf
files:
- filename: Qwen3-55B-A3B-TOTAL-RECALL-V1.3.i1-Q4_K_M.gguf
sha256: bcf5a1f8a40e9438a19b23dfb40e872561c310296c5ac804f937a0e3c1376def
uri: huggingface://mradermacher/Qwen3-55B-A3B-TOTAL-RECALL-V1.3-i1-GGUF/Qwen3-55B-A3B-TOTAL-RECALL-V1.3.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-55b-a3b-total-recall-deep-40x"
icon: https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-V1.3/resolve/main/qwen3-total-recall.gif
urls:
- https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF
description: |
WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF
A highly experimental model ("tamer" versions below) based on Qwen3-30B-A3B (MOE, 128 experts, 8 activated), with Brainstorm 40X (by DavidAU - details at bottom of this page).
These modifications blow the model (V1) out to 87 layers, 1046 tensors and 55B parameters.
Note that some versions are smaller than this, with fewer layers/tensors and smaller parameter counts.
The adapter extensively alters performance, reasoning and output generation.
Exceptional changes in creative, prose and general performance.
Regens of the same prompt - even with the same settings - will be very different.
THREE example generations below - creative (generated with Q3_K_M, V1 model).
ONE example generation (#4) - non creative (generated with Q3_K_M, V1 model).
You can run this model on CPU and/or GPU due to unique model construction, size of experts and total activated experts at 3B parameters (8 experts), which translates into roughly 6B parameters in this version.
Two quants uploaded for testing: Q3_K_M, Q4_K_M
V3, V4 and V5 are also available in these two quants.
V2 and V6 in Q3_k_m only; as are: V 1.3, 1.4, 1.5, 1.7 and V7 (newest)
NOTE: V2 and up are from source model 2, V1 and 1.3,1.4,1.5,1.7 are from source model 1.
overrides:
parameters:
model: Qwen3-55B-A3B-TOTAL-RECALL-V5-Deep-40X-q4_K_M.gguf
files:
- filename: Qwen3-55B-A3B-TOTAL-RECALL-V5-Deep-40X-q4_K_M.gguf
sha256: 20ef786a8c8e74eb257aa3069e237cbd40f42d25f5502fed6fa016bb8afbdae4
uri: huggingface://DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF/Qwen3-55B-A3B-TOTAL-RECALL-V5-Deep-40X-q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-42b-a3b-stranger-thoughts-deep20x-abliterated-uncensored-i1"
icon: https://huggingface.co/DavidAU/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored/resolve/main/qwen-42b-ablit.jpg
urls:
- https://huggingface.co/DavidAU/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored
- https://huggingface.co/mradermacher/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored-i1-GGUF
description: |
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
ABOUT:
Qwen's excellent "Qwen3-30B-A3B", abliterated by "huihui-ai" then combined Brainstorm 20x (tech notes at bottom of the page) in a MOE (128 experts) at 42B parameters (up from 30B).
This pushes Qwen's abliterated/uncensored model to the absolute limit for creative use cases.
Prose (all), reasoning, thinking ... all will be very different from reg "Qwen 3s".
This model will generate horror, fiction, erotica, - you name it - in vivid, stark detail.
It will NOT hold back.
Likewise, regen(s) of the same prompt - even at the same settings - will create very different version(s) too.
See FOUR examples below.
Model retains full reasoning, and output generation of a Qwen3 MOE ; but has not been tested for "non-creative" use cases.
Model is set with Qwen's default config:
40 k context
8 of 128 experts activated.
Chatml OR Jinja Template (embedded)
IMPORTANT:
See usage guide / repo below to get the most out of this model, as settings are very specific.
USAGE GUIDE:
Please refer to this model card for
Specific usage, suggested settings, changing ACTIVE EXPERTS, templates, settings and the like:
How to maximize this model in "uncensored" form, with specific notes on "abliterated" models.
Rep pen / temp settings specific to getting the model to perform strongly.
https://huggingface.co/DavidAU/Qwen3-18B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF
GGUF / QUANTS / SPECIAL SHOUTOUT:
Special thanks to team Mradermacher for making the quants!
https://huggingface.co/mradermacher/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored-GGUF
KNOWN ISSUES:
Model may "mis-capitalize" word(s) - lowercase, where uppercase should be - from time to time.
Model may add extra space from time to time before a word.
Incorrect template and/or settings will result in a drop in performance / poor performance.
overrides:
parameters:
model: Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored.i1-Q4_K_M.gguf
files:
- filename: Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored.i1-Q4_K_M.gguf
sha256: ef4a601adfc2897b214cda2d16f76dcb8215a1b994bc76c696158d68ec535dd8
uri: huggingface://mradermacher/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored-i1-GGUF/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-22b-a3b-the-harley-quinn"
icon: https://huggingface.co/DavidAU/Qwen3-22B-A3B-The-Harley-Quinn/resolve/main/qwen3-harley-quinn-23b.webp
urls:
- https://huggingface.co/DavidAU/Qwen3-22B-A3B-The-Harley-Quinn
- https://huggingface.co/mradermacher/Qwen3-22B-A3B-The-Harley-Quinn-GGUF
description: |
WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
Qwen3-22B-A3B-The-Harley-Quinn
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
ABOUT:
A stranger, yet radically different version of Kalmaze's "Qwen/Qwen3-16B-A3B" with the experts pruned to 64 (from 128, the Qwen 3 30B-A3B version), then expanded by 19 added layers (Brainstorm 20x by DavidAU, info at bottom of this page) to 22B total parameters.
The goal: slightly alter the model, to address some odd creative thinking and output choices.
Then... Harley Quinn showed up, and then it was a party!
A wild, out of control (sometimes) but never boring party.
Please note that the modifications affect the entire model operation; roughly I adjusted the model to think a little "deeper" and "ponder" a bit - but this is a very rough description.
That being said, reasoning and output generation will be altered regardless of your use case(s).
These modifications push Qwen's model to the absolute limit for creative use cases.
Detail, vividness, and creativity all get a boost.
Prose (all) will also be very different from "default" Qwen3.
Likewise, regen(s) of the same prompt - even at the same settings - will create very different version(s) too.
The Brainstorm 20x has also lightly de-censored the model under some conditions.
However, this model can be prone to bouts of madness.
It will not always behave, and it will sometimes go -wildly- off script.
See 4 examples below.
Model retains full reasoning, and output generation of a Qwen3 MOE ; but has not been tested for "non-creative" use cases.
Model is set with Qwen's default config:
40 k context
8 of 64 experts activated.
Chatml OR Jinja Template (embedded)
Four example generations below.
IMPORTANT:
See usage guide / repo below to get the most out of this model, as settings are very specific.
If not set correctly, this model will not work the way it should.
Critical settings:
Chatml or Jinja Template (embedded, but updated version at repo below)
Rep pen of 1.01 or 1.02 ; higher (1.04, 1.05) will result in "Harley Mode".
Temp range of .6 to 1.2; at higher temps you may need to prompt the model to "output" after thinking.
Experts set at 8-10 ; higher will result in "odder" output BUT it might be better.
That being said, "Harley Quinn" may make her presence known at any moment.
USAGE GUIDE:
Please refer to this model card for
Specific usage, suggested settings, changing ACTIVE EXPERTS, templates, settings and the like:
How to maximize this model in "uncensored" form, with specific notes on "abliterated" models.
Rep pen / temp settings specific to getting the model to perform strongly.
https://huggingface.co/DavidAU/Qwen3-18B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF
GGUF / QUANTS / SPECIAL SHOUTOUT:
Special thanks to team Mradermacher for making the quants!
https://huggingface.co/mradermacher/Qwen3-22B-A3B-The-Harley-Quinn-GGUF
KNOWN ISSUES:
Model may "mis-capitalize" word(s) - lowercase, where uppercase should be - from time to time.
Model may add extra space from time to time before a word.
Incorrect template and/or settings will result in a drop in performance / poor performance.
Can rant at the end / repeat. Most of the time it will stop on its own.
Looking for the Abliterated / Uncensored version?
https://huggingface.co/DavidAU/Qwen3-23B-A3B-The-Harley-Quinn-PUDDIN-Abliterated-Uncensored
In some cases this "abliterated/uncensored" version may work better than this version.
EXAMPLES
Standard system prompt, rep pen 1.01-1.02, topk 100, topp .95, minp .05, rep pen range 64.
Tested in LMStudio, quant Q4KS, GPU (CPU output will differ slightly).
As this is a mid-range quant, expect better results from higher quants and/or with more experts activated.
NOTE: Some formatting lost on copy/paste.
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
overrides:
parameters:
model: Qwen3-22B-A3B-The-Harley-Quinn.Q4_K_M.gguf
files:
- filename: Qwen3-22B-A3B-The-Harley-Quinn.Q4_K_M.gguf
sha256: a3666754efde5d6c054de53cff0f38f1bb4a20117e2502eed7018ae57017b0a2
uri: huggingface://mradermacher/Qwen3-22B-A3B-The-Harley-Quinn-GGUF/Qwen3-22B-A3B-The-Harley-Quinn.Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-33b-a3b-stranger-thoughts-abliterated-uncensored"
icon: https://huggingface.co/DavidAU/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored/resolve/main/qwen3-33b-ablit.jpg
urls:
- https://huggingface.co/DavidAU/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored
- https://huggingface.co/mradermacher/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF
description: |
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
ABOUT:
A stranger, yet radically different version of "Qwen/Qwen3-30B-A3B", abliterated by "huihui-ai" , with 4 added layers expanding the model to 33B total parameters.
The goal: slightly alter the model, to address some odd creative thinking and output choices AND de-censor it.
Please note that the modifications affect the entire model operation; roughly I adjusted the model to think a little "deeper" and "ponder" a bit - but this is a very rough description.
I also ran reasoning tests (non-creative) to ensure model was not damaged and roughly matched original model performance.
That being said, reasoning and output generation will be altered regardless of your use case(s)
overrides:
parameters:
model: Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored.Q4_K_M.gguf
files:
- filename: Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored.Q4_K_M.gguf
sha256: fc0f028ab04d4643032e5bf65c3b51ba947e97b4f562c4fc25c06b6a20b14616
uri: huggingface://mradermacher/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored.Q4_K_M.gguf
- !!merge <<: *qwen3
name: "pinkpixel_crystal-think-v2"
icon: https://huggingface.co/PinkPixel/Crystal-Think-V2/resolve/main/crystal-think-v2-logo.png
urls:
- https://huggingface.co/PinkPixel/Crystal-Think-V2
- https://huggingface.co/bartowski/PinkPixel_Crystal-Think-V2-GGUF
description: |
Crystal-Think is a specialized mathematical reasoning model based on Qwen3-4B, fine-tuned using Group Relative Policy Optimization (GRPO) on NVIDIA's OpenMathReasoning dataset. Version 2 introduces the new <think></think> reasoning format for enhanced step-by-step mathematical problem solving, algebraic reasoning, and mathematical code generation.
overrides:
parameters:
model: PinkPixel_Crystal-Think-V2-Q4_K_M.gguf
files:
- filename: PinkPixel_Crystal-Think-V2-Q4_K_M.gguf
sha256: 10f2558089c90bc9ef8036ac0b1142ad8991902ec83840a00710fd654df19aaa
uri: huggingface://bartowski/PinkPixel_Crystal-Think-V2-GGUF/PinkPixel_Crystal-Think-V2-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "helpingai_dhanishtha-2.0-preview"
urls:
- https://huggingface.co/HelpingAI/Dhanishtha-2.0-preview
- https://huggingface.co/bartowski/HelpingAI_Dhanishtha-2.0-preview-GGUF
description: |
What makes Dhanishtha-2.0 special? Imagine an AI that doesn't just answer your questions instantly, but actually thinks through problems step-by-step, shows its work, and can even change its mind when it realizes a better approach. That's Dhanishtha-2.0.
Quick Summary:
🚀 For Everyone: An AI that shows its thinking process and can reconsider its reasoning
👩‍💻 For Developers: First model with intermediate thinking capabilities, 39+ language support
Dhanishtha-2.0 is a state-of-the-art (SOTA) model developed by HelpingAI, representing the world's first model to feature Intermediate Thinking capabilities. Unlike traditional models that provide single-pass responses, Dhanishtha-2.0 employs a revolutionary multi-phase thinking process that allows the model to think, reconsider, and refine its reasoning multiple times throughout a single response.
overrides:
parameters:
model: HelpingAI_Dhanishtha-2.0-preview-Q4_K_M.gguf
files:
- filename: HelpingAI_Dhanishtha-2.0-preview-Q4_K_M.gguf
sha256: 026a1f80187c9ecdd0227816a35661f3b6b7abe85971121b4c1c25b6cdd7ab86
uri: huggingface://bartowski/HelpingAI_Dhanishtha-2.0-preview-GGUF/HelpingAI_Dhanishtha-2.0-preview-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "agentica-org_deepswe-preview"
icon: https://hebbkx1anhila5yf.public.blob.vercel-storage.com/IMG_3783-N75vmFhDaJtJkLR4d8pdBymos68DPo.png
urls:
- https://huggingface.co/agentica-org/DeepSWE-Preview
- https://huggingface.co/bartowski/agentica-org_DeepSWE-Preview-GGUF
description: |
DeepSWE-Preview is a fully open-sourced, state-of-the-art coding agent trained with only reinforcement learning (RL) to excel at software engineering (SWE) tasks. DeepSWE-Preview demonstrates strong reasoning capabilities in navigating complex codebases and viewing/editing multiple files, and it serves as a foundational model for future coding agents. The model achieves an impressive 59.0% on SWE-Bench-Verified, which is currently #1 in the open-weights category.
DeepSWE-Preview is trained on top of Qwen3-32B with thinking mode enabled. With just 200 steps of RL training, SWE-Bench-Verified score increases by ~20%.
overrides:
parameters:
model: agentica-org_DeepSWE-Preview-Q4_K_M.gguf
files:
- filename: agentica-org_DeepSWE-Preview-Q4_K_M.gguf
sha256: 196a7128d3b7a59f1647792bb72c17db306f773e78d5a47feeeea92e672d761b
uri: huggingface://bartowski/agentica-org_DeepSWE-Preview-GGUF/agentica-org_DeepSWE-Preview-Q4_K_M.gguf
- &gemma3
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
name: "gemma-3-27b-it"
@@ -1867,6 +2307,65 @@
- filename: medgemma-27b-text-it-Q4_K_M.gguf
sha256: 383b1c414d3f2f1a9c577a61e623d29a4ed4f7834f60b9e5412f5ff4e8aaf080
uri: huggingface://unsloth/medgemma-27b-text-it-GGUF/medgemma-27b-text-it-Q4_K_M.gguf
- !!merge <<: *gemma3
name: "gemma-3n-e2b-it"
urls:
- https://huggingface.co/google/gemma-3n-E2B-it
- https://huggingface.co/ggml-org/gemma-3n-E2B-it-GGUF
description: |
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. Gemma 3n models are designed for efficient execution on low-resource devices. They are capable of multimodal input, handling text, image, video, and audio input, and generating text outputs, with open weights for pre-trained and instruction-tuned variants. These models were trained with data in over 140 spoken languages.
Gemma 3n models use selective parameter activation technology to reduce resource requirements. This technique allows the models to operate at an effective size of 2B and 4B parameters, which is lower than the total number of parameters they contain. For more information on Gemma 3n's efficient parameter management technology, see the Gemma 3n page.
overrides:
parameters:
model: gemma-3n-E2B-it-Q8_0.gguf
files:
- filename: gemma-3n-E2B-it-Q8_0.gguf
sha256: 038a47c482e7af3009c462b56a7592e1ade3c7862540717aa1d9dee1760c337b
uri: huggingface://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf
- !!merge <<: *gemma3
name: "gemma-3n-e4b-it"
urls:
- https://huggingface.co/google/gemma-3n-E4B-it
- https://huggingface.co/ggml-org/gemma-3n-E4B-it-GGUF
description: |
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. Gemma 3n models are designed for efficient execution on low-resource devices. They are capable of multimodal input, handling text, image, video, and audio input, and generating text outputs, with open weights for pre-trained and instruction-tuned variants. These models were trained with data in over 140 spoken languages.
Gemma 3n models use selective parameter activation technology to reduce resource requirements. This technique allows the models to operate at an effective size of 2B and 4B parameters, which is lower than the total number of parameters they contain. For more information on Gemma 3n's efficient parameter management technology, see the Gemma 3n page.
overrides:
parameters:
model: gemma-3n-E4B-it-Q8_0.gguf
files:
- filename: gemma-3n-E4B-it-Q8_0.gguf
sha256: 9f74079242c765116bd1f33123aa07160b5e93578c2d0032594b7ed97576f9c3
uri: huggingface://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf
- !!merge <<: *gemma3
name: "gemma-3-4b-it-max-horror-uncensored-dbl-x-imatrix"
icon: https://huggingface.co/DavidAU/Gemma-3-4b-it-MAX-HORROR-Uncensored-DBL-X-Imatrix-GGUF/resolve/main/gemma4-horror-max2.jpg
urls:
- https://huggingface.co/DavidAU/Gemma-3-4b-it-MAX-HORROR-Uncensored-DBL-X-Imatrix-GGUF
description: |
Google's newest Gemma-3 model that has been uncensored by David_AU (maintains instruction following / model performance and adds 4 layers to the model) and re-enforced with a system prompt (optional) - see below.
The "Horror Imatrix" was built using Grand Horror 16B (at my repo). This adds a "tint" of horror to the model.
5 examples provided (NSFW / F-Bombs galore) below with prompts at IQ4XS (56 t/s on mid level card).
Context: 128k.
"MAXED"
This means the embed and output tensor are set at "BF16" (full precision) for all quants. This enhances quality, depth and general performance at the cost of a slightly larger quant.
"HORROR IMATRIX"
A strong, in house built, imatrix dataset built by David_AU which results in better overall function, instruction following, output quality and stronger connections to ideas, concepts and the world in general.
This combines with "MAXing" the quant to improve performance.
overrides:
parameters:
model: Gemma-3-4b-it-MAX-HORROR-Uncensored-D_AU-Q4_K_M-imat.gguf
files:
- filename: Gemma-3-4b-it-MAX-HORROR-Uncensored-D_AU-Q4_K_M-imat.gguf
sha256: 1c577e4c84311c39b3d54b0cef12857ad46e88755f858143accbfcca7cc9fc6b
uri: huggingface://DavidAU/Gemma-3-4b-it-MAX-HORROR-Uncensored-DBL-X-Imatrix-GGUF/Gemma-3-4b-it-MAX-HORROR-Uncensored-D_AU-Q4_K_M-imat.gguf
- &llama4
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
@@ -3145,6 +3644,38 @@
- filename: sophosympatheia_StrawberryLemonade-L3-70B-v1.0-Q4_K_M.gguf
sha256: 354472a2946598e0df376f9ecb91f83d7bc9c1b32db46bf48d3ea76f892f2a97
uri: huggingface://bartowski/sophosympatheia_StrawberryLemonade-L3-70B-v1.0-GGUF/sophosympatheia_StrawberryLemonade-L3-70B-v1.0-Q4_K_M.gguf
- !!merge <<: *llama33
name: "steelskull_l3.3-shakudo-70b"
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/Y3_fED_Re3U1rd0jOPnAR.jpeg
urls:
- https://huggingface.co/Steelskull/L3.3-Shakudo-70b
- https://huggingface.co/bartowski/Steelskull_L3.3-Shakudo-70b-GGUF
description: |
L3.3-Shakudo-70b is the result of a multi-stage merging process by Steelskull, designed to create a powerful and creative roleplaying model with a unique flavor. The creation process involved several advanced merging techniques, including weight twisting, to achieve its distinct characteristics.
Stage 1: The Cognitive Foundation & Weight Twisting
The process began by creating a cognitive and tool-use focused base model, L3.3-Cogmoblated-70B. This was achieved through a `model_stock` merge of several models known for their reasoning and instruction-following capabilities. This base was built upon `nbeerbower/Llama-3.1-Nemotron-lorablated-70B`, a model intentionally "ablated" to skew refusal behaviors. This technique, known as weight twisting, helps the final model adopt more desirable response patterns by building upon a foundation that is already aligned against common refusal patterns.
Stage 2: The Twin Hydrargyrum - Flavor and Depth
Two distinct models were then created from the Cogmoblated base:
L3.3-M1-Hydrargyrum-70B: This model was merged using `SCE`, a technique that enhances creative writing and prose style, giving the model its unique "flavor." The Top_K for this merge was set at 0.22.
L3.3-M2-Hydrargyrum-70B: This model was created using a `Della_Linear` merge, which focuses on integrating the "depth" of various roleplaying and narrative models. The settings for this merge were set at: (lambda: 1.1) (weight: 0.2) (density: 0.7) (epsilon: 0.2)
Final Stage: Shakudo
The final model, L3.3-Shakudo-70b, was created by merging the two Hydrargyrum variants using a 50/50 `nuslerp`. This final step combines the rich, creative prose (flavor) from the SCE merge with the strong roleplaying capabilities (depth) from the Della_Linear merge, resulting in a model with a distinct and refined narrative voice.
A special thank you to Nectar.ai for their generous support of the open-source community and my projects.
Additionally, a heartfelt thanks to all the Ko-fi supporters who have contributed—your generosity is deeply appreciated and helps keep this work going and the Pods spinning.
overrides:
parameters:
model: Steelskull_L3.3-Shakudo-70b-Q4_K_M.gguf
files:
- filename: Steelskull_L3.3-Shakudo-70b-Q4_K_M.gguf
sha256: 54590c02226f12c6f48a4af6bfed0e3c90130addd1fb8a2b4fcc1f0ab1674ef7
uri: huggingface://bartowski/Steelskull_L3.3-Shakudo-70b-GGUF/Steelskull_L3.3-Shakudo-70b-Q4_K_M.gguf
- &rwkv
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
name: "rwkv-6-world-7b"
@@ -3355,6 +3886,27 @@
- filename: Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
sha256: 86ac8efb86daf241792ac3d5d35b7da92c54901b4208a6f2829bd03d8f273c9c
uri: huggingface://mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
- !!merge <<: *qwen25coder
name: "skywork_skywork-swe-32b"
icon: https://cdn-uploads.huggingface.co/production/uploads/6665dd2b3a64c70529f7542c/8o-IE7N3GwSFCIH3ntc8E.png
urls:
- https://huggingface.co/Skywork/Skywork-SWE-32B
- https://huggingface.co/bartowski/Skywork_Skywork-SWE-32B-GGUF
description: |
Skywork-SWE-32B is a code agent model developed by Skywork AI, specifically designed for software engineering (SWE) tasks. It demonstrates strong performance across several key metrics:
Skywork-SWE-32B attains 38.0% pass@1 accuracy on the SWE-bench Verified benchmark, outperforming previous open-source SoTA Qwen2.5-Coder-32B-based LLMs built on the OpenHands agent framework.
When incorporated with test-time scaling techniques, the performance further improves to 47.0% accuracy, surpassing the previous SoTA results for sub-32B parameter models.
We clearly demonstrate the data scaling law phenomenon for software engineering capabilities in LLMs, with no signs of saturation at 8209 collected training trajectories.
We also introduce an efficient and automated pipeline for SWE data collection, culminating in the creation of the Skywork-SWE dataset---a large-scale, high-quality dataset featuring comprehensive executable runtime environments. Detailed descriptions are available on our technical report.
overrides:
parameters:
model: Skywork_Skywork-SWE-32B-Q4_K_M.gguf
files:
- filename: Skywork_Skywork-SWE-32B-Q4_K_M.gguf
sha256: b5a451fa677159d7ab0aee64eeec5933aa4e5bd598e400501ecec3af0a767fa8
uri: huggingface://bartowski/Skywork_Skywork-SWE-32B-GGUF/Skywork_Skywork-SWE-32B-Q4_K_M.gguf
- &opencoder
name: "opencoder-8b-base"
icon: https://avatars.githubusercontent.com/u/186387526
@@ -10390,6 +10942,80 @@
- filename: mmproj-ultravox-v0_5-llama-3_1-8b-f16.gguf
sha256: e6395ed42124303eaa9fca934452aabce14c59d2a56fab2dda65b798442289ff
uri: https://huggingface.co/ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF/resolve/main/mmproj-ultravox-v0_5-llama-3_1-8b-f16.gguf
- !!merge <<: *llama31
name: "astrosage-70b"
urls:
- https://huggingface.co/AstroMLab/AstroSage-70B
- https://huggingface.co/mradermacher/AstroSage-70B-GGUF
description: |
Developed by: AstroMLab (Tijmen de Haan, Yuan-Sen Ting, Tirthankar Ghosal, Tuan Dung Nguyen, Alberto Accomazzi, Emily Herron, Vanessa Lama, Azton Wells, Nesar Ramachandra, Rui Pan)
Funded by:
Oak Ridge Leadership Computing Facility (OLCF), a DOE Office of Science User Facility at Oak Ridge National Laboratory (U.S. Department of Energy).
Microsoft's Accelerating Foundation Models Research (AFMR) program.
World Premier International Research Center Initiative (WPI), MEXT, Japan.
National Science Foundation (NSF).
UChicago Argonne LLC, Operator of Argonne National Laboratory (U.S. Department of Energy).
Reference Paper: Tijmen de Haan et al. (2025). "AstroMLab 4: Benchmark-Topping Performance in Astronomy Q&A with a 70B-Parameter Domain-Specialized Reasoning Model" https://arxiv.org/abs/2505.17592
Model Type: Autoregressive transformer-based LLM, specialized in astronomy, astrophysics, space science, astroparticle physics, cosmology, and astronomical instrumentation.
Model Architecture: AstroSage-70B is a fine-tuned derivative of the Meta-Llama-3.1-70B architecture, making no architectural changes. The Llama-3.1-70B-Instruct tokenizer is also used without modification.
Context Length: Fine-tuned on 8192-token sequences. Base model was trained to 128k context length.
AstroSage-70B is a large-scale, domain-specialized language model tailored for research and education in astronomy, astrophysics, space science, cosmology, and astronomical instrumentation. It builds on the Llama-3.1-70B foundation model, enhanced through extensive continued pre-training (CPT) on a vast corpus of astronomical literature, further refined with supervised fine-tuning (SFT) on instruction-following datasets, and finally enhanced via parameter averaging (model merging) with other popular fine tunes. AstroSage-70B aims to achieve state-of-the-art performance on astronomy-specific tasks, providing researchers, students, and enthusiasts with an advanced AI assistant. This 70B parameter model represents a significant scaling up from the AstroSage-8B model. The primary enhancements from the AstroSage-8B model are:
Stronger base model, higher parameter count for increased capacity
Improved datasets
Improved learning hyperparameters
Reasoning capability (can be enabled or disabled at inference time)
Training Lineage
Base Model: Meta-Llama-3.1-70B.
Continued Pre-Training (CPT): The base model underwent 2.5 epochs of CPT (168k GPU-hours) on a specialized astronomy corpus (details below, largely inherited from AstroSage-8B) to produce AstroSage-70B-CPT. This stage imbues domain-specific knowledge and language nuances.
Supervised Fine-Tuning (SFT): AstroSage-70B-CPT was then fine-tuned for 0.6 epochs (13k GPU-hours) using astronomy-relevant and general-purpose instruction-following datasets, resulting in AstroSage-70B-SFT.
Final Mixture: The released AstroSage-70B model is created via parameter averaging / model merging:
DARE-TIES with rescale: true and lambda: 1.2
AstroSage-70B-CPT designated as the "base model"
70% AstroSage-70B-SFT (density 0.7)
15% Llama-3.1-Nemotron-70B-Instruct (density 0.5)
7.5% Llama-3.3-70B-Instruct (density 0.5)
7.5% Llama-3.1-70B-Instruct (density 0.5)
Intended Use: Like AstroSage-8B, this model can be used for a variety of LLM applications, including
Providing factual information and explanations in astronomy, astrophysics, cosmology, and instrumentation.
Assisting with literature reviews and summarizing scientific papers.
Answering domain-specific questions with high accuracy.
Brainstorming research ideas and formulating hypotheses.
Assisting with programming tasks related to astronomical data analysis.
Serving as an educational tool for learning astronomical concepts.
Potentially forming the core of future agentic research assistants capable of more autonomous scientific tasks.
overrides:
parameters:
model: AstroSage-70B.Q4_K_M.gguf
files:
- filename: AstroSage-70B.Q4_K_M.gguf
sha256: 1d98dabfa001d358d9f95d2deba93a94ad8baa8839c75a0129cdb6bcf1507f38
uri: huggingface://mradermacher/AstroSage-70B-GGUF/AstroSage-70B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "thedrummer_anubis-70b-v1.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/G-NwpVtnbdfdnPusYDzx3.png
urls:
- https://huggingface.co/TheDrummer/Anubis-70B-v1.1
- https://huggingface.co/bartowski/TheDrummer_Anubis-70B-v1.1-GGUF
description: |
A follow up to Anubis 70B v1.0 but with two main strengths: character adherence and unalignment.
This is not a minor update to Anubis. It is a totally different beast.
The model does a fantastic job portraying my various characters without fail, adhering to them in such a refreshing and pleasing degree with their dialogue and mannerisms, while also being able to impart a very nice and fresh style that doesn't feel like any other L3.3 models.
I do think it's a solid improvement though, like it nails characters.
It feels fresh. I am quite impressed by how it picked up on and emphasized subtle details I have not seen other models do in one of my historically accurate character cards.
Anubis v1.1 is in my main model rotation now, I really like it! -Tarek
overrides:
parameters:
model: TheDrummer_Anubis-70B-v1.1-Q4_K_M.gguf
files:
- filename: TheDrummer_Anubis-70B-v1.1-Q4_K_M.gguf
sha256: a73bed551c64703737f598f1120aac28d1a62c08b5dbe2208da810936bb2522d
uri: huggingface://bartowski/TheDrummer_Anubis-70B-v1.1-GGUF/TheDrummer_Anubis-70B-v1.1-Q4_K_M.gguf
- &deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" ## Deepseek
name: "deepseek-coder-v2-lite-instruct"
@@ -12655,6 +13281,135 @@
- filename: mistralai_Magistral-Small-2506-Q4_K_M.gguf
sha256: b681b81ba30238b7654db77b4b3afa7b0f6226c84d8bbd5a5dfb1a5a3cb95816
uri: huggingface://bartowski/mistralai_Magistral-Small-2506-GGUF/mistralai_Magistral-Small-2506-Q4_K_M.gguf
- !!merge <<: *mistral03
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png
name: "mistralai_mistral-small-3.2-24b-instruct-2506"
urls:
- https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506
- https://huggingface.co/bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF
description: |
Mistral-Small-3.2-24B-Instruct-2506 is a minor update of Mistral-Small-3.1-24B-Instruct-2503.
Small-3.2 improves in the following categories:
Instruction following: Small-3.2 is better at following precise instructions
Repetition errors: Small-3.2 produces less infinite generations or repetitive answers
Function calling: Small-3.2's function calling template is more robust (see here and examples)
In all other categories Small-3.2 should match or slightly improve compared to Mistral-Small-3.1-24B-Instruct-2503.
overrides:
parameters:
model: mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf
files:
- filename: mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf
sha256: 2ad86e0934a4d6f021c1dbcf12d81aac75a84edd3a929294c09cb1cb6117627c
uri: huggingface://bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF/mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf
- !!merge <<: *mistral03
icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/jxUvuFK1bdOdAPiYIcBW5.jpeg
name: "delta-vector_austral-24b-winton"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/Delta-Vector/Austral-24B-Winton
- https://huggingface.co/bartowski/Delta-Vector_Austral-24B-Winton-GGUF
description: |
More than 1.5 metres tall, about six metres long and up to 1000 kilograms heavy, Australovenator Wintonensis was a fast and agile hunter, and the largest known Australian theropod.
This is a finetune of Harbinger 24B to be a generalist Roleplay/Adventure model. I've removed some of the "slops" that I noticed in an otherwise great model, as well as improving the general writing of the model. This was a multi-stage finetune; all previous checkpoints are released as well.
overrides:
parameters:
model: Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
files:
- filename: Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
sha256: feb76e0158d1ebba1809de89d01671b86037f768ebd5f6fb165885ae6338b1b7
uri: huggingface://bartowski/Delta-Vector_Austral-24B-Winton-GGUF/Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "mistral-small-3.2-46b-the-brilliant-raconteur-ii-instruct-2506"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://huggingface.co/DavidAU/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506/resolve/main/mistral-2506.jpg
urls:
- https://huggingface.co/DavidAU/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506
- https://huggingface.co/mradermacher/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506-GGUF
description: |
WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
ABOUT:
A stronger, more creative Mistral (Mistral-Small-3.2-24B-Instruct-2506) extended to 79 layers, 46B parameters with Brainstorm 40x by DavidAU (details at very bottom of the page). This is version II, which has a jump in detail, and raw emotion relative to version 1.
This model pushes Mistral's Instruct 2506 to the limit:
Regens will be very different, even with same prompt / settings.
Output generation will vary vastly on each generation.
Reasoning will be changed, and often shorter.
Prose, creativity, word choice, and general "flow" are improved.
Several system prompts below help push this model even further.
Model is partly de-censored / abliterated. Most Mistrals are more uncensored than most other models too.
This model can also be used for coding, even at low quants.
Model can be used for all use cases too.
As this is an instruct model, this model thrives on instructions - both in the system prompt and/or the prompt itself.
One example below with 3 generations using Q4_K_S.
Second example below with 2 generations using Q4_K_S.
Quick Details:
Model is 128k context, Jinja template (embedded) OR Chatml Template.
Reasoning can be turned on/off (see system prompts below) and is OFF by default.
Temp range .1 to 1 suggested, with 1-2 for enhanced creativity. Above temp 2 it is strong but can be very different.
Rep pen range: 1 (off) or very light 1.01, 1.02 to 1.05. (model is sensitive to rep pen - this affects reasoning / generation length.)
For creative/brainstorming use: suggest 2-5 generations due to variations caused by Brainstorm.
Observations:
Sometimes using Chatml (or Alpaca / others ) template (VS Jinja) will result in stronger creative generation.
Model can be operated with NO system prompt; however a system prompt will enhance generation.
Longer prompts that are more detailed, with more instructions, will result in much stronger generations.
For prose directives: You may need to add directions, because the model may follow your instructions too closely. IE: "use short sentences" vs "use short sentences sparsely".
Reasoning (on) can lead to better creative generation, however sometimes generation with reasoning off is better.
Rep pen of up to 1.05 may be needed on quants Q2k/q3ks for some prompts to address "low bit" issues.
Detailed settings, system prompts, how to and examples below.
NOTES:
Image generation should also be possible with this model, just like the base model. Brainstorm was not applied to the image generation systems of the model... yet.
This is Version II and subject to change / revision.
This model is a slightly different version of:
https://huggingface.co/DavidAU/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-Instruct-2506
overrides:
parameters:
model: Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506.Q4_K_M.gguf
files:
- filename: Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506.Q4_K_M.gguf
sha256: 5c8b6f21ae4f671880fafe60001f30f4c639a680e257701e474777cfcf00f8f6
uri: huggingface://mradermacher/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506-GGUF/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506.Q4_K_M.gguf
- !!merge <<: *mistral03
name: "zerofata_ms3.2-paintedfantasy-visage-33b"
icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/CQeog2SHdGUdmx8vHqL71.png
urls:
- https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B
- https://huggingface.co/bartowski/zerofata_MS3.2-PaintedFantasy-Visage-33B-GGUF
description: |
Another experimental release. Mistral Small 3.2 24B upscaled by 18 layers to create a 33.6B model. This model then went through pretraining, SFT & DPO.
Can't guarantee the Mistral 3.2 repetition issues are fixed, but this model seems to be less repetitive than my previous attempt.
This is an uncensored creative model intended to excel at character driven RP / ERP where characters are portrayed creatively and proactively.
overrides:
parameters:
model: zerofata_MS3.2-PaintedFantasy-Visage-33B-Q4_K_M.gguf
files:
- filename: zerofata_MS3.2-PaintedFantasy-Visage-33B-Q4_K_M.gguf
sha256: bd315ad9a4cf0f47ed24f8d387b0cad1dd127e10f2bbe1c6820ae91f700ada56
uri: huggingface://bartowski/zerofata_MS3.2-PaintedFantasy-Visage-33B-GGUF/zerofata_MS3.2-PaintedFantasy-Visage-33B-Q4_K_M.gguf
- &mudler
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
name: "LocalAI-llama3-8b-function-call-v0.2"


@@ -256,7 +256,7 @@ func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStat
return fmt.Errorf("failed to get image %q: %v", url, err)
}
return oci.ExtractOCIImage(img, filepath.Dir(filePath), downloadStatus)
return oci.ExtractOCIImage(img, url, filepath.Dir(filePath), downloadStatus)
}
// Check if the file already exists


@@ -6,6 +6,7 @@ import (
"fmt"
"io"
"net/http"
"os"
"runtime"
"strconv"
"strings"
@@ -21,6 +22,7 @@ import (
"github.com/google/go-containerregistry/pkg/v1/mutate"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/google/go-containerregistry/pkg/v1/remote/transport"
"github.com/google/go-containerregistry/pkg/v1/tarball"
)
// ref: https://github.com/mudler/luet/blob/master/pkg/helpers/docker/docker.go#L117
@@ -95,31 +97,28 @@ func (pw *progressWriter) Write(p []byte) (int, error) {
}
// ExtractOCIImage will extract a given targetImage into a given targetDestination
func ExtractOCIImage(img v1.Image, targetDestination string, downloadStatus func(string, string, string, float64)) error {
var reader io.Reader
reader = mutate.Extract(img)
func ExtractOCIImage(img v1.Image, imageRef string, targetDestination string, downloadStatus func(string, string, string, float64)) error {
// Create a temporary tar file
tmpTarFile, err := os.CreateTemp("", "localai-oci-*.tar")
if err != nil {
return fmt.Errorf("failed to create temporary tar file: %v", err)
}
defer os.Remove(tmpTarFile.Name())
defer tmpTarFile.Close()
if downloadStatus != nil {
var totalSize int64
layers, err := img.Layers()
if err != nil {
return err
}
for _, layer := range layers {
size, err := layer.Size()
if err != nil {
return err
}
totalSize += size
}
reader = io.TeeReader(reader, &progressWriter{total: totalSize, downloadStatus: downloadStatus})
// Download the image as tar with progress tracking
err = DownloadOCIImageTar(img, imageRef, tmpTarFile.Name(), downloadStatus)
if err != nil {
return fmt.Errorf("failed to download image tar: %v", err)
}
_, err := archive.Apply(context.Background(),
targetDestination, reader,
archive.WithNoSameOwner())
// Extract the tar file to the target destination
err = ExtractOCIImageFromTar(tmpTarFile.Name(), imageRef, targetDestination, downloadStatus)
if err != nil {
return fmt.Errorf("failed to extract image tar: %v", err)
}
return err
return nil
}
func ParseImageParts(image string) (tag, repository, dstimage string) {
@@ -205,3 +204,164 @@ func GetOCIImageSize(targetImage, targetPlatform string, auth *registrytypes.Aut
	return size, nil
}
+
+// DownloadOCIImageTar downloads the compressed layers of an image and then creates an uncompressed tar
+// This provides accurate size estimation and allows for later extraction
+func DownloadOCIImageTar(img v1.Image, imageRef string, tarFilePath string, downloadStatus func(string, string, string, float64)) error {
+	// Get layers to calculate total compressed size for estimation
+	layers, err := img.Layers()
+	if err != nil {
+		return fmt.Errorf("failed to get layers: %v", err)
+	}
+	// Calculate total compressed size for progress tracking
+	var totalCompressedSize int64
+	for _, layer := range layers {
+		size, err := layer.Size()
+		if err != nil {
+			return fmt.Errorf("failed to get layer size: %v", err)
+		}
+		totalCompressedSize += size
+	}
+	// Create a temporary directory to store the compressed layers
+	tmpDir, err := os.MkdirTemp("", "localai-oci-layers-*")
+	if err != nil {
+		return fmt.Errorf("failed to create temporary directory: %v", err)
+	}
+	defer os.RemoveAll(tmpDir)
+	// Download all compressed layers with progress tracking
+	var downloadedLayers []v1.Layer
+	var downloadedSize int64
+	// Extract image name from the reference for display
+	imageName := imageRef
+	for i, layer := range layers {
+		layerSize, err := layer.Size()
+		if err != nil {
+			return fmt.Errorf("failed to get layer size: %v", err)
+		}
+		// Create a temporary file for this layer
+		layerFile := fmt.Sprintf("%s/layer-%d.tar.gz", tmpDir, i)
+		file, err := os.Create(layerFile)
+		if err != nil {
+			return fmt.Errorf("failed to create layer file: %v", err)
+		}
+		// Create progress writer for this layer
+		var writer io.Writer = file
+		if downloadStatus != nil {
+			writer = io.MultiWriter(file, &progressWriter{
+				total:          totalCompressedSize,
+				fileName:       fmt.Sprintf("Downloading %d/%d %s", i+1, len(layers), imageName),
+				downloadStatus: downloadStatus,
+			})
+		}
+		// Download the compressed layer
+		layerReader, err := layer.Compressed()
+		if err != nil {
+			file.Close()
+			return fmt.Errorf("failed to get compressed layer: %v", err)
+		}
+		_, err = io.Copy(writer, layerReader)
+		file.Close()
+		if err != nil {
+			return fmt.Errorf("failed to download layer %d: %v", i, err)
+		}
+		// Load the downloaded layer
+		downloadedLayer, err := tarball.LayerFromFile(layerFile)
+		if err != nil {
+			return fmt.Errorf("failed to load downloaded layer: %v", err)
+		}
+		downloadedLayers = append(downloadedLayers, downloadedLayer)
+		downloadedSize += layerSize
+	}
+	// Create a local image from the downloaded layers
+	localImg, err := mutate.AppendLayers(img, downloadedLayers...)
+	if err != nil {
+		return fmt.Errorf("failed to create local image: %v", err)
+	}
+	// Now extract the uncompressed tar from the local image
+	tarFile, err := os.Create(tarFilePath)
+	if err != nil {
+		return fmt.Errorf("failed to create tar file: %v", err)
+	}
+	defer tarFile.Close()
+	// Extract uncompressed tar from local image
+	extractReader := mutate.Extract(localImg)
+	_, err = io.Copy(tarFile, extractReader)
+	if err != nil {
+		return fmt.Errorf("failed to extract uncompressed tar: %v", err)
+	}
+	return nil
+}
+
+// ExtractOCIImageFromTar extracts an image from a previously downloaded tar file
+func ExtractOCIImageFromTar(tarFilePath, imageRef, targetDestination string, downloadStatus func(string, string, string, float64)) error {
+	// Open the tar file
+	tarFile, err := os.Open(tarFilePath)
+	if err != nil {
+		return fmt.Errorf("failed to open tar file: %v", err)
+	}
+	defer tarFile.Close()
+	// Get file size for progress tracking
+	fileInfo, err := tarFile.Stat()
+	if err != nil {
+		return fmt.Errorf("failed to get file info: %v", err)
+	}
+	var reader io.Reader = tarFile
+	if downloadStatus != nil {
+		reader = io.TeeReader(tarFile, &progressWriter{
+			total:          fileInfo.Size(),
+			fileName:       fmt.Sprintf("Extracting %s", imageRef),
+			downloadStatus: downloadStatus,
+		})
+	}
+	// Extract the tar file
+	_, err = archive.Apply(context.Background(),
+		targetDestination, reader,
+		archive.WithNoSameOwner())
+	return err
+}
+
+// GetOCIImageUncompressedSize returns the total uncompressed size of an image
+func GetOCIImageUncompressedSize(targetImage, targetPlatform string, auth *registrytypes.AuthConfig, t http.RoundTripper) (int64, error) {
+	var totalSize int64
+	var img v1.Image
+	var err error
+	img, err = GetImage(targetImage, targetPlatform, auth, t)
+	if err != nil {
+		return totalSize, err
+	}
+	layers, err := img.Layers()
+	if err != nil {
+		return totalSize, err
+	}
+	for _, layer := range layers {
+		// Use compressed size as an approximation since uncompressed size is not directly available
+		size, err := layer.Size()
+		if err != nil {
+			return totalSize, err
+		}
+		totalSize += size
+	}
+	return totalSize, nil
+}

View File

@@ -30,7 +30,7 @@ var _ = Describe("OCI", func() {
			Expect(err).NotTo(HaveOccurred())
			defer os.RemoveAll(dir)
-			err = ExtractOCIImage(img, dir, nil)
+			err = ExtractOCIImage(img, imageName, dir, nil)
			Expect(err).NotTo(HaveOccurred())
		})
	})
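
The updated test call mirrors the new signature: the image reference is now threaded through for status display, and passing nil as the callback simply disables progress reporting. A short hypothetical snippet, assuming the surrounding package and an existing targetDir (image name and platform are placeholders):

img, err := GetImage("alpine", "", nil, nil) // empty platform: assumed to default to the host
if err != nil {
	return err
}
return ExtractOCIImage(img, "alpine", targetDir, nil) // nil callback: no progress updates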

View File

@@ -7,10 +7,15 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
)
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
var errs error
systemState, err := system.GetSystemState()
if err != nil {
return fmt.Errorf("failed to get system state: %w", err)
}
for _, backend := range backends {
switch {
case strings.HasPrefix(backend, "oci://"):
@@ -22,7 +27,7 @@ func InstallExternalBackends(galleries []config.Gallery, backendPath string, dow
				errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
			}
		default:
-			err := gallery.InstallBackendFromGallery(galleries, backend, backendPath, downloadStatus)
+			err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus, true)
			if err != nil {
				errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
			}
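
For context, a sketch of how a caller might drive InstallExternalBackends after this change, assuming config.Gallery exposes Name and URL fields; the gallery URL and backend names are placeholders, and the zerolog call only mirrors the logging style used elsewhere in the codebase:

galleries := []config.Gallery{{Name: "localai", URL: "https://example.org/backend-gallery.yaml"}}
err := InstallExternalBackends(
	galleries,
	"/var/lib/localai/backends", // backendPath
	nil,                         // no progress callback
	"oci://quay.io/example/backend:latest", // handled by the oci:// branch
	"example-backend",                      // resolved against the backend gallery
)
if err != nil {
	log.Error().Err(err).Msg("backend installation failed")
}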

View File

@@ -17,7 +17,7 @@ import (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
-func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
+func InstallModels(galleries, backendGalleries []config.Gallery, modelPath, backendBasePath string, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
	// create an error that groups all errors
	var err error
@@ -99,7 +99,7 @@ func InstallModels(galleries []config.Gallery, modelPath string, enforceScan boo
			}
		} else {
			// Check if it's a model gallery, or print a warning
-			e, found := installModel(galleries, url, modelPath, downloadStatus, enforceScan)
+			e, found := installModel(galleries, backendGalleries, url, modelPath, backendBasePath, downloadStatus, enforceScan, autoloadBackendGalleries)
			if e != nil && found {
				log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url)
				err = errors.Join(err, e)
@@ -113,7 +113,7 @@ func InstallModels(galleries []config.Gallery, modelPath string, enforceScan boo
	return err
}

-func installModel(galleries []config.Gallery, modelName, modelPath string, downloadStatus func(string, string, string, float64), enforceScan bool) (error, bool) {
+func installModel(galleries, backendGalleries []config.Gallery, modelName, modelPath, backendBasePath string, downloadStatus func(string, string, string, float64), enforceScan, autoloadBackendGalleries bool) (error, bool) {
	models, err := gallery.AvailableGalleryModels(galleries, modelPath)
	if err != nil {
		return err, false
@@ -129,7 +129,7 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl
	}
	log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
-	err = gallery.InstallModelFromGallery(galleries, modelName, modelPath, gallery.GalleryModel{}, downloadStatus, enforceScan)
+	err = gallery.InstallModelFromGallery(galleries, backendGalleries, modelName, modelPath, backendBasePath, gallery.GalleryModel{}, downloadStatus, enforceScan, autoloadBackendGalleries)
	if err != nil {
		return err, true
	}
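
A sketch of the widened InstallModels signature in use, with placeholder galleries, paths, and model name; the second gallery slice and the extra boolean are the backend-gallery and autoload parameters threaded through above:

modelGalleries := []config.Gallery{{Name: "localai", URL: "https://example.org/model-gallery.yaml"}}
backendGalleries := []config.Gallery{{Name: "backends", URL: "https://example.org/backend-gallery.yaml"}}

err := InstallModels(
	modelGalleries,
	backendGalleries,
	"/var/lib/localai/models",   // modelPath
	"/var/lib/localai/backends", // backendBasePath
	true,                        // enforceScan
	true,                        // autoloadBackendGalleries
	nil,                         // downloadStatus
	"example-model",
)
if err != nil {
	log.Error().Err(err).Msg("model preload failed")
}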

View File

@@ -21,7 +21,7 @@ var _ = Describe("Preload test", func() {
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
InstallModels([]config.Gallery{}, tmpdir, true, nil, url)
InstallModels([]config.Gallery{}, []config.Gallery{}, tmpdir, "", true, true, nil, url)
resultFile := filepath.Join(tmpdir, fileName)
@@ -36,7 +36,7 @@ var _ = Describe("Preload test", func() {
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url)
err = InstallModels([]config.Gallery{}, []config.Gallery{}, tmpdir, "", false, true, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)