Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)

Compare commits: v3.1.0...chore/ubun (47 commits)
| SHA1 |
|---|
| 057d5c25f1 |
| cdf70db0d6 |
| c35dd0a7b8 |
| 2f5af6b246 |
| 00cf2e0e0a |
| c7a1d9c089 |
| ad7ba52166 |
| c5b9f45166 |
| 61b64a65ab |
| 8276952920 |
| b7cd5bfaec |
| da4312e4d3 |
| 7d507c54ed |
| df7ed49889 |
| bfdc29d316 |
| 7fdc006071 |
| 615830245b |
| 61376c0fa7 |
| d0fb23514f |
| 780d034ac9 |
| ec2a044c7e |
| ad6fdd21fd |
| cd94e6b352 |
| b37cef3718 |
| 9f957d547d |
| f0d9f0c5d8 |
| d33e1c72a3 |
| 33f9ee06c9 |
| c54677402d |
| 3fe3a7b23d |
| f8ff6fa1fd |
| dfadc3696e |
| dbcf5fb4fc |
| 2633137a17 |
| d9c17dd23b |
| d8b7bd4860 |
| a611cbc0f4 |
| 850b525159 |
| 35b3426a2a |
| cd2b0c0e7c |
| 73d80c43a8 |
| 665562b850 |
| 7a78e4f482 |
| 6f41a6f934 |
| bb54f2da2b |
| e1cc7ee107 |
| cfc9dfa3d5 |
.github/workflows/backend.yml (vendored, 101 changed lines)
```diff
@@ -7,7 +7,7 @@ on:
       - master
     tags:
       - '*'
-  #pull_request:
+  pull_request:
 
 concurrency:
   group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -26,7 +26,6 @@ jobs:
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
       backend: ${{ matrix.backend }}
-      latest-image: ${{ matrix.latest-image }}
       dockerfile: $${ matrix.dockerfile }}
       context: $${ matrix.context }}
     secrets:
@@ -47,9 +46,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "rerankers"
-          latest-image: 'latest-gpu-nvidia-cuda-11-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -59,9 +57,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-vllm'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "vllm"
-          latest-image: 'latest-gpu-nvidia-cuda-11-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -71,9 +68,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-transformers'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "transformers"
-          latest-image: 'latest-gpu-nvidia-cuda-11-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -83,9 +79,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "diffusers"
-          latest-image: 'latest-gpu-nvidia-cuda-11-diffusers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # CUDA 11 additional backends
@@ -96,9 +91,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "kokoro"
-          latest-image: 'latest-gpu-nvidia-cuda-11-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -108,9 +102,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -120,9 +113,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-coqui'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "coqui"
-          latest-image: 'latest-gpu-nvidia-cuda-11-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -132,9 +124,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-bark'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "bark"
-          latest-image: 'latest-gpu-nvidia-cuda-11-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -144,9 +135,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "chatterbox"
-          latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # CUDA 12 builds
@@ -157,9 +147,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "rerankers"
-          latest-image: 'latest-gpu-nvidia-cuda-12-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -169,9 +158,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-vllm'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "vllm"
-          latest-image: 'latest-gpu-nvidia-cuda-12-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -181,9 +169,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-transformers'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "transformers"
-          latest-image: 'latest-gpu-nvidia-cuda-12-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -193,9 +180,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
-          backend: "diffusers"
-          latest-image: 'latest-gpu-nvidia-cuda-12-diffusers'
+          base-image: "ubuntu:24.04"
+          backend: "diffusers"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # CUDA 12 additional backends
@@ -206,9 +192,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "kokoro"
-          latest-image: 'latest-gpu-nvidia-cuda-12-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -218,9 +203,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -230,9 +214,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-coqui'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "coqui"
-          latest-image: 'latest-gpu-nvidia-cuda-12-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -242,9 +225,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-bark'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "bark"
-          latest-image: 'latest-gpu-nvidia-cuda-12-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -254,9 +236,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "chatterbox"
-          latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # hipblas builds
@@ -267,9 +248,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-rerankers'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "rerankers"
-          latest-image: 'latest-gpu-rocm-hipblas-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -279,9 +259,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-vllm'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "vllm"
-          latest-image: 'latest-gpu-rocm-hipblas-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -291,9 +270,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-transformers'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "transformers"
-          latest-image: 'latest-gpu-rocm-hipblas-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -303,9 +281,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-diffusers'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "diffusers"
-          latest-image: 'latest-gpu-rocm-hipblas-diffusers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # ROCm additional backends
@@ -316,9 +293,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-kokoro'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "kokoro"
-          latest-image: 'latest-gpu-rocm-hipblas-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -328,9 +304,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-rocm-hipblas-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -340,9 +315,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-coqui'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "coqui"
-          latest-image: 'latest-gpu-rocm-hipblas-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -352,9 +326,8 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-gpu-rocm-hipblas-bark'
           runs-on: 'ubuntu-latest'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
           backend: "bark"
-          latest-image: 'latest-gpu-rocm-hipblas-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # sycl builds
@@ -367,7 +340,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "rerankers"
-          latest-image: 'latest-gpu-intel-sycl-f32-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -379,7 +351,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "rerankers"
-          latest-image: 'latest-gpu-intel-sycl-f16-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -391,7 +362,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "vllm"
-          latest-image: 'latest-gpu-intel-sycl-f32-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -403,7 +373,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "vllm"
-          latest-image: 'latest-gpu-intel-sycl-f16-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -415,7 +384,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "transformers"
-          latest-image: 'latest-gpu-intel-sycl-f32-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -427,7 +395,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "transformers"
-          latest-image: 'latest-gpu-intel-sycl-f16-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -439,7 +406,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "diffusers"
-          latest-image: 'latest-gpu-intel-sycl-f32-diffusers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # SYCL additional backends
@@ -452,7 +418,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "kokoro"
-          latest-image: 'latest-gpu-intel-sycl-f32-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -464,7 +429,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "kokoro"
-          latest-image: 'latest-gpu-intel-sycl-f16-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -476,7 +440,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -488,7 +451,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -500,7 +462,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "coqui"
-          latest-image: 'latest-gpu-intel-sycl-f32-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -512,7 +473,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "coqui"
-          latest-image: 'latest-gpu-intel-sycl-f16-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -524,7 +484,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "bark"
-          latest-image: 'latest-gpu-intel-sycl-f32-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -536,7 +495,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "bark"
-          latest-image: 'latest-gpu-intel-sycl-f16-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # bark-cpp
@@ -547,8 +505,7 @@ jobs:
           tag-latest: 'true'
           tag-suffix: '-bark-cpp'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           backend: "bark"
-          latest-image: 'latest-bark-cpp'
           dockerfile: "./backend/Dockerfile.go"
           context: "./"
```
.github/workflows/backend_build.yml (vendored, 20 changed lines)
```diff
@@ -28,10 +28,6 @@ on:
         description: 'Tag latest'
         default: ''
         type: string
-      latest-image:
-        description: 'Tag latest'
-        default: ''
-        type: string
       tag-suffix:
         description: 'Tag suffix'
         default: ''
@@ -153,7 +149,7 @@ jobs:
             type=sha
           flavor: |
             latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true
 
       - name: Docker meta for PR
         id: meta_pull_request
@@ -168,7 +164,7 @@ jobs:
             type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
           flavor: |
             latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true
       ## End testing image
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
@@ -210,7 +206,6 @@ jobs:
           cache-from: type=gha
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
-          load: ${{ github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
 
@@ -233,18 +228,7 @@ jobs:
           tags: ${{ steps.meta_pull_request.outputs.tags }}
           labels: ${{ steps.meta_pull_request.outputs.labels }}
 
-      - name: Cleanup
-        run: |
-          docker builder prune -f
-          docker system prune --force --volumes --all
-
-      - name: Latest tag
-        if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
-        run: |
-          docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
-          docker push localai/localai-backends:${{ inputs.latest-image }}
-          docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
-          docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
-
       - name: job summary
         run: |
```
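A note on the flavor change: with `suffix=${{ inputs.tag-suffix }},onlatest=true`, docker/metadata-action applies the suffix to the `latest` flavor tag as well, which appears to be what makes the hand-rolled `Latest tag` retag-and-push step (and the `latest-image` input it relied on) removable throughout this changeset.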
.github/workflows/generate_grpc_cache.yaml (vendored, 2 changed lines)
```diff
@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - grpc-base-image: ubuntu:22.04
+          - grpc-base-image: ubuntu:24.04
             runs-on: 'ubuntu-latest'
             platforms: 'linux/amd64,linux/arm64'
     runs-on: ${{matrix.runs-on}}
```
.github/workflows/generate_intel_image.yaml (vendored, 2 changed lines)
```diff
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04
             runs-on: 'ubuntu-latest'
             platforms: 'linux/amd64'
     runs-on: ${{matrix.runs-on}}
```
.github/workflows/image-pr.yml (vendored, 10 changed lines)
```diff
@@ -43,22 +43,22 @@ jobs:
           tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas'
           ffmpeg: 'false'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
+          grpc-base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: 'sycl-f16-ffmpeg'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
@@ -69,5 +69,5 @@ jobs:
           tag-suffix: '-vulkan-ffmpeg-core'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=4 --output-sync=target"
```
.github/workflows/image.yml (vendored, 32 changed lines)
```diff
@@ -38,11 +38,11 @@ jobs:
       include:
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
-          tag-latest: 'false'
-          tag-suffix: '-hipblas'
+          tag-latest: 'auto'
+          tag-suffix: '-gpu-hipblas'
           ffmpeg: 'true'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.1"
+          grpc-base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
           aio: "-aio-gpu-hipblas"
@@ -77,7 +77,7 @@ jobs:
           tag-latest: 'auto'
           tag-suffix: ''
           ffmpeg: 'true'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           aio: "-aio-cpu"
           makeflags: "--jobs=4 --output-sync=target"
@@ -86,11 +86,11 @@ jobs:
           cuda-major-version: "11"
           cuda-minor-version: "7"
           platforms: 'linux/amd64'
-          tag-latest: 'false'
+          tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda11'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
           aio: "-aio-gpu-nvidia-cuda-11"
@@ -98,29 +98,29 @@ jobs:
           cuda-major-version: "12"
           cuda-minor-version: "0"
           platforms: 'linux/amd64'
-          tag-latest: 'false'
+          tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda12'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-nvidia-cuda-12"
         - build-type: 'vulkan'
           platforms: 'linux/amd64'
-          tag-latest: 'false'
+          tag-latest: 'auto'
           tag-suffix: '-vulkan'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-vulkan"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
-          tag-latest: 'false'
+          tag-latest: 'auto'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: '-gpu-intel-f16'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
@@ -128,9 +128,9 @@ jobs:
           aio: "-aio-gpu-intel-f16"
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
-          tag-latest: 'false'
+          tag-latest: 'auto'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: '-gpu-intel-f32'
           ffmpeg: 'true'
           runs-on: 'ubuntu-latest'
@@ -165,7 +165,7 @@ jobs:
           cuda-major-version: "12"
           cuda-minor-version: "0"
           platforms: 'linux/arm64'
-          tag-latest: 'false'
+          tag-latest: 'auto'
           tag-suffix: '-nvidia-l4t-arm64'
           ffmpeg: 'true'
           base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
```
Dockerfile (11 changed lines)
```diff
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
 
@@ -24,6 +26,8 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 ENV BUILD_TYPE=${BUILD_TYPE}
 
+RUN mkdir -p /run/localai
+
 # Vulkan requirements
 RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
@@ -36,7 +38,8 @@ RUN <<EOT bash
         apt-get install -y \
             vulkan-sdk && \
         apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+        rm -rf /var/lib/apt/lists/* && \
+        echo "vulkan" > /run/localai/capability
     fi
 EOT
 
@@ -63,7 +66,8 @@ RUN <<EOT bash
             libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
             libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
         apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+        rm -rf /var/lib/apt/lists/* && \
+        echo "nvidia" > /run/localai/capability
     fi
 EOT
 
@@ -83,6 +87,7 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         rocblas-dev && \
         apt-get clean && \
         rm -rf /var/lib/apt/lists/* && \
+        echo "amd" > /run/localai/capability && \
         # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
         # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
         ldconfig \
```

The following hunk belongs to another Dockerfile whose header was lost in the source:

```diff
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE}
 
```
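The `vulkan`/`nvidia`/`amd` values written to `/run/localai/capability` appear designed to pair with the keys of the new `capabilities` maps introduced in the backend-gallery index later in this changeset; a sketch of how the two could fit together follows that diff below.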
Makefile (24 changed lines)
```diff
@@ -6,11 +6,11 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=8846aace4934ad29651ea61b8c7e3f6b0556e3d2
+CPPLLAMA_VERSION?=bee28421be25fd447f61cb6db64d556cbfce32ec
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=32cf4e2aba799aff069011f37ca025401433cf9f
+WHISPER_CPP_VERSION?=d9999d54c868b8bfcd376aa26067e787d53e679e
 
 # go-piper version
 PIPER_REPO?=https://github.com/mudler/go-piper
@@ -18,7 +18,7 @@ PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
 
 # bark.cpp
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
-BARKCPP_VERSION?=v1.0.0
+BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
 
 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
@@ -265,8 +265,8 @@ sources/bark.cpp/build/libbark.a: sources/bark.cpp
 	cmake $(CMAKE_ARGS) .. && \
 	cmake --build . --config Release
 
-backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
-	$(MAKE) -C backend/go/bark libbark.a
+backend/go/bark-cpp/libbark.a: sources/bark.cpp/build/libbark.a
+	$(MAKE) -C backend/go/bark-cpp libbark.a
 
 ## go-piper
 sources/go-piper:
@@ -355,7 +355,7 @@ clean: ## Remove build related file
 	rm -rf release/
 	rm -rf backend-assets/*
 	$(MAKE) -C backend/cpp/grpc clean
-	$(MAKE) -C backend/go/bark clean
+	$(MAKE) -C backend/go/bark-cpp clean
 	$(MAKE) -C backend/cpp/llama clean
 	$(MAKE) -C backend/go/image/stablediffusion-ggml clean
 	rm -rf backend/cpp/llama-* || true
@@ -778,9 +778,9 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
 	mkdir -p backend-assets/util/
 	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
 
-backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
+backend-assets/grpc/bark-cpp: backend/go/bark-cpp/libbark.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark-cpp/ LIBRARY_PATH=$(CURDIR)/backend/go/bark-cpp/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark-cpp/
 ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/bark-cpp
 endif
@@ -817,7 +817,7 @@ grpcs: prepare $(GRPC_BACKENDS)
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
 IMAGE_TYPE?=core
-BASE_IMAGE?=ubuntu:22.04
+BASE_IMAGE?=ubuntu:24.04
 
 docker:
 	docker build \
@@ -852,7 +852,7 @@ docker-aio-all:
 
 docker-image-intel:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -860,7 +860,7 @@ docker-image-intel:
 
 docker-image-intel-xpu:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
```
The next two hunks come from a documentation file whose header was lost in the source:

````diff
@@ -141,10 +141,10 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 
 ```bash
 # Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
 
 # Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
 ```
 
 ### Vulkan GPU Images:
@@ -215,6 +215,7 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
 
 ## 🚀 [Features](https://localai.io/features/)
 
+- 🧩 [Backend Gallery](https://localai.io/backends/): Install/remove backends on the fly, powered by OCI images — fully customizable and API-driven.
 - 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
````
The next hunks come from the backend Dockerfiles (file headers lost in the source):

```diff
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
@@ -123,9 +123,9 @@ EOT
 
 COPY . /LocalAI
 
-RUN cd /LocalAI && make backend-assets/grpc/bark-cpp
+RUN cd /LocalAI && make backend-assets/grpc/${BACKEND}
 
 FROM scratch
 
-COPY --from=builder /LocalAI/backend-assets/grpc/bark-cpp ./
-COPY --from=builder /LocalAI/backend/go/bark/run.sh ./
+COPY --from=builder /LocalAI/backend-assets/grpc/${BACKEND} ./
+COPY --from=builder /LocalAI/backend/go/${BACKEND}/run.sh ./
```

```diff
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
```
The protobuf definition gains a new field (file header lost in the source):

```diff
@@ -258,6 +258,8 @@ message ModelOptions {
   repeated GrammarTrigger GrammarTriggers = 65;
 
   bool Reranking = 71;
+
+  repeated string Overrides = 72;
 }
 
 message Result {
```
The llama.cpp gRPC server wires the new field through to llama.cpp's kv-override parser:

```diff
@@ -12,6 +12,7 @@
 
 #include "backend.pb.h"
 #include "backend.grpc.pb.h"
+#include "common.h"
 #include <getopt.h>
 #include <grpcpp/ext/proto_server_reflection_plugin.h>
 #include <grpcpp/grpcpp.h>
@@ -260,6 +261,13 @@ static void params_parse(const backend::ModelOptions* request,
         }
     }
 
+    // Add kv_overrides
+    if (request->overrides_size() > 0) {
+        for (int i = 0; i < request->overrides_size(); i++) {
+            string_parse_kv_override(request->overrides(i).c_str(), params.kv_overrides);
+        }
+    }
+
     // TODO: Add yarn
 
     if (!request->tensorsplit().empty()) {
```
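The new `Overrides` strings are handed directly to llama.cpp's `string_parse_kv_override`, which expects `KEY=TYPE:VALUE` entries (types `int`, `float`, `bool`, `str`). A minimal sketch of populating the field through the generated Go proto follows; the key and file names are illustrative only and are not taken from this changeset:

```go
package main

import (
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
	// KEY=TYPE:VALUE is llama.cpp's kv-override syntax; this key is an
	// example, not something this PR mentions.
	opts := &pb.ModelOptions{
		Model: "my-model.gguf", // hypothetical model file
		Overrides: []string{
			"tokenizer.ggml.add_bos_token=bool:false",
		},
	}
	_ = opts // would be sent as part of a LoadModel request
}
```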
The next hunks restructure the backend-gallery index (file header lost in the source); each backend family gains a "meta" entry whose `capabilities` map points at the concrete per-vendor images:

```diff
@@ -1,8 +1,7 @@
 ---
 ## vLLM
 - &vllm
-  name: "cuda11-vllm"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
+  name: "vllm"
   license: apache-2.0
   urls:
     - https://github.com/vllm-project/vllm
@@ -29,6 +28,19 @@
     Speculative decoding
     Chunked prefill
   alias: "vllm"
+  capabilities:
+    nvidia: "cuda12-vllm"
+    amd: "rocm-vllm"
+    intel: "intel-sycl-f16-vllm"
+- !!merge <<: *vllm
+  name: "vllm-development"
+  capabilities:
+    nvidia: "cuda12-vllm-development"
+    amd: "rocm-vllm-development"
+    intel: "intel-sycl-f16-vllm-development"
+- !!merge <<: *vllm
+  name: "cuda11-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
 - !!merge <<: *vllm
   name: "cuda12-vllm"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
@@ -57,43 +69,52 @@
   name: "intel-sycl-f16-vllm-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
 ## Rerankers
-- name: "cuda11-rerankers"
+- &rerankers
+  name: "rerankers"
+  alias: "rerankers"
+  capabilities:
+    nvidia: "cuda12-rerankers"
+    intel: "intel-sycl-f16-rerankers"
+    amd: "rocm-rerankers"
+- !!merge <<: *rerankers
+  name: "rerankers-development"
+  capabilities:
+    nvidia: "cuda12-rerankers-development"
+    intel: "intel-sycl-f16-rerankers-development"
+    amd: "rocm-rerankers-development"
+- !!merge <<: *rerankers
+  name: "cuda11-rerankers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers"
-  alias: "cuda11-rerankers"
-- name: "cuda12-rerankers"
+- !!merge <<: *rerankers
+  name: "cuda12-rerankers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers"
-  alias: "cuda12-rerankers"
-- name: "intel-sycl-f32-rerankers"
+- !!merge <<: *rerankers
+  name: "intel-sycl-f32-rerankers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rerankers"
-  alias: "intel-sycl-f32-rerankers"
-- name: "intel-sycl-f16-rerankers"
+- !!merge <<: *rerankers
+  name: "intel-sycl-f16-rerankers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rerankers"
-  alias: "intel-sycl-f16-rerankers"
-- name: "rocm-rerankers"
+- !!merge <<: *rerankers
+  name: "rocm-rerankers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers"
-  alias: "rocm-rerankers"
-- name: "cuda11-rerankers-development"
+- !!merge <<: *rerankers
+  name: "cuda11-rerankers-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers"
-  alias: "rerankers"
-
-- name: "cuda12-rerankers-development"
+- !!merge <<: *rerankers
+  name: "cuda12-rerankers-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers"
-  alias: "rerankers"
-- name: "rocm-rerankers-development"
+- !!merge <<: *rerankers
+  name: "rocm-rerankers-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers"
-  alias: "rerankers"
-
-- name: "intel-sycl-f32-rerankers-development"
+- !!merge <<: *rerankers
+  name: "intel-sycl-f32-rerankers-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers"
-  alias: "rerankers"
-
-- name: "intel-sycl-f16-rerankers-development"
+- !!merge <<: *rerankers
+  name: "intel-sycl-f16-rerankers-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
-  alias: "rerankers"
 ## Transformers
 - &transformers
-  name: "cuda12-transformers"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
+  name: "transformers"
   icon: https://camo.githubusercontent.com/26569a27b8a30a488dd345024b71dbc05da7ff1b2ba97bb6080c9f1ee0f26cc7/68747470733a2f2f68756767696e67666163652e636f2f64617461736574732f68756767696e67666163652f646f63756d656e746174696f6e2d696d616765732f7265736f6c76652f6d61696e2f7472616e73666f726d6572732f7472616e73666f726d6572735f61735f615f6d6f64656c5f646566696e6974696f6e2e706e67
   alias: "transformers"
   license: apache-2.0
@@ -105,6 +126,19 @@
   tags:
     - text-to-text
     - multimodal
+  capabilities:
+    nvidia: "cuda12-transformers"
+    intel: "intel-sycl-f16-transformers"
+    amd: "rocm-transformers"
+- !!merge <<: *transformers
+  name: "transformers-development"
+  capabilities:
+    nvidia: "cuda12-transformers-development"
+    intel: "intel-sycl-f16-transformers-development"
+    amd: "rocm-transformers-development"
+- !!merge <<: *transformers
+  name: "cuda12-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
 - !!merge <<: *transformers
   name: "rocm-transformers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers"
@@ -143,10 +177,21 @@
     - image-generation
     - video-generation
     - diffusion-models
-  name: "cuda12-diffusers"
   license: apache-2.0
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
   alias: "diffusers"
+  capabilities:
+    nvidia: "cuda12-diffusers"
+    intel: "intel-sycl-f32-diffusers"
+    amd: "rocm-diffusers"
+- !!merge <<: *diffusers
+  name: "diffusers-development"
+  capabilities:
+    nvidia: "cuda12-diffusers-development"
+    intel: "intel-sycl-f32-diffusers-development"
+    amd: "rocm-diffusers-development"
+- !!merge <<: *diffusers
+  name: "cuda12-diffusers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
 - !!merge <<: *diffusers
   name: "rocm-diffusers"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers"
@@ -170,6 +215,7 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
 ## exllama2
 - &exllama2
+  name: "exllama2"
   urls:
     - https://github.com/turboderp-org/exllamav2
   tags:
@@ -179,9 +225,20 @@
   license: MIT
   description: |
     ExLlamaV2 is an inference library for running local LLMs on modern consumer GPUs.
   alias: "exllama2"
+  capabilities:
+    nvidia: "cuda12-exllama2"
+    intel: "intel-sycl-f32-exllama2"
+    amd: "rocm-exllama2"
+- !!merge <<: *exllama2
+  name: "exllama2-development"
+  capabilities:
+    nvidia: "cuda12-exllama2-development"
+    intel: "intel-sycl-f32-exllama2-development"
+    amd: "rocm-exllama2-development"
+- !!merge <<: *exllama2
+  name: "cuda11-exllama2"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2"
-  alias: "exllama2"
 - !!merge <<: *exllama2
   name: "cuda12-exllama2"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2"
@@ -204,9 +261,21 @@
     - TTS
     - LLM
   license: apache-2.0
   alias: "kokoro"
+  name: "kokoro"
+  capabilities:
+    nvidia: "cuda12-kokoro"
+    intel: "intel-sycl-f32-kokoro"
+    amd: "rocm-kokoro"
+- !!merge <<: *kokoro
+  name: "kokoro-development"
+  capabilities:
+    nvidia: "cuda12-kokoro-development"
+    intel: "intel-sycl-f32-kokoro-development"
+    amd: "rocm-kokoro-development"
+- !!merge <<: *kokoro
+  name: "cuda11-kokoro-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"
-  alias: "kokoro"
 - !!merge <<: *kokoro
   name: "cuda12-kokoro-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
@@ -225,6 +294,15 @@
 - !!merge <<: *kokoro
   name: "sycl-f32-kokoro-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-kokoro"
+- !!merge <<: *kokoro
+  name: "cuda11-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro"
+- !!merge <<: *kokoro
+  name: "cuda12-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoro"
+- !!merge <<: *kokoro
+  name: "rocm-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro"
 ## faster-whisper
 - &faster-whisper
   icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
@@ -237,9 +315,20 @@
     - speech-to-text
     - Whisper
   license: MIT
-  name: "cuda11-faster-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-faster-whisper"
-  alias: "faster-whisper"
+  name: "faster-whisper"
+  capabilities:
+    nvidia: "cuda12-faster-whisper"
+    intel: "intel-sycl-f32-faster-whisper"
+    amd: "rocm-faster-whisper"
+- !!merge <<: *faster-whisper
+  name: "faster-whisper-development"
+  capabilities:
+    nvidia: "cuda12-faster-whisper-development"
+    intel: "intel-sycl-f32-faster-whisper-development"
+    amd: "rocm-faster-whisper-development"
+- !!merge <<: *faster-whisper
+  name: "cuda11-faster-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper"
 - !!merge <<: *faster-whisper
   name: "cuda12-faster-whisper-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
@@ -274,10 +363,28 @@
     - text-to-speech
     - TTS
   license: mpl-2.0
+  name: "coqui"
+  alias: "coqui"
+  capabilities:
+    nvidia: "cuda12-coqui"
+    intel: "intel-sycl-f32-coqui"
+    amd: "rocm-coqui"
+  icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
+- !!merge <<: *coqui
+  name: "coqui-development"
+  capabilities:
+    nvidia: "cuda12-coqui-development"
+    intel: "intel-sycl-f32-coqui-development"
+    amd: "rocm-coqui-development"
+- !!merge <<: *coqui
+  name: "cuda11-coqui"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-coqui"
+- !!merge <<: *coqui
+  name: "cuda12-coqui"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui"
+- !!merge <<: *coqui
+  name: "cuda11-coqui-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui"
-  alias: "coqui"
-  icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
 - !!merge <<: *coqui
   name: "cuda12-coqui-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui"
@@ -296,6 +403,9 @@
 - !!merge <<: *coqui
   name: "sycl-f16-coqui-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-coqui"
+- !!merge <<: *coqui
+  name: "rocm-coqui"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
 ## bark
 - &bark
   urls:
@@ -306,13 +416,25 @@
     - text-to-speech
     - TTS
   license: MIT
-  name: "cuda11-bark-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
+  name: "bark"
+  alias: "bark"
+  capabilities:
+    cuda: "cuda12-bark"
+    intel: "intel-sycl-f32-bark"
+    rocm: "rocm-bark"
   icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
 - !!merge <<: *bark
-  name: "cuda12-bark-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
+  name: "bark-development"
+  capabilities:
+    nvidia: "cuda12-bark-development"
+    intel: "intel-sycl-f32-bark-development"
+    amd: "rocm-bark-development"
+- !!merge <<: *bark
+  name: "cuda11-bark-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
+- !!merge <<: *bark
+  name: "cuda11-bark"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-bark"
 - !!merge <<: *bark
   name: "rocm-bark-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark"
@@ -328,6 +450,15 @@
 - !!merge <<: *bark
   name: "sycl-f16-bark-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-bark"
+- !!merge <<: *bark
+  name: "cuda12-bark"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-bark"
+- !!merge <<: *bark
+  name: "rocm-bark"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-bark"
+- !!merge <<: *bark
+  name: "cuda12-bark-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
 - &barkcpp
   urls:
     - https://github.com/PABannier/bark.cpp
@@ -369,15 +500,22 @@
     - TTS
   license: MIT
   icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTAxOTE0MDAsIm5iZiI6MTc1MDE5MTEwMCwicGF0aCI6Ii82NjAyMjQvNDQ4MTY2NjUzLWJkOGM1ZjAzLWU5MWQtNGVlNS1iNjgwLTU3MzU1ZGEyMDRkMS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNjE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDYxN1QyMDExNDBaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT1hMmI1NGY3OGFiZTlhNGFkNTVlYTY4NTIwMWEzODRiZGE4YzdhNGQ5MGNhNzE3MDYyYTA2NDIxYTkyYzhiODkwJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.mR9kM9xX0TdzPuSpuspCllHYQiq79dFQ2rtuNvjrl6w
-  name: "cuda11-chatterbox-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
-  alias: "chatterbox"
+  name: "chatterbox"
+  capabilities:
+    nvidia: "cuda12-chatterbox"
+- !!merge <<: *chatterbox
+  name: "chatterbox-development"
+  capabilities:
+    nvidia: "cuda12-chatterbox-development"
 - !!merge <<: *chatterbox
   name: "cuda12-chatterbox-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"
 - !!merge <<: *chatterbox
   name: "cuda11-chatterbox"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox"
+- !!merge <<: *chatterbox
+  name: "cuda11-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
 - !!merge <<: *chatterbox
   name: "cuda12-chatterbox"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox"
```
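Taken together with the `/run/localai/capability` marker written by the Dockerfile changes above, the new `capabilities` maps let a single meta-entry such as `vllm` fan out to a vendor-specific image. The following is a hypothetical sketch of that data flow, not LocalAI's actual implementation:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// resolveBackend picks a concrete backend for a gallery meta-entry by matching
// the host capability marker against the entry's capabilities map.
func resolveBackend(capabilities map[string]string, fallback string) string {
	data, err := os.ReadFile("/run/localai/capability") // path from the Dockerfile hunk above
	if err != nil {
		return fallback // assumption: no marker means CPU-only
	}
	if concrete, ok := capabilities[strings.TrimSpace(string(data))]; ok {
		return concrete
	}
	return fallback
}

func main() {
	// Values copied from the vllm entry in the gallery hunk above.
	vllm := map[string]string{
		"nvidia": "cuda12-vllm",
		"amd":    "rocm-vllm",
		"intel":  "intel-sycl-f16-vllm",
	}
	fmt.Println(resolveBackend(vllm, "vllm"))
}
```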
The diffusers Python backend drops the `intel_extension_for_pytorch` import in favor of native `torch.xpu`, and unifies device selection (file header lost in the source):

```diff
@@ -38,9 +38,7 @@ DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
 FRAMES = os.environ.get("FRAMES", "64")
 
 if XPU:
-    import intel_extension_for_pytorch as ipex
-
-    print(ipex.xpu.get_device_name(0))
+    print(torch.xpu.get_device_name(0))
 
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
@@ -336,6 +334,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
 
         device = "cpu" if not request.CUDA else "cuda"
+        if XPU:
+            device = "xpu"
         self.device = device
         if request.LoraAdapter:
             # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
@@ -359,12 +359,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
                 self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
 
-            if request.CUDA:
-                self.pipe.to('cuda')
+            if device != "cpu":
+                self.pipe.to(device)
                 if self.controlnet:
-                    self.controlnet.to('cuda')
-            if XPU:
-                self.pipe = self.pipe.to("xpu")
+                    self.controlnet.to(device)
 
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
```

The backend's `run.sh` now enables XPU automatically when the Intel oneAPI runtime is present:

```diff
@@ -6,4 +6,10 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
 startBackend $@
```
The application bootstrap threads the new backend-gallery options through (file header lost in the source):

```diff
@@ -57,7 +57,7 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}
 
-	if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
+	if err := pkgStartup.InstallModels(options.Galleries, options.BackendGalleries, options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}
 
@@ -86,13 +86,13 @@ func New(opts ...config.AppOption) (*Application, error) {
 	}
 
 	if options.PreloadJSONModels != "" {
-		if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
+		if err := services.ApplyGalleryFromString(options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, options.Galleries, options.BackendGalleries, options.PreloadJSONModels); err != nil {
 			return nil, err
 		}
 	}
 
 	if options.PreloadModelsFromPath != "" {
-		if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
+		if err := services.ApplyGalleryFromFile(options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, options.Galleries, options.BackendGalleries, options.PreloadModelsFromPath); err != nil {
 			return nil, err
 		}
 	}
```
The inference path replaces the on-disk `os.Stat` check with a model-list lookup, and a failed gallery install is now logged instead of aborting the request (file header lost in the source):

```diff
@@ -4,8 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"os"
 	"regexp"
+	"slices"
 	"strings"
 	"sync"
 	"unicode/utf8"
@@ -14,6 +14,7 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/pkg/grpc/proto"
@@ -34,17 +35,22 @@ type TokenUsage struct {
 	TimingTokenGeneration float64
 }
 
-func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, cl *config.BackendConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
 
 	// Check if the modelFile exists, if it doesn't try to load it from the gallery
 	if o.AutoloadGalleries { // experimental
-		if _, err := os.Stat(modelFile); os.IsNotExist(err) {
+		modelNames, err := services.ListModels(cl, loader, nil, services.SKIP_ALWAYS)
+		if err != nil {
+			return nil, err
+		}
+		if !slices.Contains(modelNames, c.Name) {
 			utils.ResetDownloadTimers()
 			// if we failed to load the model, we try to download it
-			err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans)
+			err := gallery.InstallModelFromGallery(o.Galleries, o.BackendGalleries, c.Name, loader.ModelPath, o.BackendsPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans, o.AutoloadBackendGalleries)
 			if err != nil {
-				return nil, err
+				log.Error().Err(err).Msgf("failed to install model %q from gallery", modelFile)
+				//return nil, err
 			}
 		}
 	}
```
The gRPC options mapper forwards the new config field (file header lost in the source):

```diff
@@ -143,6 +143,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		CLIPModel:        c.Diffusers.ClipModel,
 		CLIPSubfolder:    c.Diffusers.ClipSubFolder,
 		Options:          c.Options,
+		Overrides:        c.Overrides,
 		CLIPSkip:         int32(c.Diffusers.ClipSkip),
 		ControlNet:       c.Diffusers.ControlNet,
 		ContextSize:      int32(ctxSize),
```
core/cli/backends.go (new file, 156 lines)
@@ -0,0 +1,156 @@
|
||||
package cli

import (
    "encoding/json"
    "fmt"

    cliContext "github.com/mudler/LocalAI/core/cli/context"
    "github.com/mudler/LocalAI/core/config"

    "github.com/mudler/LocalAI/core/gallery"
    "github.com/mudler/LocalAI/pkg/downloader"
    "github.com/mudler/LocalAI/pkg/startup"
    "github.com/rs/zerolog/log"
    "github.com/schollz/progressbar/v3"
)

type BackendsCMDFlags struct {
    BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
    BackendsPath     string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"storage"`
}

type BackendsList struct {
    BackendsCMDFlags `embed:""`
}

type BackendsInstallSingle struct {
    InstallArgs []string `arg:"" optional:"" name:"backend" help:"Backend images to install"`

    BackendsCMDFlags `embed:""`
}

type BackendsInstall struct {
    BackendArgs []string `arg:"" optional:"" name:"backends" help:"Backend configuration URLs to load"`

    BackendsCMDFlags `embed:""`
}

type BackendsUninstall struct {
    BackendArgs []string `arg:"" name:"backends" help:"Backend names to uninstall"`

    BackendsCMDFlags `embed:""`
}

type BackendsCMD struct {
    List          BackendsList          `cmd:"" help:"List the backends available in your galleries" default:"withargs"`
    Install       BackendsInstall       `cmd:"" help:"Install a backend from the gallery"`
    InstallSingle BackendsInstallSingle `cmd:"" help:"Install a single backend from the gallery"`
    Uninstall     BackendsUninstall     `cmd:"" help:"Uninstall a backend"`
}

func (bi *BackendsInstallSingle) Run(ctx *cliContext.Context) error {
    for _, backend := range bi.InstallArgs {
        progressBar := progressbar.NewOptions(
            1000,
            progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backend)),
            progressbar.OptionShowBytes(false),
            progressbar.OptionClearOnFinish(),
        )
        progressCallback := func(fileName string, current string, total string, percentage float64) {
            v := int(percentage * 10)
            err := progressBar.Set(v)
            if err != nil {
                log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
            }
        }

        if err := gallery.InstallBackend(bi.BackendsPath, &gallery.GalleryBackend{
            URI: backend,
        }, progressCallback); err != nil {
            return err
        }
    }

    return nil
}

func (bl *BackendsList) Run(ctx *cliContext.Context) error {
    var galleries []config.Gallery
    if err := json.Unmarshal([]byte(bl.BackendGalleries), &galleries); err != nil {
        log.Error().Err(err).Msg("unable to load galleries")
    }

    backends, err := gallery.AvailableBackends(galleries, bl.BackendsPath)
    if err != nil {
        return err
    }
    for _, backend := range backends {
        if backend.Installed {
            fmt.Printf(" * %s@%s (installed)\n", backend.Gallery.Name, backend.Name)
        } else {
            fmt.Printf(" - %s@%s\n", backend.Gallery.Name, backend.Name)
        }
    }
    return nil
}

func (bi *BackendsInstall) Run(ctx *cliContext.Context) error {
    var galleries []config.Gallery
    if err := json.Unmarshal([]byte(bi.BackendGalleries), &galleries); err != nil {
        log.Error().Err(err).Msg("unable to load galleries")
    }

    for _, backendName := range bi.BackendArgs {

        progressBar := progressbar.NewOptions(
            1000,
            progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backendName)),
            progressbar.OptionShowBytes(false),
            progressbar.OptionClearOnFinish(),
        )
        progressCallback := func(fileName string, current string, total string, percentage float64) {
            v := int(percentage * 10)
            err := progressBar.Set(v)
            if err != nil {
                log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
            }
        }

        backendURI := downloader.URI(backendName)

        if !backendURI.LooksLikeOCI() {
            backends, err := gallery.AvailableBackends(galleries, bi.BackendsPath)
            if err != nil {
                return err
            }

            backend := gallery.FindGalleryElement(backends, backendName, bi.BackendsPath)
            if backend == nil {
                log.Error().Str("backend", backendName).Msg("backend not found")
                return fmt.Errorf("backend not found: %s", backendName)
            }

            log.Info().Str("backend", backendName).Str("license", backend.License).Msg("installing backend")
        }

        err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, backendName)
        if err != nil {
            return err
        }
    }
    return nil
}

func (bu *BackendsUninstall) Run(ctx *cliContext.Context) error {
    for _, backendName := range bu.BackendArgs {
        log.Info().Str("backend", backendName).Msg("uninstalling backend")

        err := gallery.DeleteBackendFromSystem(bu.BackendsPath, backendName)
        if err != nil {
            return err
        }

        fmt.Printf("Backend %s uninstalled successfully\n", backendName)
    }
    return nil
}

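For orientation, `install-single` above is a thin wrapper over `gallery.InstallBackend`; here is a minimal sketch of driving that API directly, assuming only what this diff shows (the backends path and the OCI image reference are hypothetical):

package main

import (
    "fmt"

    "github.com/mudler/LocalAI/core/gallery"
)

func main() {
    // same callback shape the CLI wires into its progress bar
    progress := func(fileName, current, total string, percentage float64) {
        fmt.Printf("%s: %.0f%%\n", fileName, percentage)
    }
    // install a single backend straight from an OCI image reference,
    // which is what `local-ai backends install-single` does per argument
    backend := &gallery.GalleryBackend{URI: "quay.io/example/my-backend:latest"} // hypothetical image
    if err := gallery.InstallBackend("/tmp/backends", backend, progress); err != nil {
        fmt.Println("install failed:", err)
    }
}
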
@@ -11,6 +11,7 @@ var CLI struct {
    Run             RunCMD             `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
    Federated       FederatedCLI       `cmd:"" help:"Run LocalAI in federated mode"`
    Models          ModelsCMD          `cmd:"" help:"Manage LocalAI models and definitions"`
    Backends        BackendsCMD        `cmd:"" help:"Manage LocalAI backends and definitions"`
    TTS             TTSCMD             `cmd:"" help:"Convert text to speech"`
    SoundGeneration SoundGenerationCMD `cmd:"" help:"Generates audio files from text or audio"`
    Transcript      TranscriptCMD      `cmd:"" help:"Convert audio to text"`

@@ -16,8 +16,10 @@ import (
)

type ModelsCMDFlags struct {
    Galleries  string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
    ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
    Galleries        string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
    BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
    ModelsPath       string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
    BackendsPath     string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"storage"`
}

type ModelsList struct {
@@ -25,8 +27,9 @@ type ModelsList struct {
}

type ModelsInstall struct {
    DisablePredownloadScan bool     `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
    ModelArgs              []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
    DisablePredownloadScan   bool     `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
    AutoloadBackendGalleries bool     `env:"LOCALAI_AUTOLOAD_BACKEND_GALLERIES" help:"If true, automatically loads backend galleries" group:"backends" default:"true"`
    ModelArgs                []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`

    ModelsCMDFlags `embed:""`
}

@@ -62,6 +65,11 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
        log.Error().Err(err).Msg("unable to load galleries")
    }

    var backendGalleries []config.Gallery
    if err := json.Unmarshal([]byte(mi.BackendGalleries), &backendGalleries); err != nil {
        log.Error().Err(err).Msg("unable to load backend galleries")
    }

    for _, modelName := range mi.ModelArgs {

        progressBar := progressbar.NewOptions(
@@ -100,7 +108,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
            log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
        }

        err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
        err = startup.InstallModels(galleries, backendGalleries, mi.ModelsPath, mi.BackendsPath, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
        if err != nil {
            return err
        }

@@ -30,13 +30,14 @@ type RunCMD struct {
    LocalaiConfigDir             string        `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
    LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
    // The alias on this option is there to preserve functionality with the old `--config-file` parameter
    ModelsConfigFile    string   `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
    BackendGalleries    string   `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
    Galleries           string   `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
    AutoloadGalleries   bool     `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
    PreloadModels       string   `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
    Models              []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
    PreloadModelsConfig string   `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
    ModelsConfigFile         string   `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
    BackendGalleries         string   `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
    Galleries                string   `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
    AutoloadGalleries        bool     `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models" default:"true"`
    AutoloadBackendGalleries bool     `env:"LOCALAI_AUTOLOAD_BACKEND_GALLERIES,AUTOLOAD_BACKEND_GALLERIES" group:"backends" default:"true"`
    PreloadModels            string   `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
    Models                   []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
    PreloadModelsConfig      string   `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`

    F16     bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"`
    Threads int  `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
@@ -192,6 +193,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
        opts = append(opts, config.EnableGalleriesAutoload)
    }

    if r.AutoloadBackendGalleries {
        opts = append(opts, config.EnableBackendGalleriesAutoload)
    }

    if r.PreloadBackendOnly {
        _, err := application.New(opts...)
        return err

@@ -55,7 +55,7 @@ type ApplicationConfig struct {

    ExternalGRPCBackends map[string]string

    AutoloadGalleries bool
    AutoloadGalleries, AutoloadBackendGalleries bool

    SingleBackend           bool
    ParallelBackendRequests bool
@@ -192,6 +192,10 @@ var EnableGalleriesAutoload = func(o *ApplicationConfig) {
    o.AutoloadGalleries = true
}

var EnableBackendGalleriesAutoload = func(o *ApplicationConfig) {
    o.AutoloadBackendGalleries = true
}

func WithExternalBackend(name string, uri string) AppOption {
    return func(o *ApplicationConfig) {
        if o.ExternalGRPCBackends == nil {

@@ -70,7 +70,8 @@ type BackendConfig struct {
    Description string `yaml:"description"`
    Usage       string `yaml:"usage"`

    Options []string `yaml:"options"`
    Options   []string `yaml:"options"`
    Overrides []string `yaml:"overrides"`
}

// Pipeline defines other models to use for audio-to-audio

@@ -30,7 +30,7 @@ func (m *GalleryBackend) SetGallery(gallery config.Gallery) {
}

func (m *GalleryBackend) IsMeta() bool {
    return len(m.CapabilitiesMap) > 0
    return len(m.CapabilitiesMap) > 0 && m.URI == ""
}

func (m *GalleryBackend) SetInstalled(installed bool) {

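With the extra `m.URI == ""` condition, a backend that sets both a capabilities map and a concrete image is no longer treated as a meta backend; a small illustrative sketch (the image reference is hypothetical):

package main

import (
    "fmt"

    "github.com/mudler/LocalAI/core/gallery"
)

func main() {
    b := &gallery.GalleryBackend{
        CapabilitiesMap: map[string]string{"nvidia": "nvidia-backend"},
    }
    fmt.Println(b.IsMeta()) // true: capabilities map set and no URI

    b.URI = "quay.io/example/backend:latest" // hypothetical image reference
    fmt.Println(b.IsMeta()) // false after this change: a concrete URI disqualifies it
}
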
@@ -62,7 +62,7 @@ func findBestBackendFromMeta(backend *GalleryBackend, systemState *system.System
        return nil
    }

    realBackend := backend.CapabilitiesMap[systemState.GPUVendor]
    realBackend := backend.CapabilitiesMap[systemState.Capability()]
    if realBackend == "" {
        return nil
    }
@@ -71,7 +71,22 @@ func findBestBackendFromMeta(backend *GalleryBackend, systemState *system.System
}

// Installs a backend from the gallery
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, name string, basePath string, downloadStatus func(string, string, string, float64)) error {
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, name string, basePath string, downloadStatus func(string, string, string, float64), force bool) error {
    if !force {
        // check if we already have the backend installed
        backends, err := ListSystemBackends(basePath)
        if err != nil {
            return err
        }
        if _, ok := backends[name]; ok {
            return nil
        }
    }

    if name == "" {
        return fmt.Errorf("backend name is empty")
    }

    log.Debug().Interface("galleries", galleries).Str("name", name).Msg("Installing backend from gallery")

    backends, err := AvailableBackends(galleries, basePath)

@@ -147,7 +162,7 @@ func InstallBackend(basePath string, config *GalleryBackend, downloadStatus func
        return fmt.Errorf("failed to create backend path %q: %v", backendPath, err)
    }

    if err := oci.ExtractOCIImage(img, backendPath, downloadStatus); err != nil {
    if err := oci.ExtractOCIImage(img, config.URI, backendPath, downloadStatus); err != nil {
        return fmt.Errorf("failed to extract image %q: %v", config.URI, err)
    }

@@ -231,14 +246,32 @@ func ListSystemBackends(basePath string) (map[string]string, error) {
    for _, backend := range backends {
        if backend.IsDir() {
            runFile := filepath.Join(basePath, backend.Name(), runFile)
            backendsNames[backend.Name()] = runFile
            // Skip if the metadata file doesn't exist
            metadataFilePath := filepath.Join(basePath, backend.Name(), metadataFile)
            if _, err := os.Stat(metadataFilePath); os.IsNotExist(err) {
                continue
            }

            // Check for alias in metadata
            metadata, err := readBackendMetadata(filepath.Join(basePath, backend.Name()))
            if err != nil {
                return nil, err
            }
            if metadata != nil && metadata.Alias != "" {

            if metadata == nil {
                continue
            }

            if _, exists := backendsNames[backend.Name()]; !exists {
                // Don't override aliases that are already set; meta backends have no run file of their own
                if _, err := os.Stat(runFile); err == nil {
                    backendsNames[backend.Name()] = runFile
                } else {
                    backendsNames[backend.Name()] = ""
                }
            }

            if metadata.Alias != "" {
                backendsNames[metadata.Alias] = runFile
            }
        }

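After this change the map returned by `ListSystemBackends` carries three kinds of entries; a short sketch of consuming it (the backends path is hypothetical):

package main

import (
    "fmt"

    "github.com/mudler/LocalAI/core/gallery"
)

func main() {
    backends, err := gallery.ListSystemBackends("/tmp/backends") // hypothetical path
    if err != nil {
        panic(err)
    }
    // concrete backends map to their run.sh, meta backends map to "",
    // and aliases map to the run.sh of the backend they point at
    for name, runFile := range backends {
        fmt.Printf("%-20s -> %q\n", name, runFile)
    }
}
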
@@ -42,13 +42,13 @@ var _ = Describe("Gallery Backends", func() {

    Describe("InstallBackendFromGallery", func() {
        It("should return error when backend is not found", func() {
            err := InstallBackendFromGallery(galleries, nil, "non-existent", tempDir, nil)
            err := InstallBackendFromGallery(galleries, nil, "non-existent", tempDir, nil, true)
            Expect(err).To(HaveOccurred())
            Expect(err.Error()).To(ContainSubstring("no backend found with name \"non-existent\""))
        })

        It("should install backend from gallery", func() {
            err := InstallBackendFromGallery(galleries, nil, "test-backend", tempDir, nil)
            err := InstallBackendFromGallery(galleries, nil, "test-backend", tempDir, nil, true)
            Expect(err).ToNot(HaveOccurred())
            Expect(filepath.Join(tempDir, "test-backend", "run.sh")).To(BeARegularFile())
        })
@@ -181,7 +181,7 @@ var _ = Describe("Gallery Backends", func() {

            // Test with NVIDIA system state
            nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
            err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil)
            err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil, true)
            Expect(err).NotTo(HaveOccurred())

            metaBackendPath := filepath.Join(tempDir, "meta-backend")
@@ -206,6 +206,144 @@ var _ = Describe("Gallery Backends", func() {
            Expect(concreteBackendPath).NotTo(BeADirectory())
        })

        It("should handle meta backend deletion correctly with aliases", func() {
            metaBackend := &GalleryBackend{
                Metadata: Metadata{
                    Name: "meta-backend",
                },
                Alias: "backend-alias",
                CapabilitiesMap: map[string]string{
                    "nvidia": "nvidia-backend",
                    "amd":    "amd-backend",
                    "intel":  "intel-backend",
                },
            }

            nvidiaBackend := &GalleryBackend{
                Metadata: Metadata{
                    Name: "nvidia-backend",
                },
                Alias: "backend-alias",
                URI:   testImage,
            }

            amdBackend := &GalleryBackend{
                Metadata: Metadata{
                    Name: "amd-backend",
                },
                Alias: "backend-alias",
                URI:   testImage,
            }

            gallery := config.Gallery{
                Name: "test-gallery",
                URL:  "file://" + filepath.Join(tempDir, "backend-gallery.yaml"),
            }

            galleryBackend := GalleryBackends{amdBackend, nvidiaBackend, metaBackend}

            dat, err := yaml.Marshal(galleryBackend)
            Expect(err).NotTo(HaveOccurred())
            err = os.WriteFile(filepath.Join(tempDir, "backend-gallery.yaml"), dat, 0644)
            Expect(err).NotTo(HaveOccurred())

            // Test with NVIDIA system state
            nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
            err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil, true)
            Expect(err).NotTo(HaveOccurred())

            metaBackendPath := filepath.Join(tempDir, "meta-backend")
            Expect(metaBackendPath).To(BeADirectory())

            concreteBackendPath := filepath.Join(tempDir, "nvidia-backend")
            Expect(concreteBackendPath).To(BeADirectory())

            allBackends, err := ListSystemBackends(tempDir)
            Expect(err).NotTo(HaveOccurred())
            Expect(allBackends).To(HaveKey("meta-backend"))
            Expect(allBackends).To(HaveKey("nvidia-backend"))
            Expect(allBackends["meta-backend"]).To(BeEmpty())

            // Delete meta backend by name
            err = DeleteBackendFromSystem(tempDir, "meta-backend")
            Expect(err).NotTo(HaveOccurred())

            // Verify meta backend directory is deleted
            Expect(metaBackendPath).NotTo(BeADirectory())

            // Verify concrete backend directory is deleted
            Expect(concreteBackendPath).NotTo(BeADirectory())
        })

        It("should handle meta backend deletion correctly with aliases pointing to the same backend", func() {
            metaBackend := &GalleryBackend{
                Metadata: Metadata{
                    Name: "meta-backend",
                },
                Alias: "meta-backend",
                CapabilitiesMap: map[string]string{
                    "nvidia": "nvidia-backend",
                    "amd":    "amd-backend",
                    "intel":  "intel-backend",
                },
            }

            nvidiaBackend := &GalleryBackend{
                Metadata: Metadata{
                    Name: "nvidia-backend",
                },
                Alias: "meta-backend",
                URI:   testImage,
            }

            amdBackend := &GalleryBackend{
                Metadata: Metadata{
                    Name: "amd-backend",
                },
                Alias: "meta-backend",
                URI:   testImage,
            }

            gallery := config.Gallery{
                Name: "test-gallery",
                URL:  "file://" + filepath.Join(tempDir, "backend-gallery.yaml"),
            }

            galleryBackend := GalleryBackends{amdBackend, nvidiaBackend, metaBackend}

            dat, err := yaml.Marshal(galleryBackend)
            Expect(err).NotTo(HaveOccurred())
            err = os.WriteFile(filepath.Join(tempDir, "backend-gallery.yaml"), dat, 0644)
            Expect(err).NotTo(HaveOccurred())

            // Test with NVIDIA system state
            nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
            err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil, true)
            Expect(err).NotTo(HaveOccurred())

            metaBackendPath := filepath.Join(tempDir, "meta-backend")
            Expect(metaBackendPath).To(BeADirectory())

            concreteBackendPath := filepath.Join(tempDir, "nvidia-backend")
            Expect(concreteBackendPath).To(BeADirectory())

            allBackends, err := ListSystemBackends(tempDir)
            Expect(err).NotTo(HaveOccurred())
            Expect(allBackends).To(HaveKey("meta-backend"))
            Expect(allBackends).To(HaveKey("nvidia-backend"))
            Expect(allBackends["meta-backend"]).To(Equal(filepath.Join(tempDir, "nvidia-backend", "run.sh")))

            // Delete meta backend by name
            err = DeleteBackendFromSystem(tempDir, "meta-backend")
            Expect(err).NotTo(HaveOccurred())

            // Verify meta backend directory is deleted
            Expect(metaBackendPath).NotTo(BeADirectory())

            // Verify concrete backend directory is deleted
            Expect(concreteBackendPath).NotTo(BeADirectory())
        })

        It("should list meta backends correctly in system backends", func() {
            // Create a meta backend directory with metadata
            metaBackendPath := filepath.Join(tempDir, "meta-backend")
@@ -227,7 +365,9 @@ var _ = Describe("Gallery Backends", func() {
            concreteBackendPath := filepath.Join(tempDir, "concrete-backend")
            err = os.MkdirAll(concreteBackendPath, 0750)
            Expect(err).NotTo(HaveOccurred())
            err = os.WriteFile(filepath.Join(concreteBackendPath, "run.sh"), []byte("#!/bin/bash"), 0755)
            err = os.WriteFile(filepath.Join(concreteBackendPath, "metadata.json"), []byte("{}"), 0755)
            Expect(err).NotTo(HaveOccurred())
            err = os.WriteFile(filepath.Join(concreteBackendPath, "run.sh"), []byte(""), 0755)
            Expect(err).NotTo(HaveOccurred())

            // List system backends
@@ -238,8 +378,8 @@ var _ = Describe("Gallery Backends", func() {
            Expect(backends).To(HaveKey("meta-backend"))
            Expect(backends).To(HaveKey("concrete-backend"))

            // meta-backend should point to its own run.sh
            Expect(backends["meta-backend"]).To(Equal(filepath.Join(tempDir, "meta-backend", "run.sh")))
            // meta-backend should be empty
            Expect(backends["meta-backend"]).To(BeEmpty())
            // concrete-backend should point to its own run.sh
            Expect(backends["concrete-backend"]).To(Equal(filepath.Join(tempDir, "concrete-backend", "run.sh")))
        })
@@ -319,6 +459,10 @@ var _ = Describe("Gallery Backends", func() {
        for _, name := range backendNames {
            err := os.MkdirAll(filepath.Join(tempDir, name), 0750)
            Expect(err).NotTo(HaveOccurred())
            err = os.WriteFile(filepath.Join(tempDir, name, "metadata.json"), []byte("{}"), 0755)
            Expect(err).NotTo(HaveOccurred())
            err = os.WriteFile(filepath.Join(tempDir, name, "run.sh"), []byte(""), 0755)
            Expect(err).NotTo(HaveOccurred())
        }

        backends, err := ListSystemBackends(tempDir)
@@ -349,6 +493,8 @@ var _ = Describe("Gallery Backends", func() {
        Expect(err).NotTo(HaveOccurred())
        err = os.WriteFile(filepath.Join(backendPath, "metadata.json"), metadataData, 0644)
        Expect(err).NotTo(HaveOccurred())
        err = os.WriteFile(filepath.Join(backendPath, "run.sh"), []byte(""), 0755)
        Expect(err).NotTo(HaveOccurred())

        backends, err := ListSystemBackends(tempDir)
        Expect(err).NotTo(HaveOccurred())

@@ -121,7 +121,12 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) (Galler

    // Get models from galleries
    for _, gallery := range galleries {
        galleryModels, err := getGalleryElements[*GalleryModel](gallery, basePath)
        galleryModels, err := getGalleryElements[*GalleryModel](gallery, basePath, func(model *GalleryModel) bool {
            if _, err := os.Stat(filepath.Join(basePath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil {
                return true
            }
            return false
        })
        if err != nil {
            return nil, err
        }
@@ -137,7 +142,14 @@ func AvailableBackends(galleries []config.Gallery, basePath string) (GalleryElem

    // Get backends from galleries
    for _, gallery := range galleries {
        galleryModels, err := getGalleryElements[*GalleryBackend](gallery, basePath)
        galleryModels, err := getGalleryElements[*GalleryBackend](gallery, basePath, func(backend *GalleryBackend) bool {
            backends, err := ListSystemBackends(basePath)
            if err != nil {
                return false
            }
            _, exists := backends[backend.GetName()]
            return exists
        })
        if err != nil {
            return nil, err
        }
@@ -162,7 +174,7 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error)
    return refFile, err
}

func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string) ([]T, error) {
func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string, isInstalledCallback func(T) bool) ([]T, error) {
    var models []T = []T{}

    if strings.HasSuffix(gallery.URL, ".ref") {
@@ -187,15 +199,7 @@ func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath strin
    // Add gallery to models
    for _, model := range models {
        model.SetGallery(gallery)
        // we check if the model was already installed by checking if the config file exists
        // TODO: (what to do if the model doesn't install a config file?)
        // TODO: This is sub-optimal now that the gallery handles both backends and models - we need to abstract this away
        if _, err := os.Stat(filepath.Join(basePath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil {
            model.SetInstalled(true)
        }
        if _, err := os.Stat(filepath.Join(basePath, model.GetName())); err == nil {
            model.SetInstalled(true)
        }
        model.SetInstalled(isInstalledCallback(model))
    }
    return models, nil
}

@@ -10,6 +10,7 @@ import (
    "dario.cat/mergo"
    "github.com/mudler/LocalAI/core/config"
    lconfig "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/system"
    "github.com/mudler/LocalAI/pkg/downloader"
    "github.com/mudler/LocalAI/pkg/utils"

@@ -69,7 +70,9 @@ type PromptTemplate struct {
}

// Installs a model from the gallery
func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func InstallModelFromGallery(
    modelGalleries, backendGalleries []config.Gallery,
    name string, basePath, backendBasePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan, automaticallyInstallBackend bool) error {

    applyModel := func(model *GalleryModel) error {
        name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
@@ -119,14 +122,26 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
            return err
        }

        if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil {
        installedModel, err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan)
        if err != nil {
            return err
        }

        if automaticallyInstallBackend && installedModel.Backend != "" {
            systemState, err := system.GetSystemState()
            if err != nil {
                return err
            }

            if err := InstallBackendFromGallery(backendGalleries, systemState, installedModel.Backend, backendBasePath, downloadStatus, false); err != nil {
                return err
            }
        }

        return nil
    }

    models, err := AvailableGalleryModels(galleries, basePath)
    models, err := AvailableGalleryModels(modelGalleries, basePath)
    if err != nil {
        return err
    }
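The widened signature threads backend galleries and the auto-install flag through model installation; a hedged sketch of a direct call, assuming only the signature above (gallery URLs, model ID, and paths are hypothetical):

package main

import (
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/gallery"
)

func main() {
    modelGalleries := []config.Gallery{{Name: "localai", URL: "https://example.com/models.yaml"}}       // hypothetical
    backendGalleries := []config.Gallery{{Name: "backends", URL: "https://example.com/backends.yaml"}} // hypothetical
    noProgress := func(fileName, current, total string, percentage float64) {}
    // enforceScan=true keeps the pre-download scanner on; the final true opts in to
    // auto-installing the backend declared by the installed model's config
    if err := gallery.InstallModelFromGallery(
        modelGalleries, backendGalleries,
        "localai@my-model", "/tmp/models", "/tmp/backends",
        gallery.GalleryModel{}, noProgress, true, true,
    ); err != nil {
        panic(err)
    }
}
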
@@ -139,11 +154,11 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
    return applyModel(model)
}

func InstallModel(basePath, nameOverride string, config *ModelConfig, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func InstallModel(basePath, nameOverride string, config *ModelConfig, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) (*lconfig.BackendConfig, error) {
    // Create base path if it doesn't exist
    err := os.MkdirAll(basePath, 0750)
    if err != nil {
        return fmt.Errorf("failed to create base path: %v", err)
        return nil, fmt.Errorf("failed to create base path: %v", err)
    }

    if len(configOverrides) > 0 {
@@ -155,7 +170,7 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
        log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)

        if err := utils.VerifyPath(file.Filename, basePath); err != nil {
            return err
            return nil, err
        }

        // Create file path
@@ -165,19 +180,19 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
            scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI))
            if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
                log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
                return err
                return nil, err
            }
        }
        uri := downloader.URI(file.URI)
        if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
            return err
            return nil, err
        }
    }

    // Write prompt template contents to separate files
    for _, template := range config.PromptTemplates {
        if err := utils.VerifyPath(template.Name+".tmpl", basePath); err != nil {
            return err
            return nil, err
        }
        // Create file path
        filePath := filepath.Join(basePath, template.Name+".tmpl")
@@ -185,12 +200,12 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
        // Create parent directory
        err := os.MkdirAll(filepath.Dir(filePath), 0750)
        if err != nil {
            return fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err)
            return nil, fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err)
        }
        // Create and write file content
        err = os.WriteFile(filePath, []byte(template.Content), 0600)
        if err != nil {
            return fmt.Errorf("failed to write prompt template %q: %v", template.Name, err)
            return nil, fmt.Errorf("failed to write prompt template %q: %v", template.Name, err)
        }

        log.Debug().Msgf("Prompt template %q written", template.Name)
@@ -202,9 +217,11 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
    }

    if err := utils.VerifyPath(name+".yaml", basePath); err != nil {
        return err
        return nil, err
    }

    backendConfig := lconfig.BackendConfig{}

    // write config file
    if len(configOverrides) != 0 || len(config.ConfigFile) != 0 {
        configFilePath := filepath.Join(basePath, name+".yaml")
@@ -213,33 +230,33 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
        configMap := make(map[string]interface{})
        err = yaml.Unmarshal([]byte(config.ConfigFile), &configMap)
        if err != nil {
            return fmt.Errorf("failed to unmarshal config YAML: %v", err)
            return nil, fmt.Errorf("failed to unmarshal config YAML: %v", err)
        }

        configMap["name"] = name

        if err := mergo.Merge(&configMap, configOverrides, mergo.WithOverride); err != nil {
            return err
            return nil, err
        }

        // Write updated config file
        updatedConfigYAML, err := yaml.Marshal(configMap)
        if err != nil {
            return fmt.Errorf("failed to marshal updated config YAML: %v", err)
            return nil, fmt.Errorf("failed to marshal updated config YAML: %v", err)
        }

        backendConfig := lconfig.BackendConfig{}
        err = yaml.Unmarshal(updatedConfigYAML, &backendConfig)
        if err != nil {
            return fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
            return nil, fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
        }

        if !backendConfig.Validate() {
            return fmt.Errorf("failed to validate updated config YAML")
            return nil, fmt.Errorf("failed to validate updated config YAML")
        }

        err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
        if err != nil {
            return fmt.Errorf("failed to write updated config file: %v", err)
            return nil, fmt.Errorf("failed to write updated config file: %v", err)
        }

        log.Debug().Msgf("Written config file %s", configFilePath)
@@ -249,14 +266,12 @@ func InstallModel(basePath, nameOverride string, config *ModelConfig, configOver
    modelFile := filepath.Join(basePath, galleryFileName(name))
    data, err := yaml.Marshal(config)
    if err != nil {
        return err
        return nil, err
    }

    log.Debug().Msgf("Written gallery file %s", modelFile)

    return os.WriteFile(modelFile, data, 0600)

    //return nil
    return &backendConfig, os.WriteFile(modelFile, data, 0600)
}

func galleryFileName(name string) string {

@@ -29,7 +29,7 @@ var _ = Describe("Model test", func() {
        defer os.RemoveAll(tempdir)
        c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
        Expect(err).ToNot(HaveOccurred())
        err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
        _, err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
        Expect(err).ToNot(HaveOccurred())

        for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {
@@ -79,7 +79,7 @@ var _ = Describe("Model test", func() {
        Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
        Expect(models[0].Installed).To(BeFalse())

        err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
        err = InstallModelFromGallery(galleries, []config.Gallery{}, "test@bert", tempdir, "", GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true, true)
        Expect(err).ToNot(HaveOccurred())

        dat, err := os.ReadFile(filepath.Join(tempdir, "bert.yaml"))
@@ -116,7 +116,7 @@ var _ = Describe("Model test", func() {
        c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
        Expect(err).ToNot(HaveOccurred())

        err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
        _, err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
        Expect(err).ToNot(HaveOccurred())

        for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -132,7 +132,7 @@ var _ = Describe("Model test", func() {
        c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
        Expect(err).ToNot(HaveOccurred())

        err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
        _, err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
        Expect(err).ToNot(HaveOccurred())

        for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -158,7 +158,7 @@ var _ = Describe("Model test", func() {
        c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
        Expect(err).ToNot(HaveOccurred())

        err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
        _, err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
        Expect(err).To(HaveOccurred())
    })
})

@@ -41,7 +41,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
        }
        responses <- initialMessage

        ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
        ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
            usage := schema.OpenAIUsage{
                PromptTokens:     tokenUsage.Prompt,
                CompletionTokens: tokenUsage.Completion,
@@ -68,7 +68,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
    }
    processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
        result := ""
        _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
        _, tokenUsage, _ := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
            result += s
            // TODO: Change generated BNF grammar to be compliant with the schema so we can
            // stream the result token by token here.
@@ -92,7 +92,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
        }
        responses <- initialMessage

        result, err := handleQuestion(config, req, ml, startupOptions, functionResults, result, prompt)
        result, err := handleQuestion(config, cl, req, ml, startupOptions, functionResults, result, prompt)
        if err != nil {
            log.Error().Err(err).Msg("error handling question")
            return
@@ -383,7 +383,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat

    // no streaming mode
    default:
        result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {

        tokenCallback := func(s string, c *[]schema.Choice) {
            if !shouldUseFn {
                // no function is called, just reply and use stop as finish reason
                *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
@@ -403,7 +404,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat

            switch {
            case noActionsToRun:
                result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
                result, err := handleQuestion(config, cl, input, ml, startupOptions, results, s, predInput)
                if err != nil {
                    log.Error().Err(err).Msg("error handling question")
                    return
@@ -458,7 +459,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
                }
            }

        }, nil)
        }

        result, tokenUsage, err := ComputeChoices(
            input,
            predInput,
            config,
            cl,
            startupOptions,
            ml,
            tokenCallback,
            nil,
        )
        if err != nil {
            return err
        }
@@ -489,7 +501,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
    }
}

func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, result, prompt string) (string, error) {
func handleQuestion(config *config.BackendConfig, cl *config.BackendConfigLoader, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, result, prompt string) (string, error) {

    if len(funcResults) == 0 && result != "" {
        log.Debug().Msgf("no function results, but we had a message from the LLM")
@@ -538,7 +550,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
        audios = append(audios, m.StringAudios...)
    }

    predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, o, nil)
    predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, cl, o, nil)
    if err != nil {
        log.Error().Err(err).Msg("model inference failed")
        return "", err

@@ -31,7 +31,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
    created := int(time.Now().Unix())

    process := func(id string, s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
        ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
        tokenCallback := func(s string, tokenUsage backend.TokenUsage) bool {
            usage := schema.OpenAIUsage{
                PromptTokens:     tokenUsage.Prompt,
                CompletionTokens: tokenUsage.Completion,
@@ -58,7 +58,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e

            responses <- resp
            return true
        })
        }
        ComputeChoices(req, s, config, cl, appConfig, loader, func(s string, c *[]schema.Choice) {}, tokenCallback)
        close(responses)
    }

@@ -168,7 +169,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
        }

        r, tokenUsage, err := ComputeChoices(
            input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
            input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
                *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
            }, nil)
        if err != nil {

@@ -56,7 +56,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
        log.Debug().Msgf("Template found, input modified to: %s", i)
    }

    r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
    r, tokenUsage, err := ComputeChoices(input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
        *c = append(*c, schema.Choice{Text: s})
    }, nil)
    if err != nil {

@@ -12,6 +12,7 @@ func ComputeChoices(
    req *schema.OpenAIRequest,
    predInput string,
    config *config.BackendConfig,
    bcl *config.BackendConfigLoader,
    o *config.ApplicationConfig,
    loader *model.ModelLoader,
    cb func(string, *[]schema.Choice),
@@ -37,7 +38,7 @@ func ComputeChoices(
    }

    // get the model function to call for the result
    predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, o, tokenCallback)
    predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, bcl, o, tokenCallback)
    if err != nil {
        return result, backend.TokenUsage{}, err
    }

@@ -223,7 +223,7 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC
        return c.SendString(elements.ProgressBar("0"))
    }

    if status.Progress == 100 {
    if status.Progress == 100 && status.Processed && status.Message == "completed" {
        c.Set("HX-Trigger", "done") // this triggers /browse/backend/job/:uid
        return c.SendString(elements.ProgressBar("100"))
    }

@@ -243,7 +243,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
        return c.SendString(elements.ProgressBar("0"))
    }

    if status.Progress == 100 {
    if status.Progress == 100 && status.Processed && status.Message == "completed" {
        c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done)
        return c.SendString(elements.ProgressBar("100"))
    }

@@ -24,7 +24,7 @@ func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], s
        g.modelLoader.DeleteExternalBackend(op.GalleryElementName)
    } else {
        log.Warn().Msgf("installing backend %s", op.GalleryElementName)
        err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, systemState, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback)
        err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, systemState, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback, true)
        if err == nil {
            err = gallery.RegisterBackends(g.appConfig.BackendsPath, g.modelLoader)
        }

@@ -7,6 +7,7 @@ import (

    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/gallery"
    "github.com/mudler/LocalAI/core/system"
    "github.com/mudler/LocalAI/pkg/utils"
    "gopkg.in/yaml.v2"
)
@@ -22,7 +23,7 @@ func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *c
        utils.DisplayDownloadFunction(fileName, current, total, percentage)
    }

    err := processModelOperation(op, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback)
    err := processModelOperation(op, g.appConfig.ModelPath, g.appConfig.BackendsPath, g.appConfig.EnforcePredownloadScans, g.appConfig.AutoloadBackendGalleries, progressCallback)
    if err != nil {
        return err
    }
@@ -49,7 +50,7 @@ func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *c
    return nil
}

func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func installModelFromRemoteConfig(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan, automaticallyInstallBackend bool, backendGalleries []config.Gallery, backendBasePath string) error {
    config, err := gallery.GetGalleryConfigFromURL[gallery.ModelConfig](req.URL, modelPath)
    if err != nil {
        return err
@@ -57,7 +58,23 @@ func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus fun

    config.Files = append(config.Files, req.AdditionalFiles...)

    return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan)
    installedModel, err := gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan)
    if err != nil {
        return err
    }

    if automaticallyInstallBackend && installedModel.Backend != "" {
        systemState, err := system.GetSystemState()
        if err != nil {
            return err
        }

        if err := gallery.InstallBackendFromGallery(backendGalleries, systemState, installedModel.Backend, backendBasePath, downloadStatus, false); err != nil {
            return err
        }
    }

    return nil
}

type galleryModel struct {
@@ -65,22 +82,22 @@ type galleryModel struct {
    ID string `json:"id"`
}

func processRequests(modelPath string, enforceScan bool, galleries []config.Gallery, requests []galleryModel) error {
func processRequests(modelPath, backendBasePath string, enforceScan, automaticallyInstallBackend bool, galleries []config.Gallery, backendGalleries []config.Gallery, requests []galleryModel) error {
    var err error
    for _, r := range requests {
        utils.ResetDownloadTimers()
        if r.ID == "" {
            err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan)
            err = installModelFromRemoteConfig(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan, automaticallyInstallBackend, backendGalleries, backendBasePath)

        } else {
            err = gallery.InstallModelFromGallery(
                galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan)
                galleries, backendGalleries, r.ID, modelPath, backendBasePath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan, automaticallyInstallBackend)
        }
    }
    return err
}

func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []config.Gallery) error {
func ApplyGalleryFromFile(modelPath, backendBasePath string, enforceScan, automaticallyInstallBackend bool, galleries []config.Gallery, backendGalleries []config.Gallery, s string) error {
    dat, err := os.ReadFile(s)
    if err != nil {
        return err
@@ -91,24 +108,26 @@ func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []con
        return err
    }

    return processRequests(modelPath, enforceScan, galleries, requests)
    return processRequests(modelPath, backendBasePath, enforceScan, automaticallyInstallBackend, galleries, backendGalleries, requests)
}

func ApplyGalleryFromString(modelPath, s string, enforceScan bool, galleries []config.Gallery) error {
func ApplyGalleryFromString(modelPath, backendBasePath string, enforceScan, automaticallyInstallBackend bool, galleries []config.Gallery, backendGalleries []config.Gallery, s string) error {
    var requests []galleryModel
    err := json.Unmarshal([]byte(s), &requests)
    if err != nil {
        return err
    }

    return processRequests(modelPath, enforceScan, galleries, requests)
    return processRequests(modelPath, backendBasePath, enforceScan, automaticallyInstallBackend, galleries, backendGalleries, requests)
}

// processModelOperation handles the installation or deletion of a model
func processModelOperation(
    op *GalleryOp[gallery.GalleryModel],
    modelPath string,
    backendBasePath string,
    enforcePredownloadScans bool,
    automaticallyInstallBackend bool,
    progressCallback func(string, string, string, float64),
) error {
    // delete a model
@@ -140,7 +159,7 @@ func processModelOperation(

    // if the request contains a gallery name, we apply the gallery from the gallery list
    if op.GalleryElementName != "" {
        return gallery.InstallModelFromGallery(op.Galleries, op.GalleryElementName, modelPath, op.Req, progressCallback, enforcePredownloadScans)
        return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, op.GalleryElementName, modelPath, backendBasePath, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
        // } else if op.ConfigURL != "" {
        //	err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
        //	if err != nil {
@@ -148,6 +167,6 @@ func processModelOperation(
        //	}
        //	return cl.Preload(modelPath)
    } else {
        return prepareModel(modelPath, op.Req, progressCallback, enforcePredownloadScans)
        return installModelFromRemoteConfig(modelPath, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries, backendBasePath)
    }
}

@@ -10,8 +10,9 @@ type GalleryOp[T any] struct {
    GalleryElementName string
    Delete             bool

    Req       T
    Galleries []config.Gallery
    Req              T
    Galleries        []config.Gallery
    BackendGalleries []config.Gallery
}

type GalleryOpStatus struct {

@@ -1,6 +1,7 @@
package system

import (
    "os"
    "strings"

    "github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -11,6 +12,29 @@ type SystemState struct {
    GPUVendor string
}

func (s *SystemState) Capability() string {
    if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY") != "" {
        return os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY")
    }

    capabilityRunFile := "/run/localai/capability"
    if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE") != "" {
        capabilityRunFile = os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE")
    }

    // Check if /run/localai/capability exists and use it
    // This might be used by e.g. container images to specify which
    // backends to pull in automatically when installing meta backends.
    if _, err := os.Stat(capabilityRunFile); err == nil {
        capability, err := os.ReadFile(capabilityRunFile)
        if err == nil {
            return string(capability)
        }
    }

    return s.GPUVendor
}

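`Capability()` resolves in a fixed order: the env override first, then the capability run file, then the detected GPU vendor. A small sketch of the override behaviour (values are illustrative):

package main

import (
    "fmt"
    "os"

    "github.com/mudler/LocalAI/core/system"
)

func main() {
    s := &system.SystemState{GPUVendor: "nvidia"}
    fmt.Println(s.Capability()) // "nvidia": falls through to the detected vendor

    // the env var wins over both the run file and the detected vendor
    os.Setenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY", "intel")
    fmt.Println(s.Capability()) // "intel"
}
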
func GetSystemState() (*SystemState, error) {
    gpuVendor, _ := detectGPUVendor()
    log.Debug().Str("gpuVendor", gpuVendor).Msg("GPU vendor")
@@ -30,13 +54,13 @@ func detectGPUVendor() (string, error) {
        if gpu.DeviceInfo != nil {
            if gpu.DeviceInfo.Vendor != nil {
                gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name)
                if gpuVendorName == "NVIDIA" {
                if strings.Contains(gpuVendorName, "NVIDIA") {
                    return "nvidia", nil
                }
                if gpuVendorName == "AMD" {
                if strings.Contains(gpuVendorName, "AMD") {
                    return "amd", nil
                }
                if gpuVendorName == "INTEL" {
                if strings.Contains(gpuVendorName, "INTEL") {
                    return "intel", nil
                }
                return "nvidia", nil

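Real PCI vendor strings are rarely the bare brand name (for example "NVIDIA Corporation"), which is why the exact comparison becomes a substring check; a quick illustration:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // a typical PCI vendor string; the exact wording varies by system
    raw := "NVIDIA Corporation"
    name := strings.ToUpper(raw)
    fmt.Println(name == "NVIDIA")                 // false: the old exact match missed it
    fmt.Println(strings.Contains(name, "NVIDIA")) // true: the new check matches
}
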
@@ -11,7 +11,7 @@ services:
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:22.04
        - BASE_IMAGE=ubuntu:24.04
    ports:
      - 8080:8080
    env_file:

@@ -205,9 +205,9 @@ Standard container images do not have pre-installed models.

| Description | Quay | Docker Hub |
| --- | --- | --- |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-hipblas` | `localai/localai:master-hipblas` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-hipblas` | `localai/localai:master-gpu-hipblas` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-hipblas` | `localai/localai:latest-gpu-hipblas` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas` | `localai/localai:{{< version >}}-hipblas` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-hipblas` | `localai/localai:{{< version >}}-gpu-hipblas` |

{{% /tab %}}

@@ -1,3 +1,3 @@
{
-    "version": "v3.0.0"
+    "version": "v3.1.1"
}

@@ -44,7 +44,7 @@
{{ end }}
{{ end -}}
</div>
-<div class="d-flex align-items-center m-1">
+<div class="d-none d-md-flex d-flex align-items-center m-1">
	<h5>Star us on GitHub ! </h5>
	<script async defer src="https://buttons.github.io/buttons.js"></script>
	<a class="github-button" href="https://github.com/mudler/LocalAI" data-color-scheme="no-preference: light; light: light; dark: dark;" data-icon="octicon-star" data-size="large" data-show-count="true" aria-label="Star mudler/LocalAI on GitHub">Star</a>

docs/static/install.sh (vendored, 2 changes)
@@ -698,7 +698,7 @@ install_docker() {
            $envs \
            -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
    elif [ "$HAS_AMD" ]; then
-        IMAGE_TAG=${LOCALAI_VERSION}-hipblas
+        IMAGE_TAG=${LOCALAI_VERSION}-gpu-hipblas
        # AIO
        if [ "$USE_AIO" = true ]; then
            IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-hipblas

@@ -1279,6 +1279,337 @@
    - filename: Menlo_Jan-nano-128k-Q4_K_M.gguf
      sha256: a864031a138288da427ca176afd61d7fe2b03fd19a84a656b2691aa1f7a12921
      uri: huggingface://bartowski/Menlo_Jan-nano-128k-GGUF/Menlo_Jan-nano-128k-Q4_K_M.gguf
- !!merge <<: *qwen3
  icon: https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-V1.3/resolve/main/qwen3-total-recall.gif
  name: "qwen3-55b-a3b-total-recall-v1.3-i1"
  urls:
    - https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-V1.3
    - https://huggingface.co/mradermacher/Qwen3-55B-A3B-TOTAL-RECALL-V1.3-i1-GGUF
  description: |
    WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
    This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.

    This model is for all use cases, but excels in creative use cases specifically.

    This model is based on Qwen3-30B-A3B (MOE, 128 experts, 8 activated), with Brainstorm 40X (by DavidAU - details at bottom of this page).

    This is the refined version -V1.3- from this project (see this repo for all settings, details, system prompts, example generations, etc.):

    https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF/

    This version -1.3- is slightly smaller, with further refinements to the Brainstorm adapter.

    This will change generation and reasoning performance within the model.
  overrides:
    parameters:
      model: Qwen3-55B-A3B-TOTAL-RECALL-V1.3.i1-Q4_K_M.gguf
  files:
    - filename: Qwen3-55B-A3B-TOTAL-RECALL-V1.3.i1-Q4_K_M.gguf
      sha256: bcf5a1f8a40e9438a19b23dfb40e872561c310296c5ac804f937a0e3c1376def
      uri: huggingface://mradermacher/Qwen3-55B-A3B-TOTAL-RECALL-V1.3-i1-GGUF/Qwen3-55B-A3B-TOTAL-RECALL-V1.3.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "qwen3-55b-a3b-total-recall-deep-40x"
  icon: https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-V1.3/resolve/main/qwen3-total-recall.gif
  urls:
    - https://huggingface.co/DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF
  description: |
    WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
    Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF

    A highly experimental model ("tamer" versions below) based on Qwen3-30B-A3B (MOE, 128 experts, 8 activated), with Brainstorm 40X (by DavidAU - details at bottom of this page).

    These modifications blow the model (V1) out to 87 layers, 1046 tensors and 55B parameters.

    Note that some versions are smaller than this, with fewer layers/tensors and smaller parameter counts.

    The adapter extensively alters performance, reasoning and output generation.

    Exceptional changes in creative, prose and general performance.

    Regens of the same prompt - even with the same settings - will be very different.

    THREE example generations below - creative (generated with Q3_K_M, V1 model).

    ONE example generation (#4) - non-creative (generated with Q3_K_M, V1 model).

    You can run this model on CPU and/or GPU due to unique model construction, size of experts and total activated experts at 3B parameters (8 experts), which translates into roughly 6B parameters in this version.

    Two quants uploaded for testing: Q3_K_M, Q4_K_M

    V3, V4 and V5 are also available in these two quants.

    V2 and V6 in Q3_K_M only; as are V1.3, 1.4, 1.5, 1.7 and V7 (newest).

    NOTE: V2 and up are from source model 2; V1 and 1.3, 1.4, 1.5, 1.7 are from source model 1.
  overrides:
    parameters:
      model: Qwen3-55B-A3B-TOTAL-RECALL-V5-Deep-40X-q4_K_M.gguf
  files:
    - filename: Qwen3-55B-A3B-TOTAL-RECALL-V5-Deep-40X-q4_K_M.gguf
      sha256: 20ef786a8c8e74eb257aa3069e237cbd40f42d25f5502fed6fa016bb8afbdae4
      uri: huggingface://DavidAU/Qwen3-55B-A3B-TOTAL-RECALL-Deep-40X-GGUF/Qwen3-55B-A3B-TOTAL-RECALL-V5-Deep-40X-q4_K_M.gguf
- !!merge <<: *qwen3
  name: "qwen3-42b-a3b-stranger-thoughts-deep20x-abliterated-uncensored-i1"
  icon: https://huggingface.co/DavidAU/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored/resolve/main/qwen-42b-ablit.jpg
  urls:
    - https://huggingface.co/DavidAU/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored
    - https://huggingface.co/mradermacher/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored-i1-GGUF
  description: |
    WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
    Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored

    This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.

    ABOUT:

    Qwen's excellent "Qwen3-30B-A3B", abliterated by "huihui-ai", then combined with Brainstorm 20x (tech notes at bottom of the page) in a MOE (128 experts) at 42B parameters (up from 30B).

    This pushes Qwen's abliterated/uncensored model to the absolute limit for creative use cases.

    Prose (all), reasoning, thinking ... all will be very different from regular "Qwen 3s".

    This model will generate horror, fiction, erotica - you name it - in vivid, stark detail.

    It will NOT hold back.

    Likewise, regen(s) of the same prompt - even at the same settings - will create very different version(s) too.

    See FOUR examples below.

    Model retains full reasoning and output generation of a Qwen3 MOE, but has not been tested for "non-creative" use cases.

    Model is set with Qwen's default config:

    40k context
    8 of 128 experts activated.
    Chatml OR Jinja Template (embedded)

    IMPORTANT:

    See usage guide / repo below to get the most out of this model, as settings are very specific.

    USAGE GUIDE:

    Please refer to this model card for:

    Specific usage, suggested settings, changing ACTIVE EXPERTS, templates, settings and the like.
    How to maximize this model in "uncensored" form, with specific notes on "abliterated" models.
    Rep pen / temp settings specific to getting the model to perform strongly.

    https://huggingface.co/DavidAU/Qwen3-18B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF

    GGUF / QUANTS / SPECIAL SHOUTOUT:

    Special thanks to team Mradermacher for making the quants!

    https://huggingface.co/mradermacher/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored-GGUF

    KNOWN ISSUES:

    Model may "mis-capitalize" word(s) - lowercase where uppercase should be - from time to time.
    Model may add an extra space before a word from time to time.
    Incorrect template and/or settings will result in a drop in performance / poor performance.
  overrides:
    parameters:
      model: Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored.i1-Q4_K_M.gguf
  files:
    - filename: Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored.i1-Q4_K_M.gguf
      sha256: ef4a601adfc2897b214cda2d16f76dcb8215a1b994bc76c696158d68ec535dd8
      uri: huggingface://mradermacher/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored-i1-GGUF/Qwen3-42B-A3B-Stranger-Thoughts-Deep20x-Abliterated-Uncensored.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "qwen3-22b-a3b-the-harley-quinn"
  icon: https://huggingface.co/DavidAU/Qwen3-22B-A3B-The-Harley-Quinn/resolve/main/qwen3-harley-quinn-23b.webp
  urls:
    - https://huggingface.co/DavidAU/Qwen3-22B-A3B-The-Harley-Quinn
    - https://huggingface.co/mradermacher/Qwen3-22B-A3B-The-Harley-Quinn-GGUF
  description: |
    WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
    Qwen3-22B-A3B-The-Harley-Quinn

    This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.

    ABOUT:

    A stranger, yet radically different version of Kalmaze's "Qwen/Qwen3-16B-A3B" with the experts pruned to 64 (from 128, the Qwen 3 30B-A3B version), and then I added 19 layers (Brainstorm 20x by DavidAU - info at bottom of this page), expanding the model to 22B total parameters.

    The goal: slightly alter the model, to address some odd creative thinking and output choices.

    Then... Harley Quinn showed up, and then it was a party!

    A wild, out of control (sometimes) but never boring party.

    Please note that the modifications affect the entire model operation; roughly, I adjusted the model to think a little "deeper" and "ponder" a bit - but this is a very rough description.

    That being said, reasoning and output generation will be altered regardless of your use case(s).

    These modifications push Qwen's model to the absolute limit for creative use cases.

    Detail, vividness, and creativity all get a boost.

    Prose (all) will also be very different from "default" Qwen3.

    Likewise, regen(s) of the same prompt - even at the same settings - will create very different version(s) too.

    The Brainstorm 20x has also lightly de-censored the model under some conditions.

    However, this model can be prone to bouts of madness.

    It will not always behave, and it will sometimes go -wildly- off script.

    See 4 examples below.

    Model retains full reasoning and output generation of a Qwen3 MOE, but has not been tested for "non-creative" use cases.

    Model is set with Qwen's default config:

    40k context
    8 of 64 experts activated.
    Chatml OR Jinja Template (embedded)

    Four example generations below.

    IMPORTANT:

    See usage guide / repo below to get the most out of this model, as settings are very specific.

    If not set correctly, this model will not work the way it should.

    Critical settings:

    Chatml or Jinja Template (embedded, but updated version at repo below)
    Rep pen of 1.01 or 1.02; higher (1.04, 1.05) will result in "Harley Mode".
    Temp range of .6 to 1.2; at higher temps you may need to prompt the model to "output" after thinking.
    Experts set at 8-10; higher will result in "odder" output BUT it might be better.

    That being said, "Harley Quinn" may make her presence known at any moment.

    USAGE GUIDE:

    Please refer to this model card for:

    Specific usage, suggested settings, changing ACTIVE EXPERTS, templates, settings and the like.
    How to maximize this model in "uncensored" form, with specific notes on "abliterated" models.
    Rep pen / temp settings specific to getting the model to perform strongly.

    https://huggingface.co/DavidAU/Qwen3-18B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF

    GGUF / QUANTS / SPECIAL SHOUTOUT:

    Special thanks to team Mradermacher for making the quants!

    https://huggingface.co/mradermacher/Qwen3-22B-A3B-The-Harley-Quinn-GGUF

    KNOWN ISSUES:

    Model may "mis-capitalize" word(s) - lowercase where uppercase should be - from time to time.
    Model may add an extra space before a word from time to time.
    Incorrect template and/or settings will result in a drop in performance / poor performance.
    Can rant at the end / repeat. Most of the time it will stop on its own.

    Looking for the Abliterated / Uncensored version?

    https://huggingface.co/DavidAU/Qwen3-23B-A3B-The-Harley-Quinn-PUDDIN-Abliterated-Uncensored

    In some cases this "abliterated/uncensored" version may work better than this version.
    EXAMPLES

    Standard system prompt, rep pen 1.01-1.02, topk 100, topp .95, minp .05, rep pen range 64.

    Tested in LMStudio, quant Q4KS, GPU (CPU output will differ slightly).

    As this is a mid-range quant, expect better results from higher quants and/or with more experts activated.

    NOTE: Some formatting lost on copy/paste.

    WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
  overrides:
    parameters:
      model: Qwen3-22B-A3B-The-Harley-Quinn.Q4_K_M.gguf
  files:
    - filename: Qwen3-22B-A3B-The-Harley-Quinn.Q4_K_M.gguf
      sha256: a3666754efde5d6c054de53cff0f38f1bb4a20117e2502eed7018ae57017b0a2
      uri: huggingface://mradermacher/Qwen3-22B-A3B-The-Harley-Quinn-GGUF/Qwen3-22B-A3B-The-Harley-Quinn.Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "qwen3-33b-a3b-stranger-thoughts-abliterated-uncensored"
  icon: https://huggingface.co/DavidAU/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored/resolve/main/qwen3-33b-ablit.jpg
  urls:
    - https://huggingface.co/DavidAU/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored
    - https://huggingface.co/mradermacher/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF
  description: |
    WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
    Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored

    This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.

    ABOUT:

    A stranger, yet radically different version of "Qwen/Qwen3-30B-A3B", abliterated by "huihui-ai", with 4 added layers expanding the model to 33B total parameters.

    The goal: slightly alter the model, to address some odd creative thinking and output choices AND de-censor it.

    Please note that the modifications affect the entire model operation; roughly, I adjusted the model to think a little "deeper" and "ponder" a bit - but this is a very rough description.

    I also ran reasoning tests (non-creative) to ensure the model was not damaged and roughly matched original model performance.

    That being said, reasoning and output generation will be altered regardless of your use case(s).
  overrides:
    parameters:
      model: Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored.Q4_K_M.gguf
  files:
    - filename: Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored.Q4_K_M.gguf
      sha256: fc0f028ab04d4643032e5bf65c3b51ba947e97b4f562c4fc25c06b6a20b14616
      uri: huggingface://mradermacher/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored-GGUF/Qwen3-33B-A3B-Stranger-Thoughts-Abliterated-Uncensored.Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "pinkpixel_crystal-think-v2"
  icon: https://huggingface.co/PinkPixel/Crystal-Think-V2/resolve/main/crystal-think-v2-logo.png
  urls:
    - https://huggingface.co/PinkPixel/Crystal-Think-V2
    - https://huggingface.co/bartowski/PinkPixel_Crystal-Think-V2-GGUF
  description: |
    Crystal-Think is a specialized mathematical reasoning model based on Qwen3-4B, fine-tuned using Group Relative Policy Optimization (GRPO) on NVIDIA's OpenMathReasoning dataset. Version 2 introduces the new <think></think> reasoning format for enhanced step-by-step mathematical problem solving, algebraic reasoning, and mathematical code generation.
  overrides:
    parameters:
      model: PinkPixel_Crystal-Think-V2-Q4_K_M.gguf
  files:
    - filename: PinkPixel_Crystal-Think-V2-Q4_K_M.gguf
      sha256: 10f2558089c90bc9ef8036ac0b1142ad8991902ec83840a00710fd654df19aaa
      uri: huggingface://bartowski/PinkPixel_Crystal-Think-V2-GGUF/PinkPixel_Crystal-Think-V2-Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "helpingai_dhanishtha-2.0-preview"
  urls:
    - https://huggingface.co/HelpingAI/Dhanishtha-2.0-preview
    - https://huggingface.co/bartowski/HelpingAI_Dhanishtha-2.0-preview-GGUF
  description: |
    What makes Dhanishtha-2.0 special? Imagine an AI that doesn't just answer your questions instantly, but actually thinks through problems step-by-step, shows its work, and can even change its mind when it realizes a better approach. That's Dhanishtha-2.0.
    Quick Summary:
    🚀 For Everyone: An AI that shows its thinking process and can reconsider its reasoning
    👩💻 For Developers: First model with intermediate thinking capabilities, 39+ language support
    Dhanishtha-2.0 is a state-of-the-art (SOTA) model developed by HelpingAI, representing the world's first model to feature Intermediate Thinking capabilities. Unlike traditional models that provide single-pass responses, Dhanishtha-2.0 employs a revolutionary multi-phase thinking process that allows the model to think, reconsider, and refine its reasoning multiple times throughout a single response.
  overrides:
    parameters:
      model: HelpingAI_Dhanishtha-2.0-preview-Q4_K_M.gguf
  files:
    - filename: HelpingAI_Dhanishtha-2.0-preview-Q4_K_M.gguf
      sha256: 026a1f80187c9ecdd0227816a35661f3b6b7abe85971121b4c1c25b6cdd7ab86
      uri: huggingface://bartowski/HelpingAI_Dhanishtha-2.0-preview-GGUF/HelpingAI_Dhanishtha-2.0-preview-Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "agentica-org_deepswe-preview"
  icon: https://hebbkx1anhila5yf.public.blob.vercel-storage.com/IMG_3783-N75vmFhDaJtJkLR4d8pdBymos68DPo.png
  urls:
    - https://huggingface.co/agentica-org/DeepSWE-Preview
    - https://huggingface.co/bartowski/agentica-org_DeepSWE-Preview-GGUF
  description: |
    DeepSWE-Preview is a fully open-sourced, state-of-the-art coding agent trained with only reinforcement learning (RL) to excel at software engineering (SWE) tasks. DeepSWE-Preview demonstrates strong reasoning capabilities in navigating complex codebases and viewing/editing multiple files, and it serves as a foundational model for future coding agents. The model achieves an impressive 59.0% on SWE-Bench-Verified, which is currently #1 in the open-weights category.

    DeepSWE-Preview is trained on top of Qwen3-32B with thinking mode enabled. With just 200 steps of RL training, the SWE-Bench-Verified score increases by ~20%.
  overrides:
    parameters:
      model: agentica-org_DeepSWE-Preview-Q4_K_M.gguf
  files:
    - filename: agentica-org_DeepSWE-Preview-Q4_K_M.gguf
      sha256: 196a7128d3b7a59f1647792bb72c17db306f773e78d5a47feeeea92e672d761b
      uri: huggingface://bartowski/agentica-org_DeepSWE-Preview-GGUF/agentica-org_DeepSWE-Preview-Q4_K_M.gguf
- &gemma3
  url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
  name: "gemma-3-27b-it"
@@ -2006,6 +2337,35 @@
    - filename: gemma-3n-E4B-it-Q8_0.gguf
      sha256: 9f74079242c765116bd1f33123aa07160b5e93578c2d0032594b7ed97576f9c3
      uri: huggingface://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf
- !!merge <<: *gemma3
  name: "gemma-3-4b-it-max-horror-uncensored-dbl-x-imatrix"
  icon: https://huggingface.co/DavidAU/Gemma-3-4b-it-MAX-HORROR-Uncensored-DBL-X-Imatrix-GGUF/resolve/main/gemma4-horror-max2.jpg
  urls:
    - https://huggingface.co/DavidAU/Gemma-3-4b-it-MAX-HORROR-Uncensored-DBL-X-Imatrix-GGUF
  description: |
    Google's newest Gemma-3 model that has been uncensored by David_AU (maintains instruction following / model performance and adds 4 layers to the model) and reinforced with a system prompt (optional) - see below.
    The "Horror Imatrix" was built using Grand Horror 16B (at my repo). This adds a "tint" of horror to the model.

    5 examples provided (NSFW / F-Bombs galore) below with prompts at IQ4XS (56 t/s on a mid-level card).

    Context: 128k.

    "MAXED"

    This means the embed and output tensor are set at "BF16" (full precision) for all quants. This enhances quality, depth and general performance at the cost of a slightly larger quant.

    "HORROR IMATRIX"

    A strong, in-house built imatrix dataset by David_AU which results in better overall function, instruction following, output quality and stronger connections to ideas, concepts and the world in general.

    This combines with "MAXing" the quant to improve performance.
  overrides:
    parameters:
      model: Gemma-3-4b-it-MAX-HORROR-Uncensored-D_AU-Q4_K_M-imat.gguf
  files:
    - filename: Gemma-3-4b-it-MAX-HORROR-Uncensored-D_AU-Q4_K_M-imat.gguf
      sha256: 1c577e4c84311c39b3d54b0cef12857ad46e88755f858143accbfcca7cc9fc6b
      uri: huggingface://DavidAU/Gemma-3-4b-it-MAX-HORROR-Uncensored-DBL-X-Imatrix-GGUF/Gemma-3-4b-it-MAX-HORROR-Uncensored-D_AU-Q4_K_M-imat.gguf
- &llama4
  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
  icon: https://avatars.githubusercontent.com/u/153379578
@@ -3284,6 +3644,38 @@
    - filename: sophosympatheia_StrawberryLemonade-L3-70B-v1.0-Q4_K_M.gguf
      sha256: 354472a2946598e0df376f9ecb91f83d7bc9c1b32db46bf48d3ea76f892f2a97
      uri: huggingface://bartowski/sophosympatheia_StrawberryLemonade-L3-70B-v1.0-GGUF/sophosympatheia_StrawberryLemonade-L3-70B-v1.0-Q4_K_M.gguf
- !!merge <<: *llama33
  name: "steelskull_l3.3-shakudo-70b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/Y3_fED_Re3U1rd0jOPnAR.jpeg
  urls:
    - https://huggingface.co/Steelskull/L3.3-Shakudo-70b
    - https://huggingface.co/bartowski/Steelskull_L3.3-Shakudo-70b-GGUF
  description: |
    L3.3-Shakudo-70b is the result of a multi-stage merging process by Steelskull, designed to create a powerful and creative roleplaying model with a unique flavor. The creation process involved several advanced merging techniques, including weight twisting, to achieve its distinct characteristics.
    Stage 1: The Cognitive Foundation & Weight Twisting

    The process began by creating a cognitive and tool-use focused base model, L3.3-Cogmoblated-70B. This was achieved through a `model_stock` merge of several models known for their reasoning and instruction-following capabilities. This base was built upon `nbeerbower/Llama-3.1-Nemotron-lorablated-70B`, a model intentionally "ablated" to skew refusal behaviors. This technique, known as weight twisting, helps the final model adopt more desirable response patterns by building upon a foundation that is already aligned against common refusal patterns.
    Stage 2: The Twin Hydrargyrum - Flavor and Depth

    Two distinct models were then created from the Cogmoblated base:

    L3.3-M1-Hydrargyrum-70B: This model was merged using `SCE`, a technique that enhances creative writing and prose style, giving the model its unique "flavor." The Top_K for this merge was set at 0.22.
    L3.3-M2-Hydrargyrum-70B: This model was created using a `Della_Linear` merge, which focuses on integrating the "depth" of various roleplaying and narrative models. The settings for this merge were: (lambda: 1.1) (weight: 0.2) (density: 0.7) (epsilon: 0.2)

    Final Stage: Shakudo

    The final model, L3.3-Shakudo-70b, was created by merging the two Hydrargyrum variants using a 50/50 `nuslerp`. This final step combines the rich, creative prose (flavor) from the SCE merge with the strong roleplaying capabilities (depth) from the Della_Linear merge, resulting in a model with a distinct and refined narrative voice.

    A special thank you to Nectar.ai for their generous support of the open-source community and my projects.

    Additionally, a heartfelt thanks to all the Ko-fi supporters who have contributed - your generosity is deeply appreciated and helps keep this work going and the Pods spinning.
  overrides:
    parameters:
      model: Steelskull_L3.3-Shakudo-70b-Q4_K_M.gguf
  files:
    - filename: Steelskull_L3.3-Shakudo-70b-Q4_K_M.gguf
      sha256: 54590c02226f12c6f48a4af6bfed0e3c90130addd1fb8a2b4fcc1f0ab1674ef7
      uri: huggingface://bartowski/Steelskull_L3.3-Shakudo-70b-GGUF/Steelskull_L3.3-Shakudo-70b-Q4_K_M.gguf
- &rwkv
  url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
  name: "rwkv-6-world-7b"
@@ -10599,6 +10991,31 @@
    - filename: AstroSage-70B.Q4_K_M.gguf
      sha256: 1d98dabfa001d358d9f95d2deba93a94ad8baa8839c75a0129cdb6bcf1507f38
      uri: huggingface://mradermacher/AstroSage-70B-GGUF/AstroSage-70B.Q4_K_M.gguf
- !!merge <<: *llama31
  name: "thedrummer_anubis-70b-v1.1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/G-NwpVtnbdfdnPusYDzx3.png
  urls:
    - https://huggingface.co/TheDrummer/Anubis-70B-v1.1
    - https://huggingface.co/bartowski/TheDrummer_Anubis-70B-v1.1-GGUF
  description: |
    A follow-up to Anubis 70B v1.0, but with two main strengths: character adherence and unalignment.

    This is not a minor update to Anubis. It is a totally different beast.

    The model does a fantastic job portraying my various characters without fail, adhering to them in such a refreshing and pleasing degree with their dialogue and mannerisms, while also being able to impart a very nice and fresh style that doesn't feel like any other L3.3 models.

    I do think it's a solid improvement though, like it nails characters.

    It feels fresh. I am quite impressed by how it picked up on and emphasized subtle details I have not seen other models do in one of my historically accurate character cards.

    Anubis v1.1 is in my main model rotation now, I really like it! -Tarek
  overrides:
    parameters:
      model: TheDrummer_Anubis-70B-v1.1-Q4_K_M.gguf
  files:
    - filename: TheDrummer_Anubis-70B-v1.1-Q4_K_M.gguf
      sha256: a73bed551c64703737f598f1120aac28d1a62c08b5dbe2208da810936bb2522d
      uri: huggingface://bartowski/TheDrummer_Anubis-70B-v1.1-GGUF/TheDrummer_Anubis-70B-v1.1-Q4_K_M.gguf
- &deepseek
  url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" ## Deepseek
  name: "deepseek-coder-v2-lite-instruct"
@@ -12905,6 +13322,94 @@
    - filename: Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
      sha256: feb76e0158d1ebba1809de89d01671b86037f768ebd5f6fb165885ae6338b1b7
      uri: huggingface://bartowski/Delta-Vector_Austral-24B-Winton-GGUF/Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "mistral-small-3.2-46b-the-brilliant-raconteur-ii-instruct-2506"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://huggingface.co/DavidAU/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506/resolve/main/mistral-2506.jpg
  urls:
    - https://huggingface.co/DavidAU/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506
    - https://huggingface.co/mradermacher/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506-GGUF
  description: |
    WARNING: MADNESS - UN HINGED and... NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
    Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506

    This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.

    ABOUT:

    A stronger, more creative Mistral (Mistral-Small-3.2-24B-Instruct-2506) extended to 79 layers, 46B parameters with Brainstorm 40x by DavidAU (details at very bottom of the page). This is version II, which has a jump in detail and raw emotion relative to version 1.

    This model pushes Mistral's Instruct 2506 to the limit:

    Regens will be very different, even with the same prompt / settings.
    Output generation will vary vastly on each generation.
    Reasoning will be changed, and often shorter.
    Prose, creativity, word choice, and general "flow" are improved.
    Several system prompts below help push this model even further.
    Model is partly de-censored / abliterated. Most Mistrals are more uncensored than most other models too.
    This model can also be used for coding, even at low quants.
    Model can be used for all use cases too.

    As this is an instruct model, this model thrives on instructions - both in the system prompt and/or the prompt itself.

    One example below with 3 generations using Q4_K_S.

    Second example below with 2 generations using Q4_K_S.

    Quick Details:

    Model is 128k context, Jinja template (embedded) OR Chatml Template.
    Reasoning can be turned on/off (see system prompts below) and is OFF by default.
    Temp range .1 to 1 suggested, with 1-2 for enhanced creative. Above temp 2, output is strong but can be very different.
    Rep pen range: 1 (off) or very light 1.01, 1.02 to 1.05. (The model is sensitive to rep pen - this affects reasoning / generation length.)
    For creative/brainstorming use: suggest 2-5 generations due to variations caused by Brainstorm.

    Observations:

    Sometimes using the Chatml (or Alpaca / other) template (vs Jinja) will result in stronger creative generation.
    Model can be operated with NO system prompt; however, a system prompt will enhance generation.
    Longer, more detailed prompts with more instructions will result in much stronger generations.
    For prose directives: You may need to add directions, because the model may follow your instructions too closely. IE: "use short sentences" vs "use short sentences sparsely".
    Reasoning (on) can lead to better creative generation; however, sometimes generation with reasoning off is better.
    Rep pen of up to 1.05 may be needed on quants Q2k/q3ks for some prompts to address "low bit" issues.

    Detailed settings, system prompts, how-tos and examples below.

    NOTES:

    Image generation should also be possible with this model, just like the base model. Brainstorm was not applied to the image generation systems of the model... yet.

    This is Version II and subject to change / revision.

    This model is a slightly different version of:

    https://huggingface.co/DavidAU/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-Instruct-2506
  overrides:
    parameters:
      model: Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506.Q4_K_M.gguf
  files:
    - filename: Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506.Q4_K_M.gguf
      sha256: 5c8b6f21ae4f671880fafe60001f30f4c639a680e257701e474777cfcf00f8f6
      uri: huggingface://mradermacher/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506-GGUF/Mistral-Small-3.2-46B-The-Brilliant-Raconteur-II-Instruct-2506.Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "zerofata_ms3.2-paintedfantasy-visage-33b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/CQeog2SHdGUdmx8vHqL71.png
  urls:
    - https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B
    - https://huggingface.co/bartowski/zerofata_MS3.2-PaintedFantasy-Visage-33B-GGUF
  description: |
    Another experimental release. Mistral Small 3.2 24B upscaled by 18 layers to create a 33.6B model. This model then went through pretraining, SFT & DPO.

    Can't guarantee the Mistral 3.2 repetition issues are fixed, but this model seems to be less repetitive than my previous attempt.

    This is an uncensored creative model intended to excel at character driven RP / ERP where characters are portrayed creatively and proactively.
  overrides:
    parameters:
      model: zerofata_MS3.2-PaintedFantasy-Visage-33B-Q4_K_M.gguf
  files:
    - filename: zerofata_MS3.2-PaintedFantasy-Visage-33B-Q4_K_M.gguf
      sha256: bd315ad9a4cf0f47ed24f8d387b0cad1dd127e10f2bbe1c6820ae91f700ada56
      uri: huggingface://bartowski/zerofata_MS3.2-PaintedFantasy-Visage-33B-GGUF/zerofata_MS3.2-PaintedFantasy-Visage-33B-Q4_K_M.gguf
- &mudler
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
  name: "LocalAI-llama3-8b-function-call-v0.2"

@@ -256,7 +256,7 @@ func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStat
		return fmt.Errorf("failed to get image %q: %v", url, err)
	}

-	return oci.ExtractOCIImage(img, filepath.Dir(filePath), downloadStatus)
+	return oci.ExtractOCIImage(img, url, filepath.Dir(filePath), downloadStatus)
}

// Check if the file already exists

pkg/oci/image.go (202 changes)
@@ -6,6 +6,7 @@ import (
	"fmt"
	"io"
	"net/http"
+	"os"
	"runtime"
	"strconv"
	"strings"
@@ -21,6 +22,7 @@ import (
	"github.com/google/go-containerregistry/pkg/v1/mutate"
	"github.com/google/go-containerregistry/pkg/v1/remote"
	"github.com/google/go-containerregistry/pkg/v1/remote/transport"
+	"github.com/google/go-containerregistry/pkg/v1/tarball"
)

// ref: https://github.com/mudler/luet/blob/master/pkg/helpers/docker/docker.go#L117
@@ -95,31 +97,28 @@ func (pw *progressWriter) Write(p []byte) (int, error) {
}

// ExtractOCIImage will extract a given targetImage into a given targetDestination
-func ExtractOCIImage(img v1.Image, targetDestination string, downloadStatus func(string, string, string, float64)) error {
-	var reader io.Reader
-	reader = mutate.Extract(img)
-
-	if downloadStatus != nil {
-		var totalSize int64
-		layers, err := img.Layers()
-		if err != nil {
-			return err
-		}
-		for _, layer := range layers {
-			size, err := layer.Size()
-			if err != nil {
-				return err
-			}
-			totalSize += size
-		}
-		reader = io.TeeReader(reader, &progressWriter{total: totalSize, downloadStatus: downloadStatus})
-	}
-
-	_, err := archive.Apply(context.Background(),
-		targetDestination, reader,
-		archive.WithNoSameOwner())
-
-	return err
+func ExtractOCIImage(img v1.Image, imageRef string, targetDestination string, downloadStatus func(string, string, string, float64)) error {
+	// Create a temporary tar file
+	tmpTarFile, err := os.CreateTemp("", "localai-oci-*.tar")
+	if err != nil {
+		return fmt.Errorf("failed to create temporary tar file: %v", err)
+	}
+	defer os.Remove(tmpTarFile.Name())
+	defer tmpTarFile.Close()
+
+	// Download the image as tar with progress tracking
+	err = DownloadOCIImageTar(img, imageRef, tmpTarFile.Name(), downloadStatus)
+	if err != nil {
+		return fmt.Errorf("failed to download image tar: %v", err)
+	}
+
+	// Extract the tar file to the target destination
+	err = ExtractOCIImageFromTar(tmpTarFile.Name(), imageRef, targetDestination, downloadStatus)
+	if err != nil {
+		return fmt.Errorf("failed to extract image tar: %v", err)
+	}
+
+	return nil
}

func ParseImageParts(image string) (tag, repository, dstimage string) {

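The new imageRef parameter is only threaded through so progress callbacks can label the download and extraction phases with the image name. A hypothetical caller, mirroring the downloader change above; it assumes GetImage (used by the size helpers below) tolerates an empty platform and nil auth/transport:

package main

import (
	"log"

	"github.com/mudler/LocalAI/pkg/oci"
)

func main() {
	ref := "quay.io/go-skynet/example:latest" // illustrative image reference
	img, err := oci.GetImage(ref, "", nil, nil)
	if err != nil {
		log.Fatal(err)
	}
	// Passing the same ref lets "Downloading ..." / "Extracting ..."
	// progress lines name the image being processed.
	if err := oci.ExtractOCIImage(img, ref, "/tmp/dest", nil); err != nil {
		log.Fatal(err)
	}
}
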
@@ -205,3 +204,164 @@ func GetOCIImageSize(targetImage, targetPlatform string, auth *registrytypes.Aut

	return size, nil
}

// DownloadOCIImageTar downloads the compressed layers of an image and then creates an uncompressed tar
// This provides accurate size estimation and allows for later extraction
func DownloadOCIImageTar(img v1.Image, imageRef string, tarFilePath string, downloadStatus func(string, string, string, float64)) error {
	// Get layers to calculate total compressed size for estimation
	layers, err := img.Layers()
	if err != nil {
		return fmt.Errorf("failed to get layers: %v", err)
	}

	// Calculate total compressed size for progress tracking
	var totalCompressedSize int64
	for _, layer := range layers {
		size, err := layer.Size()
		if err != nil {
			return fmt.Errorf("failed to get layer size: %v", err)
		}
		totalCompressedSize += size
	}

	// Create a temporary directory to store the compressed layers
	tmpDir, err := os.MkdirTemp("", "localai-oci-layers-*")
	if err != nil {
		return fmt.Errorf("failed to create temporary directory: %v", err)
	}
	defer os.RemoveAll(tmpDir)

	// Download all compressed layers with progress tracking
	var downloadedLayers []v1.Layer
	var downloadedSize int64

	// Extract image name from the reference for display
	imageName := imageRef
	for i, layer := range layers {
		layerSize, err := layer.Size()
		if err != nil {
			return fmt.Errorf("failed to get layer size: %v", err)
		}

		// Create a temporary file for this layer
		layerFile := fmt.Sprintf("%s/layer-%d.tar.gz", tmpDir, i)
		file, err := os.Create(layerFile)
		if err != nil {
			return fmt.Errorf("failed to create layer file: %v", err)
		}

		// Create progress writer for this layer
		var writer io.Writer = file
		if downloadStatus != nil {
			writer = io.MultiWriter(file, &progressWriter{
				total:          totalCompressedSize,
				fileName:       fmt.Sprintf("Downloading %d/%d %s", i+1, len(layers), imageName),
				downloadStatus: downloadStatus,
			})
		}

		// Download the compressed layer
		layerReader, err := layer.Compressed()
		if err != nil {
			file.Close()
			return fmt.Errorf("failed to get compressed layer: %v", err)
		}

		_, err = io.Copy(writer, layerReader)
		file.Close()
		if err != nil {
			return fmt.Errorf("failed to download layer %d: %v", i, err)
		}

		// Load the downloaded layer
		downloadedLayer, err := tarball.LayerFromFile(layerFile)
		if err != nil {
			return fmt.Errorf("failed to load downloaded layer: %v", err)
		}

		downloadedLayers = append(downloadedLayers, downloadedLayer)
		downloadedSize += layerSize
	}

	// Create a local image from the downloaded layers
	localImg, err := mutate.AppendLayers(img, downloadedLayers...)
	if err != nil {
		return fmt.Errorf("failed to create local image: %v", err)
	}

	// Now extract the uncompressed tar from the local image
	tarFile, err := os.Create(tarFilePath)
	if err != nil {
		return fmt.Errorf("failed to create tar file: %v", err)
	}
	defer tarFile.Close()

	// Extract uncompressed tar from local image
	extractReader := mutate.Extract(localImg)
	_, err = io.Copy(tarFile, extractReader)
	if err != nil {
		return fmt.Errorf("failed to extract uncompressed tar: %v", err)
	}

	return nil
}

// ExtractOCIImageFromTar extracts an image from a previously downloaded tar file
func ExtractOCIImageFromTar(tarFilePath, imageRef, targetDestination string, downloadStatus func(string, string, string, float64)) error {
	// Open the tar file
	tarFile, err := os.Open(tarFilePath)
	if err != nil {
		return fmt.Errorf("failed to open tar file: %v", err)
	}
	defer tarFile.Close()

	// Get file size for progress tracking
	fileInfo, err := tarFile.Stat()
	if err != nil {
		return fmt.Errorf("failed to get file info: %v", err)
	}

	var reader io.Reader = tarFile
	if downloadStatus != nil {
		reader = io.TeeReader(tarFile, &progressWriter{
			total:          fileInfo.Size(),
			fileName:       fmt.Sprintf("Extracting %s", imageRef),
			downloadStatus: downloadStatus,
		})
	}

	// Extract the tar file
	_, err = archive.Apply(context.Background(),
		targetDestination, reader,
		archive.WithNoSameOwner())

	return err
}

// GetOCIImageUncompressedSize returns the total uncompressed size of an image
func GetOCIImageUncompressedSize(targetImage, targetPlatform string, auth *registrytypes.AuthConfig, t http.RoundTripper) (int64, error) {
	var totalSize int64
	var img v1.Image
	var err error

	img, err = GetImage(targetImage, targetPlatform, auth, t)
	if err != nil {
		return totalSize, err
	}

	layers, err := img.Layers()
	if err != nil {
		return totalSize, err
	}

	for _, layer := range layers {
		// Use compressed size as an approximation since uncompressed size is not directly available
		size, err := layer.Size()
		if err != nil {
			return totalSize, err
		}
		totalSize += size
	}

	return totalSize, nil
}

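Because download and extraction are now separate helpers, a caller can keep the intermediate tar on disk and extract it several times without re-downloading. A sketch under the same assumptions as the previous example; the three string arguments of the status callback carry the labels produced by progressWriter, whose exact semantics are not shown in this hunk:

package main

import (
	"log"

	"github.com/mudler/LocalAI/pkg/oci"
)

func main() {
	ref := "quay.io/go-skynet/example:latest" // illustrative
	img, err := oci.GetImage(ref, "", nil, nil)
	if err != nil {
		log.Fatal(err)
	}

	// Progress callback: label strings plus a percentage (see progressWriter).
	status := func(a, b, c string, pct float64) {
		log.Printf("%s %s %s %.1f%%", a, b, c, pct)
	}

	tarPath := "/tmp/example-image.tar" // kept on disk, reusable
	if err := oci.DownloadOCIImageTar(img, ref, tarPath, status); err != nil {
		log.Fatal(err)
	}
	// Extract the same tar into two destinations without re-downloading.
	for _, dst := range []string{"/tmp/dest-a", "/tmp/dest-b"} {
		if err := oci.ExtractOCIImageFromTar(tarPath, ref, dst, status); err != nil {
			log.Fatal(err)
		}
	}
}
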
@@ -30,7 +30,7 @@ var _ = Describe("OCI", func() {
			Expect(err).NotTo(HaveOccurred())
			defer os.RemoveAll(dir)

-			err = ExtractOCIImage(img, dir, nil)
+			err = ExtractOCIImage(img, imageName, dir, nil)
			Expect(err).NotTo(HaveOccurred())
		})
	})

@@ -27,7 +27,7 @@ func InstallExternalBackends(galleries []config.Gallery, backendPath string, dow
			errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
		}
	default:
-		err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus)
+		err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus, true)
		if err != nil {
			errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
		}

@@ -17,7 +17,7 @@ import (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
-func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
+func InstallModels(galleries, backendGalleries []config.Gallery, modelPath, backendBasePath string, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
	// create an error that groups all errors
	var err error

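For readers tracking the widened InstallModels signature, here is a hypothetical call annotated parameter by parameter; the roles are inferred from this diff (and match the preload tests below), and the import paths are assumptions, not confirmed by it:

package main

import (
	"log"

	"github.com/mudler/LocalAI/core/config"  // assumed import path
	"github.com/mudler/LocalAI/core/startup" // assumed import path
)

func main() {
	err := startup.InstallModels(
		[]config.Gallery{}, // model galleries
		[]config.Gallery{}, // backend galleries, consulted when a model needs a backend
		"/tmp/models",      // modelPath: where models are installed
		"/tmp/backends",    // backendBasePath: where backends are installed
		true,               // enforceScan: enforce pre-download scans
		true,               // autoloadBackendGalleries: auto-install required backends
		nil,                // downloadStatus progress callback
		"https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml",
	)
	if err != nil {
		log.Fatal(err)
	}
}
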
@@ -99,7 +99,7 @@ func InstallModels(galleries []config.Gallery, modelPath string, enforceScan boo
		}
	} else {
		// Check if it's a model gallery, or print a warning
-		e, found := installModel(galleries, url, modelPath, downloadStatus, enforceScan)
+		e, found := installModel(galleries, backendGalleries, url, modelPath, backendBasePath, downloadStatus, enforceScan, autoloadBackendGalleries)
		if e != nil && found {
			log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url)
			err = errors.Join(err, e)
@@ -113,7 +113,7 @@ func InstallModels(galleries []config.Gallery, modelPath string, enforceScan boo
	return err
}

-func installModel(galleries []config.Gallery, modelName, modelPath string, downloadStatus func(string, string, string, float64), enforceScan bool) (error, bool) {
+func installModel(galleries, backendGalleries []config.Gallery, modelName, modelPath, backendBasePath string, downloadStatus func(string, string, string, float64), enforceScan, autoloadBackendGalleries bool) (error, bool) {
	models, err := gallery.AvailableGalleryModels(galleries, modelPath)
	if err != nil {
		return err, false
@@ -129,7 +129,7 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl
	}

	log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
-	err = gallery.InstallModelFromGallery(galleries, modelName, modelPath, gallery.GalleryModel{}, downloadStatus, enforceScan)
+	err = gallery.InstallModelFromGallery(galleries, backendGalleries, modelName, modelPath, backendBasePath, gallery.GalleryModel{}, downloadStatus, enforceScan, autoloadBackendGalleries)
	if err != nil {
		return err, true
	}

@@ -21,7 +21,7 @@ var _ = Describe("Preload test", func() {
	url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
	fileName := fmt.Sprintf("%s.yaml", "phi-2")

-	InstallModels([]config.Gallery{}, tmpdir, true, nil, url)
+	InstallModels([]config.Gallery{}, []config.Gallery{}, tmpdir, "", true, true, nil, url)

	resultFile := filepath.Join(tmpdir, fileName)

@@ -36,7 +36,7 @@ var _ = Describe("Preload test", func() {
	url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
	fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")

-	err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url)
+	err = InstallModels([]config.Gallery{}, []config.Gallery{}, tmpdir, "", false, true, nil, url)
	Expect(err).ToNot(HaveOccurred())

	resultFile := filepath.Join(tmpdir, fileName)