mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 19:22:39 -05:00
Compare commits
156 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd17a3312c | ||
|
|
12d0fe610b | ||
|
|
11c67d16b8 | ||
|
|
63f7c86c4d | ||
|
|
ac89bf77bf | ||
|
|
0395cc02fb | ||
|
|
616972fca0 | ||
|
|
942fbff62d | ||
|
|
2612a0c910 | ||
|
|
2dcb6d7247 | ||
|
|
6978eec69f | ||
|
|
2fcfe54466 | ||
|
|
4e7506a3be | ||
|
|
2a46217f90 | ||
|
|
31ff9dbd52 | ||
|
|
9483abef03 | ||
|
|
ce3e8b3e31 | ||
|
|
f3bb84c9a7 | ||
|
|
ecb1297582 | ||
|
|
73fc702b3c | ||
|
|
e3af62ae1a | ||
|
|
dc21604741 | ||
|
|
5433f1a70e | ||
|
|
d5e032bdcd | ||
|
|
de786f6586 | ||
|
|
8b9bc4aa6e | ||
|
|
e6cea7d28e | ||
|
|
7d7d56f2ce | ||
|
|
1caae91ab6 | ||
|
|
e90f2cb0ca | ||
|
|
5a4291fadd | ||
|
|
91ef58ee5a | ||
|
|
a86e8c78f1 | ||
|
|
adb24214c6 | ||
|
|
f03a0430aa | ||
|
|
73bc12abc0 | ||
|
|
7fa437bbcc | ||
|
|
4a27c99928 | ||
|
|
6ce94834b6 | ||
|
|
84a26458dc | ||
|
|
7aa377b6a9 | ||
|
|
64e66dda4a | ||
|
|
a085f61fdc | ||
|
|
21bdfe5fa4 | ||
|
|
7ebd7b2454 | ||
|
|
6984749ea1 | ||
|
|
c0a206bc7a | ||
|
|
01bbb31fb3 | ||
|
|
72111c597d | ||
|
|
b2f9fc870b | ||
|
|
1fc6d469ac | ||
|
|
05848b2027 | ||
|
|
1da0644aa3 | ||
|
|
c087cd1377 | ||
|
|
c621412f6a | ||
|
|
5a8b1892cd | ||
|
|
5b20426863 | ||
|
|
5c6cd50ed6 | ||
|
|
bace6516f1 | ||
|
|
3baadf6f27 | ||
|
|
8804c701b8 | ||
|
|
7b3ceb19bb | ||
|
|
e7f3effea1 | ||
|
|
61694a2ffb | ||
|
|
573a3f104c | ||
|
|
0e8af53a5b | ||
|
|
960ffa808c | ||
|
|
92719568e5 | ||
|
|
163939af71 | ||
|
|
399f1241dc | ||
|
|
58c9ade2e8 | ||
|
|
6e1c93d84f | ||
|
|
4076ea0494 | ||
|
|
26cbf77c0d | ||
|
|
640790d628 | ||
|
|
4132adea2f | ||
|
|
2b2d907a3a | ||
|
|
6e8f4f584b | ||
|
|
662cfc2b48 | ||
|
|
a25d355d66 | ||
|
|
6d1cfdbefc | ||
|
|
5ecc478968 | ||
|
|
aef5c4291b | ||
|
|
c059f912b9 | ||
|
|
bc1e059259 | ||
|
|
38dc07793a | ||
|
|
da6ef0967d | ||
|
|
7a011e60bd | ||
|
|
e13dd5b09f | ||
|
|
86ee303bd6 | ||
|
|
978ee96fd3 | ||
|
|
3ad5691db6 | ||
|
|
0027681090 | ||
|
|
8cba990edc | ||
|
|
88857696d4 | ||
|
|
23f347e687 | ||
|
|
b6e3dc5f02 | ||
|
|
69667521e2 | ||
|
|
2a92effc5d | ||
|
|
a65e012aa2 | ||
|
|
8e9b41d05f | ||
|
|
078da5c2f0 | ||
|
|
c5af5d139c | ||
|
|
2c9279a542 | ||
|
|
a67d22f5f2 | ||
|
|
dc7c51dcc7 | ||
|
|
98df65c7aa | ||
|
|
1559b6b522 | ||
|
|
a0244e3fb4 | ||
|
|
d66396201a | ||
|
|
9628860c0e | ||
|
|
cae9bf1308 | ||
|
|
5bb5da0760 | ||
|
|
867973a850 | ||
|
|
701cd6b6d5 | ||
|
|
7f61d397d5 | ||
|
|
1ae0b896fa | ||
|
|
3937407cb3 | ||
|
|
0e34ae4f3f | ||
|
|
a38b99ecb6 | ||
|
|
a4a4358182 | ||
|
|
4bc39c2db3 | ||
|
|
cc3df759f8 | ||
|
|
378161060c | ||
|
|
f2f788fe60 | ||
|
|
9fa8ed6b1e | ||
|
|
7fc37c5e29 | ||
|
|
4bc4b1e8bc | ||
|
|
e495b89f18 | ||
|
|
ba09eaea1b | ||
|
|
61cc76c455 | ||
|
|
8abecb4a18 | ||
|
|
8b3f76d8e6 | ||
|
|
4e0497f1a6 | ||
|
|
ba88c9f451 | ||
|
|
a598285825 | ||
|
|
cb7a172897 | ||
|
|
771be28dfb | ||
|
|
7d6b3eb42d | ||
|
|
0bb33fab55 | ||
|
|
e3bf7f77f7 | ||
|
|
bd1707d339 | ||
|
|
0474804541 | ||
|
|
72693b3917 | ||
|
|
a03b70010f | ||
|
|
e3717e5c1a | ||
|
|
c8f6858218 | ||
|
|
06d7cc43ae | ||
|
|
f2147cb850 | ||
|
|
75bb9f4c28 | ||
|
|
a2ef4b1e07 | ||
|
|
161c9fe2db | ||
|
|
7547463f81 | ||
|
|
32e4dfd47b | ||
|
|
f67e5dec68 | ||
|
|
297d54acea |
2
.env
2
.env
@@ -76,7 +76,7 @@
|
||||
|
||||
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
|
||||
# https://github.com/ggerganov/llama.cpp/pull/6829
|
||||
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
|
||||
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
|
||||
# LLAMACPP_GRPC_SERVERS=""
|
||||
|
||||
### Enable to run parallel requests
|
||||
|
||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -29,10 +29,6 @@ updates:
|
||||
schedule:
|
||||
# Check for updates to GitHub Actions every weekday
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/autogptq"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/bark"
|
||||
schedule:
|
||||
|
||||
2
.github/workflows/bump_deps.yaml
vendored
2
.github/workflows/bump_deps.yaml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
- repository: "ggml-org/llama.cpp"
|
||||
variable: "CPPLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "ggerganov/whisper.cpp"
|
||||
- repository: "ggml-org/whisper.cpp"
|
||||
variable: "WHISPER_CPP_VERSION"
|
||||
branch: "master"
|
||||
- repository: "PABannier/bark.cpp"
|
||||
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@v2.3.0
|
||||
uses: dependabot/fetch-metadata@v2.4.0
|
||||
with:
|
||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
skip-commit-verification: true
|
||||
|
||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
script: |
|
||||
sudo rm -rf local-ai/ || true
|
||||
- name: copy file via ssh
|
||||
uses: appleboy/scp-action@v0.1.7
|
||||
uses: appleboy/scp-action@v1.0.0
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
|
||||
50
.github/workflows/image-pr.yml
vendored
50
.github/workflows/image-pr.yml
vendored
@@ -33,6 +33,7 @@ jobs:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# This is basically covered by the AIO test
|
||||
@@ -56,26 +57,35 @@ jobs:
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
# - build-type: 'hipblas'
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-hipblas'
|
||||
# ffmpeg: 'false'
|
||||
# image-type: 'extras'
|
||||
# base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
# grpc-base-image: "ubuntu:22.04"
|
||||
# runs-on: 'arc-runner-set'
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
# - build-type: 'sycl_f16'
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
# grpc-base-image: "ubuntu:22.04"
|
||||
# tag-suffix: 'sycl-f16-ffmpeg'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'extras'
|
||||
# runs-on: 'arc-runner-set'
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: 'sycl-f16-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-vulkan-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
# core-image-build:
|
||||
# uses: ./.github/workflows/image_build.yml
|
||||
# with:
|
||||
|
||||
161
.github/workflows/image.yml
vendored
161
.github/workflows/image.yml
vendored
@@ -45,13 +45,13 @@ jobs:
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-hipblas-ffmpeg'
|
||||
tag-suffix: '-hipblas-extras'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
aio: "-aio-gpu-hipblas"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
latest-image: 'latest-gpu-hipblas'
|
||||
latest-image: 'latest-gpu-hipblas-extras'
|
||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
@@ -59,32 +59,13 @@ jobs:
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas-core'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
latest-image: 'latest-gpu-hipblas'
|
||||
self-hosted-jobs:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
@@ -114,60 +95,18 @@ jobs:
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||
matrix:
|
||||
include:
|
||||
# Extra images
|
||||
- build-type: ''
|
||||
#platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||
tag-suffix: '-cublas-cuda11-extras'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
aio: "-aio-gpu-nvidia-cuda-11"
|
||||
latest-image: 'latest-gpu-nvidia-cuda-11'
|
||||
latest-image: 'latest-gpu-nvidia-cuda-11-extras'
|
||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
@@ -175,36 +114,26 @@ jobs:
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
tag-suffix: '-cublas-cuda12-extras'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
aio: "-aio-gpu-nvidia-cuda-12"
|
||||
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||
latest-image: 'latest-gpu-nvidia-cuda-12-extras'
|
||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: ''
|
||||
#platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f16-ffmpeg'
|
||||
tag-suffix: '-sycl-f16-extras'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
aio: "-aio-gpu-intel-f16"
|
||||
latest-image: 'latest-gpu-intel-f16'
|
||||
latest-image: 'latest-gpu-intel-f16-extras'
|
||||
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f32'
|
||||
@@ -212,12 +141,12 @@ jobs:
|
||||
tag-latest: 'auto'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f32-ffmpeg'
|
||||
tag-suffix: '-sycl-f32-extras'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
aio: "-aio-gpu-intel-f32"
|
||||
latest-image: 'latest-gpu-intel-f32'
|
||||
latest-image: 'latest-gpu-intel-f32-extras'
|
||||
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
# Core images
|
||||
@@ -226,41 +155,23 @@ jobs:
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f16-core'
|
||||
ffmpeg: 'false'
|
||||
tag-suffix: '-sycl-f16'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
latest-image: 'latest-gpu-intel-f16'
|
||||
- build-type: 'sycl_f32'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f32-core'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f16-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f32'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f32-ffmpeg-core'
|
||||
tag-suffix: '-sycl-f32'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
latest-image: 'latest-gpu-intel-f32'
|
||||
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
@@ -293,7 +204,7 @@ jobs:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-ffmpeg-core'
|
||||
tag-suffix: ''
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "ubuntu:22.04"
|
||||
@@ -308,60 +219,38 @@ jobs:
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'false'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'false'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'false'
|
||||
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-vulkan-ffmpeg-core'
|
||||
latest-image: 'latest-vulkan-ffmpeg-core'
|
||||
tag-suffix: '-vulkan'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
latest-image: 'latest-gpu-vulkan'
|
||||
gh-runner:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
@@ -394,8 +283,8 @@ jobs:
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-nvidia-l4t-arm64-core'
|
||||
latest-image: 'latest-nvidia-l4t-arm64-core'
|
||||
tag-suffix: '-nvidia-l4t-arm64'
|
||||
latest-image: 'latest-nvidia-l4t-arm64'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
|
||||
10
.github/workflows/notify-models.yaml
vendored
10
.github/workflows/notify-models.yaml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
notify-discord:
|
||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||
env:
|
||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
||||
MODEL_NAME: gemma-3-12b-it
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -16,7 +16,7 @@ jobs:
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
- uses: mudler/localai-github-action@v1
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
id: git-diff-action
|
||||
@@ -79,7 +79,7 @@ jobs:
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
- name: Setup tmate session if fails
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
notify-twitter:
|
||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||
env:
|
||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
||||
MODEL_NAME: gemma-3-12b-it
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -161,7 +161,7 @@ jobs:
|
||||
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
|
||||
- name: Setup tmate session if fails
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
|
||||
4
.github/workflows/notify-releases.yaml
vendored
4
.github/workflows/notify-releases.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- uses: mudler/localai-github-action@v1
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
- name: Summarize
|
||||
id: summarize
|
||||
run: |
|
||||
@@ -60,4 +60,4 @@ jobs:
|
||||
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
|
||||
uses: Ilshidur/action-discord@master
|
||||
with:
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
|
||||
16
.github/workflows/release.yaml
vendored
16
.github/workflows/release.yaml
vendored
@@ -36,6 +36,7 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
||||
make install-go-tools
|
||||
- name: Install CUDA Dependencies
|
||||
run: |
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
||||
@@ -123,7 +124,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -151,6 +152,7 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||
make install-go-tools
|
||||
- name: Intel Dependencies
|
||||
run: |
|
||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||
@@ -232,7 +234,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -253,8 +255,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
make install-go-tools
|
||||
- name: Build
|
||||
id: build
|
||||
run: |
|
||||
@@ -275,7 +276,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -295,8 +296,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc libomp llvm
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
make install-go-tools
|
||||
- name: Build
|
||||
id: build
|
||||
run: |
|
||||
@@ -317,7 +317,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
|
||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
- name: Run Gosec Security Scanner
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
uses: securego/gosec@v2.22.3
|
||||
uses: securego/gosec@v2.22.4
|
||||
with:
|
||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||
|
||||
20
.github/workflows/test-extra.yml
vendored
20
.github/workflows/test-extra.yml
vendored
@@ -78,6 +78,26 @@ jobs:
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||
|
||||
#tests-vllm:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install -y build-essential ffmpeg
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
# - name: Test vllm backend
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||
# tests-transformers-musicgen:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
|
||||
11
.github/workflows/test.yml
vendored
11
.github/workflows/test.yml
vendored
@@ -71,7 +71,7 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||
sudo apt-get install -y libgmock-dev
|
||||
sudo apt-get install -y libgmock-dev clang
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
@@ -96,6 +96,7 @@ jobs:
|
||||
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
|
||||
# The python3-grpc-tools package in 22.04 is too old
|
||||
pip install --user grpcio-tools
|
||||
@@ -130,7 +131,7 @@ jobs:
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -183,6 +184,7 @@ jobs:
|
||||
rm protoc.zip
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
- name: Build images
|
||||
run: |
|
||||
@@ -194,7 +196,7 @@ jobs:
|
||||
make run-e2e-aio
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -222,6 +224,7 @@ jobs:
|
||||
run: |
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||
pip install --user --no-cache-dir grpcio-tools
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
@@ -232,7 +235,7 @@ jobs:
|
||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
|
||||
17
Dockerfile
17
Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
@@ -46,9 +46,10 @@ EOT
|
||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
||||
|
||||
# Install grpc compilers
|
||||
# Install grpc compilers and rice
|
||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
|
||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||
RUN update-ca-certificates
|
||||
@@ -300,10 +301,9 @@ COPY .git .
|
||||
RUN make prepare
|
||||
|
||||
## Build the binary
|
||||
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||
## (both will use CUDA or hipblas for the actual computation)
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
|
||||
## Otherwise just run the normal build
|
||||
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||
else \
|
||||
make build; \
|
||||
@@ -431,9 +431,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/vllm \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/autogptq \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/bark \
|
||||
; fi && \
|
||||
|
||||
87
Makefile
87
Makefile
@@ -6,11 +6,11 @@ BINARY_NAME=local-ai
|
||||
DETECT_LIBS?=true
|
||||
|
||||
# llama.cpp versions
|
||||
CPPLLAMA_VERSION?=d6d2c2ab8c8865784ba9fef37f2b2de3f2134d33
|
||||
CPPLLAMA_VERSION?=9a390c4829cd3058d26a2e2c09d16e3fd12bf1b1
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=2e310b841e0b4e7cf00890b53411dd9f8578f243
|
||||
|
||||
# go-piper version
|
||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||
@@ -24,14 +24,21 @@ BARKCPP_VERSION?=v1.0.0
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
|
||||
|
||||
# ONEAPI variables for SYCL
|
||||
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
ONNX_OS?=linux
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
export CMAKE_ARGS?=
|
||||
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||
export BACKEND_LIBS?=
|
||||
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
|
||||
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
|
||||
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
|
||||
|
||||
CGO_LDFLAGS?=
|
||||
CGO_LDFLAGS_WHISPER?=
|
||||
@@ -81,6 +88,7 @@ endif
|
||||
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
endif
|
||||
|
||||
# Detect if we are running on arm64
|
||||
@@ -108,13 +116,31 @@ ifeq ($(OS),Darwin)
|
||||
# disable metal if on Darwin and any other value is explicitly passed.
|
||||
else ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
export GGML_NO_ACCELERATE=1
|
||||
export GGML_NO_METAL=1
|
||||
GO_LDFLAGS_WHISPER+=-lggml-blas
|
||||
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
# -lcblas removed: it seems to always be listed as a duplicate flag.
|
||||
CGO_LDFLAGS += -framework Accelerate
|
||||
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
|
||||
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
|
||||
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||
else
|
||||
CGO_LDFLAGS_WHISPER+=-lggml-blas
|
||||
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||
endif
|
||||
else
|
||||
CGO_LDFLAGS_WHISPER+=-lgomp
|
||||
@@ -126,21 +152,29 @@ ifeq ($(BUILD_TYPE),openblas)
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
|
||||
export GGML_CUDA=1
|
||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
|
||||
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
|
||||
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export GGML_SYCL=1
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
export GGML_SYCL_F16=1
|
||||
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),hipblas)
|
||||
@@ -151,7 +185,7 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
export STABLE_BUILD_TYPE=
|
||||
export GGML_HIP=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||
@@ -286,8 +320,9 @@ sources/whisper.cpp:
|
||||
git checkout $(WHISPER_CPP_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
|
||||
cd sources/whisper.cpp/build && cmake --build . --config Release
|
||||
|
||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
|
||||
@@ -337,8 +372,14 @@ clean-tests:
|
||||
clean-dc: clean
|
||||
cp -r /build/backend-assets /workspace/backend-assets
|
||||
|
||||
## Install Go tools
|
||||
install-go-tools:
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
|
||||
## Build:
|
||||
build: prepare backend-assets grpcs ## Build the project
|
||||
build: prepare backend-assets grpcs install-go-tools ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||
@@ -348,7 +389,9 @@ ifneq ($(BACKEND_LIBS),)
|
||||
$(MAKE) backend-assets/lib
|
||||
cp -f $(BACKEND_LIBS) backend-assets/lib/
|
||||
endif
|
||||
rm -rf $(BINARY_NAME) || true
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||
rice append --exec $(BINARY_NAME)
|
||||
|
||||
build-minimal:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||
@@ -420,6 +463,7 @@ prepare-test: grpcs
|
||||
cp -rf backend-assets core/http
|
||||
cp tests/models_fixtures/* test-models
|
||||
|
||||
## Test targets
|
||||
test: prepare test-models/testmodel.ggml grpcs
|
||||
@echo 'Running tests'
|
||||
export GO_TAGS="tts debug"
|
||||
@@ -494,7 +538,7 @@ protogen: protogen-go protogen-python
|
||||
protogen-clean: protogen-go-clean protogen-python-clean
|
||||
|
||||
.PHONY: protogen-go
|
||||
protogen-go:
|
||||
protogen-go: install-go-tools
|
||||
mkdir -p pkg/grpc/proto
|
||||
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||
backend/backend.proto
|
||||
@@ -505,18 +549,10 @@ protogen-go-clean:
|
||||
$(RM) bin/*
|
||||
|
||||
.PHONY: protogen-python
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||
|
||||
.PHONY: protogen-python-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||
|
||||
.PHONY: autogptq-protogen
|
||||
autogptq-protogen:
|
||||
$(MAKE) -C backend/python/autogptq protogen
|
||||
|
||||
.PHONY: autogptq-protogen-clean
|
||||
autogptq-protogen-clean:
|
||||
$(MAKE) -C backend/python/autogptq protogen-clean
|
||||
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||
|
||||
.PHONY: bark-protogen
|
||||
bark-protogen:
|
||||
@@ -593,7 +629,6 @@ vllm-protogen-clean:
|
||||
## GRPC
|
||||
# Note: it is duplicated in the Dockerfile
|
||||
prepare-extra-conda-environments: protogen-python
|
||||
$(MAKE) -C backend/python/autogptq
|
||||
$(MAKE) -C backend/python/bark
|
||||
$(MAKE) -C backend/python/coqui
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
@@ -607,10 +642,12 @@ prepare-extra-conda-environments: protogen-python
|
||||
prepare-test-extra: protogen-python
|
||||
$(MAKE) -C backend/python/transformers
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
$(MAKE) -C backend/python/vllm
|
||||
|
||||
test-extra: prepare-test-extra
|
||||
$(MAKE) -C backend/python/transformers test
|
||||
$(MAKE) -C backend/python/diffusers test
|
||||
$(MAKE) -C backend/python/vllm test
|
||||
|
||||
backend-assets:
|
||||
mkdir -p backend-assets
|
||||
@@ -752,8 +789,8 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/silero-vad
|
||||
endif
|
||||
|
||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/whisper
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
||||
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/>
|
||||
</a>
|
||||
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||
@@ -43,7 +43,8 @@
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
|
||||
[](https://t.me/localaiofficial_bot)
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||
|
||||
@@ -103,9 +104,12 @@
|
||||
Run the installer script:
|
||||
|
||||
```bash
|
||||
# Basic installation
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
|
||||
|
||||
Or run with docker:
|
||||
|
||||
### CPU only image:
|
||||
|
||||
@@ -48,6 +48,6 @@ template:
|
||||
<|im_start|>assistant
|
||||
|
||||
download_files:
|
||||
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
|
||||
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
|
||||
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||
|
||||
15
assets.go
15
assets.go
@@ -1,6 +1,15 @@
|
||||
package main
|
||||
|
||||
import "embed"
|
||||
import (
|
||||
rice "github.com/GeertJohan/go.rice"
|
||||
)
|
||||
|
||||
//go:embed backend-assets/*
|
||||
var backendAssets embed.FS
|
||||
var backendAssets *rice.Box
|
||||
|
||||
func init() {
|
||||
var err error
|
||||
backendAssets, err = rice.FindBox("backend-assets")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ service Backend {
|
||||
rpc PredictStream(PredictOptions) returns (stream Reply) {}
|
||||
rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
|
||||
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
||||
rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
|
||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||
rpc TTS(TTSRequest) returns (Result) {}
|
||||
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
||||
@@ -190,11 +191,7 @@ message ModelOptions {
|
||||
int32 NGQA = 20;
|
||||
string ModelFile = 21;
|
||||
|
||||
// AutoGPTQ
|
||||
string Device = 22;
|
||||
bool UseTriton = 23;
|
||||
string ModelBaseName = 24;
|
||||
bool UseFastTokenizer = 25;
|
||||
|
||||
|
||||
// Diffusers
|
||||
string PipelineType = 26;
|
||||
@@ -305,6 +302,19 @@ message GenerateImageRequest {
|
||||
int32 CLIPSkip = 11;
|
||||
}
|
||||
|
||||
message GenerateVideoRequest {
|
||||
string prompt = 1;
|
||||
string start_image = 2; // Path or base64 encoded image for the start frame
|
||||
string end_image = 3; // Path or base64 encoded image for the end frame
|
||||
int32 width = 4;
|
||||
int32 height = 5;
|
||||
int32 num_frames = 6; // Number of frames to generate
|
||||
int32 fps = 7; // Frames per second
|
||||
int32 seed = 8;
|
||||
float cfg_scale = 9; // Classifier-free guidance scale
|
||||
string dst = 10; // Output path for the generated video
|
||||
}
|
||||
|
||||
message TTSRequest {
|
||||
string text = 1;
|
||||
string model = 2;
|
||||
|
||||
@@ -59,8 +59,8 @@ llama.cpp:
|
||||
git checkout -b build $(LLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
llama.cpp/examples/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/examples/grpc-server
|
||||
llama.cpp/tools/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/tools/grpc-server
|
||||
bash prepare.sh
|
||||
|
||||
rebuild:
|
||||
@@ -70,13 +70,13 @@ rebuild:
|
||||
|
||||
purge:
|
||||
rm -rf llama.cpp/build
|
||||
rm -rf llama.cpp/examples/grpc-server
|
||||
rm -rf llama.cpp/tools/grpc-server
|
||||
rm -rf grpc-server
|
||||
|
||||
clean: purge
|
||||
rm -rf llama.cpp
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
+bash -c "source $(ONEAPI_VARS); \
|
||||
|
||||
@@ -52,7 +52,7 @@ struct server_params
|
||||
{
|
||||
std::string hostname = "127.0.0.1";
|
||||
std::vector<std::string> api_keys;
|
||||
std::string public_path = "examples/server/public";
|
||||
std::string public_path = "tools/server/public";
|
||||
std::string chat_template = "";
|
||||
int32_t port = 8080;
|
||||
int32_t read_timeout = 600;
|
||||
@@ -2644,7 +2644,9 @@ void RunServer(const std::string& server_address) {
|
||||
ServerBuilder builder;
|
||||
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
|
||||
builder.SetMaxMessageSize(50 * 1024 * 1024); // 50MB
|
||||
builder.SetMaxSendMessageSize(50 * 1024 * 1024); // 50MB
|
||||
builder.SetMaxReceiveMessageSize(50 * 1024 * 1024); // 50MB
|
||||
std::unique_ptr<Server> server(builder.BuildAndStart());
|
||||
std::cout << "Server listening on " << server_address << std::endl;
|
||||
server->Wait();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
|
||||
index 3cd0d2fa..6c5e811a 100644
|
||||
--- a/examples/llava/clip.cpp
|
||||
+++ b/examples/llava/clip.cpp
|
||||
--- a/tools/mtmd/clip.cpp
|
||||
+++ b/tools/mtmd/clip.cpp
|
||||
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
||||
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
||||
|
||||
@@ -7,22 +7,22 @@ for patch in $(ls patches); do
|
||||
patch -d llama.cpp/ -p1 < patches/$patch
|
||||
done
|
||||
|
||||
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
|
||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv json.hpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv utils.hpp llama.cpp/tools/grpc-server/
|
||||
|
||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
||||
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
|
||||
echo "grpc-server already added"
|
||||
else
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
||||
fi
|
||||
|
||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||
## This is an hack for now, but it should be fixed in the future.
|
||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
|
||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
cp -rfv llama.cpp/tools/mtmd/clip.h llama.cpp/tools/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/tools/mtmd/clip-impl.h llama.cpp/tools/grpc-server/clip-impl.h
|
||||
cp -rfv llama.cpp/tools/mtmd/llava.cpp llama.cpp/tools/grpc-server/llava.cpp
|
||||
echo '#include "llama.h"' > llama.cpp/tools/grpc-server/llava.h
|
||||
cat llama.cpp/tools/mtmd/llava.h >> llama.cpp/tools/grpc-server/llava.h
|
||||
cp -rfv llama.cpp/tools/mtmd/clip.cpp llama.cpp/tools/grpc-server/clip.cpp
|
||||
4
backend/cpp/llama/utils.hpp
vendored
4
backend/cpp/llama/utils.hpp
vendored
@@ -1,4 +1,4 @@
|
||||
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
|
||||
// https://github.com/ggerganov/llama.cpp/blob/master/tools/server/utils.hpp
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include "json.hpp"
|
||||
|
||||
#include "../llava/clip.h"
|
||||
#include "../mtmd/clip.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
CMAKE_ARGS+=-DSD_CUDA=ON
|
||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# to CMAKE_ARGS automatically
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
@@ -30,14 +30,14 @@ else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
||||
CMAKE_ARGS+=-DSD_HIPBLAS=ON
|
||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||
# But if it's OSX without metal, disable it here
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
CMAKE_ARGS+=-DSD_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
CMAKE_ARGS+=-DSD_METAL=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
TARGET+=--target ggml-metal
|
||||
endif
|
||||
|
||||
@@ -74,7 +74,7 @@ func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.Transcript
|
||||
context.SetTranslate(true)
|
||||
}
|
||||
|
||||
if err := context.Process(data, nil, nil); err != nil {
|
||||
if err := context.Process(data, nil, nil, nil); err != nil {
|
||||
return pb.TranscriptResult{}, err
|
||||
}
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
.PHONY: autogptq
|
||||
autogptq: protogen
|
||||
bash install.sh
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the autogptq project
|
||||
|
||||
```
|
||||
make autogptq
|
||||
```
|
||||
@@ -1,153 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from concurrent import futures
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import base64
|
||||
|
||||
import grpc
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
from auto_gptq import AutoGPTQForCausalLM
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
from transformers import TextGenerationPipeline
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def Health(self, request, context):
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
def LoadModel(self, request, context):
|
||||
try:
|
||||
device = "cuda:0"
|
||||
if request.Device != "":
|
||||
device = request.Device
|
||||
|
||||
# support loading local model files
|
||||
model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)
|
||||
|
||||
# support model `Qwen/Qwen-VL-Chat-Int4`
|
||||
if "qwen-vl" in request.Model.lower():
|
||||
self.model_name = "Qwen-VL-Chat"
|
||||
model = AutoModelForCausalLM.from_pretrained(model_path,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
device_map="auto").eval()
|
||||
else:
|
||||
model = AutoGPTQForCausalLM.from_quantized(model_path,
|
||||
model_basename=request.ModelBaseName,
|
||||
use_safetensors=True,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
device=device,
|
||||
use_triton=request.UseTriton,
|
||||
quantize_config=None)
|
||||
|
||||
self.model = model
|
||||
self.tokenizer = tokenizer
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def Predict(self, request, context):
|
||||
penalty = 1.0
|
||||
if request.Penalty != 0.0:
|
||||
penalty = request.Penalty
|
||||
tokens = 512
|
||||
if request.Tokens != 0:
|
||||
tokens = request.Tokens
|
||||
top_p = 0.95
|
||||
if request.TopP != 0.0:
|
||||
top_p = request.TopP
|
||||
|
||||
|
||||
prompt_images = self.recompile_vl_prompt(request)
|
||||
compiled_prompt = prompt_images[0]
|
||||
print(f"Prompt: {compiled_prompt}", file=sys.stderr)
|
||||
|
||||
# Implement Predict RPC
|
||||
pipeline = TextGenerationPipeline(
|
||||
model=self.model,
|
||||
tokenizer=self.tokenizer,
|
||||
max_new_tokens=tokens,
|
||||
temperature=request.Temperature,
|
||||
top_p=top_p,
|
||||
repetition_penalty=penalty,
|
||||
)
|
||||
t = pipeline(compiled_prompt)[0]["generated_text"]
|
||||
print(f"generated_text: {t}", file=sys.stderr)
|
||||
|
||||
if compiled_prompt in t:
|
||||
t = t.replace(compiled_prompt, "")
|
||||
# house keeping. Remove the image files from /tmp folder
|
||||
for img_path in prompt_images[1]:
|
||||
try:
|
||||
os.remove(img_path)
|
||||
except Exception as e:
|
||||
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
|
||||
|
||||
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
|
||||
|
||||
def PredictStream(self, request, context):
|
||||
# Implement PredictStream RPC
|
||||
#for reply in some_data_generator():
|
||||
# yield reply
|
||||
# Not implemented yet
|
||||
return self.Predict(request, context)
|
||||
|
||||
def recompile_vl_prompt(self, request):
|
||||
prompt = request.Prompt
|
||||
image_paths = []
|
||||
|
||||
if "qwen-vl" in self.model_name.lower():
|
||||
# request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
|
||||
# Then, save the image file paths to an array "image_paths".
|
||||
# read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
|
||||
for i, img in enumerate(request.Images):
|
||||
timestamp = str(int(time.time() * 1000)) # Generate timestamp
|
||||
img_path = f"/tmp/vl-{timestamp}.jpg" # Use timestamp in filename
|
||||
with open(img_path, "wb") as f:
|
||||
f.write(base64.b64decode(img))
|
||||
image_paths.append(img_path)
|
||||
prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,")
|
||||
else:
|
||||
prompt = request.Prompt
|
||||
return (prompt, image_paths)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
@@ -1,2 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
@@ -1 +0,0 @@
|
||||
torch==2.4.1
|
||||
@@ -1,2 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch==2.4.1+rocm6.0
|
||||
@@ -1,6 +0,0 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools
|
||||
@@ -1,6 +0,0 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -61,7 +61,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
bark==0.1.5
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -86,7 +86,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
certifi
|
||||
packaging==24.1
|
||||
@@ -168,9 +168,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
# We are storing all the options in a dict so we can use it later when
|
||||
# generating the images
|
||||
for opt in options:
|
||||
if ":" not in opt:
|
||||
continue
|
||||
key, value = opt.split(":")
|
||||
self.options[key] = value
|
||||
|
||||
print(f"Options: {self.options}", file=sys.stderr)
|
||||
|
||||
local = False
|
||||
modelFile = request.Model
|
||||
|
||||
@@ -522,7 +526,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
setuptools
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
pillow
|
||||
protobuf
|
||||
certifi
|
||||
|
||||
@@ -105,7 +105,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
certifi
|
||||
wheel
|
||||
|
||||
@@ -62,7 +62,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -99,7 +99,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
phonemizer
|
||||
scipy
|
||||
|
||||
@@ -91,7 +91,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
return backend_pb2.RerankResult(usage=usage, results=results)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -559,7 +559,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
async def serve(address):
|
||||
# Start asyncio gRPC server
|
||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
# Add the servicer to the server
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
# Bind the server to the address
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
|
||||
@@ -194,27 +194,40 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
await iterations.aclose()
|
||||
|
||||
async def _predict(self, request, context, streaming=False):
|
||||
# Build the sampling parameters
|
||||
# NOTE: this must stay in sync with the vllm backend
|
||||
request_to_sampling_params = {
|
||||
"N": "n",
|
||||
"PresencePenalty": "presence_penalty",
|
||||
"FrequencyPenalty": "frequency_penalty",
|
||||
"RepetitionPenalty": "repetition_penalty",
|
||||
"Temperature": "temperature",
|
||||
"TopP": "top_p",
|
||||
"TopK": "top_k",
|
||||
"MinP": "min_p",
|
||||
"Seed": "seed",
|
||||
"StopPrompts": "stop",
|
||||
"StopTokenIds": "stop_token_ids",
|
||||
"BadWords": "bad_words",
|
||||
"IncludeStopStrInOutput": "include_stop_str_in_output",
|
||||
"IgnoreEOS": "ignore_eos",
|
||||
"Tokens": "max_tokens",
|
||||
"MinTokens": "min_tokens",
|
||||
"Logprobs": "logprobs",
|
||||
"PromptLogprobs": "prompt_logprobs",
|
||||
"SkipSpecialTokens": "skip_special_tokens",
|
||||
"SpacesBetweenSpecialTokens": "spaces_between_special_tokens",
|
||||
"TruncatePromptTokens": "truncate_prompt_tokens",
|
||||
"GuidedDecoding": "guided_decoding",
|
||||
}
|
||||
|
||||
# Build sampling parameters
|
||||
sampling_params = SamplingParams(top_p=0.9, max_tokens=200)
|
||||
if request.TopP != 0:
|
||||
sampling_params.top_p = request.TopP
|
||||
if request.Tokens > 0:
|
||||
sampling_params.max_tokens = request.Tokens
|
||||
if request.Temperature != 0:
|
||||
sampling_params.temperature = request.Temperature
|
||||
if request.TopK != 0:
|
||||
sampling_params.top_k = request.TopK
|
||||
if request.PresencePenalty != 0:
|
||||
sampling_params.presence_penalty = request.PresencePenalty
|
||||
if request.FrequencyPenalty != 0:
|
||||
sampling_params.frequency_penalty = request.FrequencyPenalty
|
||||
if request.StopPrompts:
|
||||
sampling_params.stop = request.StopPrompts
|
||||
if request.IgnoreEOS:
|
||||
sampling_params.ignore_eos = request.IgnoreEOS
|
||||
if request.Seed != 0:
|
||||
sampling_params.seed = request.Seed
|
||||
|
||||
for request_field, param_field in request_to_sampling_params.items():
|
||||
if hasattr(request, request_field):
|
||||
value = getattr(request, request_field)
|
||||
if value not in (None, 0, [], False):
|
||||
setattr(sampling_params, param_field, value)
|
||||
|
||||
# Extract image paths and process images
|
||||
prompt = request.Prompt
|
||||
@@ -320,7 +333,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
async def serve(address):
|
||||
# Start asyncio gRPC server
|
||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||
options=[
|
||||
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||
])
|
||||
# Add the servicer to the server
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
# Bind the server to the address
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.71.0
|
||||
grpcio==1.72.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
@@ -75,6 +75,53 @@ class TestBackendServicer(unittest.TestCase):
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_sampling_params(self):
|
||||
"""
|
||||
This method tests if all sampling parameters are correctly processed
|
||||
NOTE: this does NOT test for correctness, just that we received a compatible response
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
|
||||
self.assertTrue(response.success)
|
||||
|
||||
req = backend_pb2.PredictOptions(
|
||||
Prompt="The capital of France is",
|
||||
TopP=0.8,
|
||||
Tokens=50,
|
||||
Temperature=0.7,
|
||||
TopK=40,
|
||||
PresencePenalty=0.1,
|
||||
FrequencyPenalty=0.2,
|
||||
RepetitionPenalty=1.1,
|
||||
MinP=0.05,
|
||||
Seed=42,
|
||||
StopPrompts=["\n"],
|
||||
StopTokenIds=[50256],
|
||||
BadWords=["badword"],
|
||||
IncludeStopStrInOutput=True,
|
||||
IgnoreEOS=True,
|
||||
MinTokens=5,
|
||||
Logprobs=5,
|
||||
PromptLogprobs=5,
|
||||
SkipSpecialTokens=True,
|
||||
SpacesBetweenSpecialTokens=True,
|
||||
TruncatePromptTokens=10,
|
||||
GuidedDecoding=True,
|
||||
N=2,
|
||||
)
|
||||
resp = stub.Predict(req)
|
||||
self.assertIsNotNone(resp.message)
|
||||
self.assertIsNotNone(resp.logprobs)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("sampling params service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
|
||||
def test_embedding(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
|
||||
@@ -43,18 +43,12 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create ModelPath: %q", err)
|
||||
}
|
||||
if options.ImageDir != "" {
|
||||
err := os.MkdirAll(options.ImageDir, 0750)
|
||||
if options.GeneratedContentDir != "" {
|
||||
err := os.MkdirAll(options.GeneratedContentDir, 0750)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create ImageDir: %q", err)
|
||||
}
|
||||
}
|
||||
if options.AudioDir != "" {
|
||||
err := os.MkdirAll(options.AudioDir, 0750)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create AudioDir: %q", err)
|
||||
}
|
||||
}
|
||||
if options.UploadDir != "" {
|
||||
err := os.MkdirAll(options.UploadDir, 0750)
|
||||
if err != nil {
|
||||
|
||||
@@ -99,7 +99,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
mmap = *c.MMap
|
||||
}
|
||||
|
||||
ctxSize := 1024
|
||||
ctxSize := 4096
|
||||
if c.ContextSize != nil {
|
||||
ctxSize = *c.ContextSize
|
||||
}
|
||||
@@ -184,11 +184,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(*c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
// AutoGPTQ
|
||||
ModelBaseName: c.AutoGPTQ.ModelBaseName,
|
||||
Device: c.AutoGPTQ.Device,
|
||||
UseTriton: c.AutoGPTQ.Triton,
|
||||
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
|
||||
// RWKV
|
||||
Tokenizer: c.Tokenizer,
|
||||
}
|
||||
|
||||
@@ -35,12 +35,17 @@ func SoundGeneration(
|
||||
return "", nil, fmt.Errorf("could not load sound generation model")
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
||||
if err := os.MkdirAll(appConfig.GeneratedContentDir, 0750); err != nil {
|
||||
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
||||
}
|
||||
|
||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
|
||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||
audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
|
||||
if err := os.MkdirAll(audioDir, 0750); err != nil {
|
||||
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
||||
}
|
||||
|
||||
fileName := utils.GenerateUniqueFileName(audioDir, "sound_generation", ".wav")
|
||||
filePath := filepath.Join(audioDir, fileName)
|
||||
|
||||
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
||||
Text: text,
|
||||
|
||||
@@ -32,12 +32,13 @@ func ModelTTS(
|
||||
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
||||
audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
|
||||
if err := os.MkdirAll(audioDir, 0750); err != nil {
|
||||
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
||||
}
|
||||
|
||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||
fileName := utils.GenerateUniqueFileName(audioDir, "tts", ".wav")
|
||||
filePath := filepath.Join(audioDir, fileName)
|
||||
|
||||
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
|
||||
// This should be addressed in a follow up PR soon.
|
||||
|
||||
36
core/backend/video.go
Normal file
36
core/backend/video.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func VideoGeneration(height, width int32, prompt, startImage, endImage, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
inferenceModel, err := loader.Load(
|
||||
opts...,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
fn := func() error {
|
||||
_, err := inferenceModel.GenerateVideo(
|
||||
appConfig.Context,
|
||||
&proto.GenerateVideoRequest{
|
||||
Height: height,
|
||||
Width: width,
|
||||
Prompt: prompt,
|
||||
StartImage: startImage,
|
||||
EndImage: endImage,
|
||||
Dst: dst,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
return fn, nil
|
||||
}
|
||||
@@ -1,11 +1,13 @@
|
||||
package cliContext
|
||||
|
||||
import "embed"
|
||||
import (
|
||||
rice "github.com/GeertJohan/go.rice"
|
||||
)
|
||||
|
||||
type Context struct {
|
||||
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
|
||||
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
|
||||
|
||||
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
|
||||
BackendAssets embed.FS `kong:"-"`
|
||||
BackendAssets *rice.Box `kong:"-"`
|
||||
}
|
||||
|
||||
@@ -21,8 +21,7 @@ type RunCMD struct {
|
||||
|
||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
||||
ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
|
||||
AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
|
||||
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
|
||||
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
|
||||
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
|
||||
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
|
||||
@@ -47,7 +46,7 @@ type RunCMD struct {
|
||||
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
|
||||
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
|
||||
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
|
||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"`
|
||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
|
||||
@@ -81,8 +80,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithModelPath(r.ModelsPath),
|
||||
config.WithContextSize(r.ContextSize),
|
||||
config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
|
||||
config.WithImageDir(r.ImagePath),
|
||||
config.WithAudioDir(r.AudioPath),
|
||||
config.WithGeneratedContentDir(r.GeneratedContentPath),
|
||||
config.WithUploadDir(r.UploadPath),
|
||||
config.WithConfigsDir(r.ConfigPath),
|
||||
config.WithDynamicConfigDir(r.LocalaiConfigDir),
|
||||
|
||||
@@ -70,7 +70,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
||||
opts := &config.ApplicationConfig{
|
||||
ModelPath: t.ModelsPath,
|
||||
Context: context.Background(),
|
||||
AudioDir: outputDir,
|
||||
GeneratedContentDir: outputDir,
|
||||
AssetsDestination: t.BackendAssetsPath,
|
||||
ExternalGRPCBackends: externalBackends,
|
||||
}
|
||||
|
||||
@@ -36,10 +36,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
|
||||
text := strings.Join(t.Text, " ")
|
||||
|
||||
opts := &config.ApplicationConfig{
|
||||
ModelPath: t.ModelsPath,
|
||||
Context: context.Background(),
|
||||
AudioDir: outputDir,
|
||||
AssetsDestination: t.BackendAssetsPath,
|
||||
ModelPath: t.ModelsPath,
|
||||
Context: context.Background(),
|
||||
GeneratedContentDir: outputDir,
|
||||
AssetsDestination: t.BackendAssetsPath,
|
||||
}
|
||||
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
|
||||
|
||||
|
||||
@@ -7,11 +7,11 @@ import (
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
gguf "github.com/gpustack/gguf-parser-go"
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
gguf "github.com/thxcode/gguf-parser-go"
|
||||
)
|
||||
|
||||
type UtilCMD struct {
|
||||
@@ -51,7 +51,7 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
||||
log.Info().
|
||||
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
||||
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
||||
Any("modelName", f.Model().Name).
|
||||
Any("modelName", f.Metadata().Name).
|
||||
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
|
||||
|
||||
log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
|
||||
|
||||
@@ -2,11 +2,11 @@ package config
|
||||
|
||||
import (
|
||||
"context"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
rice "github.com/GeertJohan/go.rice"
|
||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -19,20 +19,21 @@ type ApplicationConfig struct {
|
||||
UploadLimitMB, Threads, ContextSize int
|
||||
F16 bool
|
||||
Debug bool
|
||||
ImageDir string
|
||||
AudioDir string
|
||||
UploadDir string
|
||||
ConfigsDir string
|
||||
DynamicConfigsDir string
|
||||
DynamicConfigsDirPollInterval time.Duration
|
||||
CORS bool
|
||||
CSRF bool
|
||||
PreloadJSONModels string
|
||||
PreloadModelsFromPath string
|
||||
CORSAllowOrigins string
|
||||
ApiKeys []string
|
||||
P2PToken string
|
||||
P2PNetworkID string
|
||||
GeneratedContentDir string
|
||||
|
||||
ConfigsDir string
|
||||
UploadDir string
|
||||
|
||||
DynamicConfigsDir string
|
||||
DynamicConfigsDirPollInterval time.Duration
|
||||
CORS bool
|
||||
CSRF bool
|
||||
PreloadJSONModels string
|
||||
PreloadModelsFromPath string
|
||||
CORSAllowOrigins string
|
||||
ApiKeys []string
|
||||
P2PToken string
|
||||
P2PNetworkID string
|
||||
|
||||
DisableWebUI bool
|
||||
EnforcePredownloadScans bool
|
||||
@@ -46,7 +47,7 @@ type ApplicationConfig struct {
|
||||
|
||||
Galleries []Gallery
|
||||
|
||||
BackendAssets embed.FS
|
||||
BackendAssets *rice.Box
|
||||
AssetsDestination string
|
||||
|
||||
ExternalGRPCBackends map[string]string
|
||||
@@ -197,7 +198,7 @@ func WithBackendAssetsOutput(out string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithBackendAssets(f embed.FS) AppOption {
|
||||
func WithBackendAssets(f *rice.Box) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.BackendAssets = f
|
||||
}
|
||||
@@ -279,15 +280,9 @@ func WithDebug(debug bool) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithAudioDir(audioDir string) AppOption {
|
||||
func WithGeneratedContentDir(generatedContentDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.AudioDir = audioDir
|
||||
}
|
||||
}
|
||||
|
||||
func WithImageDir(imageDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ImageDir = imageDir
|
||||
o.GeneratedContentDir = generatedContentDir
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -50,9 +50,6 @@ type BackendConfig struct {
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
LLMConfig `yaml:",inline"`
|
||||
|
||||
// AutoGPTQ specifics
|
||||
AutoGPTQ AutoGPTQ `yaml:"autogptq"`
|
||||
|
||||
// Diffusers
|
||||
Diffusers Diffusers `yaml:"diffusers"`
|
||||
Step int `yaml:"step"`
|
||||
@@ -176,14 +173,6 @@ type LimitMMPerPrompt struct {
|
||||
LimitAudioPerPrompt int `yaml:"audio"`
|
||||
}
|
||||
|
||||
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
|
||||
type AutoGPTQ struct {
|
||||
ModelBaseName string `yaml:"model_base_name"`
|
||||
Device string `yaml:"device"`
|
||||
Triton bool `yaml:"triton"`
|
||||
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
|
||||
}
|
||||
|
||||
// TemplateConfig is a struct that holds the configuration of the templating system
|
||||
type TemplateConfig struct {
|
||||
// Chat is the template used in the chat completion endpoint
|
||||
@@ -315,9 +304,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
defaultTFZ := 1.0
|
||||
defaultZero := 0
|
||||
|
||||
// Try to offload all GPU layers (if GPU is found)
|
||||
defaultHigh := 99999999
|
||||
|
||||
trueV := true
|
||||
falseV := false
|
||||
|
||||
@@ -377,9 +363,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
if cfg.MirostatTAU == nil {
|
||||
cfg.MirostatTAU = &defaultMirostatTAU
|
||||
}
|
||||
if cfg.NGPULayers == nil {
|
||||
cfg.NGPULayers = &defaultHigh
|
||||
}
|
||||
|
||||
if cfg.LowVRAM == nil {
|
||||
cfg.LowVRAM = &falseV
|
||||
@@ -447,18 +430,19 @@ func (c *BackendConfig) HasTemplate() bool {
|
||||
type BackendConfigUsecases int
|
||||
|
||||
const (
|
||||
FLAG_ANY BackendConfigUsecases = 0b00000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b00000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b00000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b00000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b00010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
|
||||
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
|
||||
FLAG_VAD BackendConfigUsecases = 0b10000000000
|
||||
FLAG_ANY BackendConfigUsecases = 0b000000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b000000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b000000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b000000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b000000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b000000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b000001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b000010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b000100000000
|
||||
FLAG_TOKENIZE BackendConfigUsecases = 0b001000000000
|
||||
FLAG_VAD BackendConfigUsecases = 0b010000000000
|
||||
FLAG_VIDEO BackendConfigUsecases = 0b100000000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
@@ -479,6 +463,7 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
"FLAG_TOKENIZE": FLAG_TOKENIZE,
|
||||
"FLAG_VAD": FLAG_VAD,
|
||||
"FLAG_LLM": FLAG_LLM,
|
||||
"FLAG_VIDEO": FLAG_VIDEO,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -543,6 +528,17 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
}
|
||||
if (u & FLAG_VIDEO) == FLAG_VIDEO {
|
||||
videoBackends := []string{"diffusers", "stablediffusion"}
|
||||
if !slices.Contains(videoBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
|
||||
if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
}
|
||||
if (u & FLAG_RERANK) == FLAG_RERANK {
|
||||
if c.Backend != "rerankers" {
|
||||
@@ -555,7 +551,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
if (u & FLAG_TTS) == FLAG_TTS {
|
||||
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
|
||||
ttsBackends := []string{"bark-cpp", "parler-tts", "piper", "transformers-musicgen"}
|
||||
if !slices.Contains(ttsBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -3,9 +3,10 @@ package config
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
gguf "github.com/thxcode/gguf-parser-go"
|
||||
gguf "github.com/gpustack/gguf-parser-go"
|
||||
)
|
||||
|
||||
type familyType uint8
|
||||
@@ -23,6 +24,7 @@ const (
|
||||
|
||||
const (
|
||||
defaultContextSize = 1024
|
||||
defaultNGPULayers = 99999999
|
||||
)
|
||||
|
||||
type settingsConfig struct {
|
||||
@@ -147,7 +149,7 @@ var knownTemplates = map[string]familyType{
|
||||
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
|
||||
if defaultCtx == 0 && cfg.ContextSize == nil {
|
||||
ctxSize := f.EstimateLLaMACppUsage().ContextSize
|
||||
ctxSize := f.EstimateLLaMACppRun().ContextSize
|
||||
if ctxSize > 0 {
|
||||
cSize := int(ctxSize)
|
||||
cfg.ContextSize = &cSize
|
||||
@@ -157,6 +159,46 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
}
|
||||
}
|
||||
|
||||
// GPU options
|
||||
if cfg.Options == nil {
|
||||
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
|
||||
cfg.Options = []string{"gpu"}
|
||||
}
|
||||
}
|
||||
|
||||
// vram estimation
|
||||
vram, err := xsysinfo.TotalAvailableVRAM()
|
||||
if err != nil {
|
||||
log.Error().Msgf("guessDefaultsFromFile(TotalAvailableVRAM): %s", err)
|
||||
} else if vram > 0 {
|
||||
estimate, err := xsysinfo.EstimateGGUFVRAMUsage(f, vram)
|
||||
if err != nil {
|
||||
log.Error().Msgf("guessDefaultsFromFile(EstimateGGUFVRAMUsage): %s", err)
|
||||
} else {
|
||||
if estimate.IsFullOffload {
|
||||
log.Warn().Msgf("guessDefaultsFromFile: %s", "full offload is recommended")
|
||||
}
|
||||
|
||||
if estimate.EstimatedVRAM > vram {
|
||||
log.Warn().Msgf("guessDefaultsFromFile: %s", "estimated VRAM usage is greater than available VRAM")
|
||||
}
|
||||
|
||||
if cfg.NGPULayers == nil && estimate.EstimatedLayers > 0 {
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %d layers estimated", estimate.EstimatedLayers)
|
||||
cfg.NGPULayers = &estimate.EstimatedLayers
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.NGPULayers == nil {
|
||||
// we assume we want to offload all layers
|
||||
defaultHigh := defaultNGPULayers
|
||||
cfg.NGPULayers = &defaultHigh
|
||||
}
|
||||
|
||||
log.Debug().Any("NGPULayers", cfg.NGPULayers).Msgf("guessDefaultsFromFile: %s", "NGPULayers set")
|
||||
|
||||
// template estimations
|
||||
if cfg.HasTemplate() {
|
||||
// nothing to guess here
|
||||
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
|
||||
@@ -166,12 +208,12 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
log.Debug().
|
||||
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
||||
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
||||
Any("modelName", f.Model().Name).
|
||||
Any("modelName", f.Metadata().Name).
|
||||
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
|
||||
|
||||
// guess the name
|
||||
if cfg.Name == "" {
|
||||
cfg.Name = f.Model().Name
|
||||
cfg.Name = f.Metadata().Name
|
||||
}
|
||||
|
||||
family := identifyFamily(f)
|
||||
@@ -207,6 +249,7 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
cfg.TemplateConfig.JinjaTemplate = true
|
||||
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func identifyFamily(f *gguf.GGUFFile) familyType {
|
||||
@@ -231,7 +274,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
|
||||
commandR := arch == "command-r" && eosTokenID == 255001
|
||||
qwen2 := arch == "qwen2"
|
||||
phi3 := arch == "phi-3"
|
||||
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
|
||||
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
|
||||
deepseek2 := arch == "deepseek2"
|
||||
|
||||
switch {
|
||||
|
||||
@@ -4,8 +4,8 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
gguf "github.com/gpustack/gguf-parser-go"
|
||||
"github.com/rs/zerolog/log"
|
||||
gguf "github.com/thxcode/gguf-parser-go"
|
||||
)
|
||||
|
||||
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/dave-gray101/v2keyauth"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
@@ -153,12 +155,19 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
Browse: true,
|
||||
}))
|
||||
|
||||
if application.ApplicationConfig().ImageDir != "" {
|
||||
router.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
||||
}
|
||||
if application.ApplicationConfig().GeneratedContentDir != "" {
|
||||
os.MkdirAll(application.ApplicationConfig().GeneratedContentDir, 0750)
|
||||
audioPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "audio")
|
||||
imagePath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "images")
|
||||
videoPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "videos")
|
||||
|
||||
if application.ApplicationConfig().AudioDir != "" {
|
||||
router.Static("/generated-audio", application.ApplicationConfig().AudioDir)
|
||||
os.MkdirAll(audioPath, 0750)
|
||||
os.MkdirAll(imagePath, 0750)
|
||||
os.MkdirAll(videoPath, 0750)
|
||||
|
||||
router.Static("/generated-audio", audioPath)
|
||||
router.Static("/generated-images", imagePath)
|
||||
router.Static("/generated-videos", videoPath)
|
||||
}
|
||||
|
||||
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
|
||||
|
||||
@@ -3,7 +3,6 @@ package http_test
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -24,6 +23,7 @@ import (
|
||||
. "github.com/onsi/gomega"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
rice "github.com/GeertJohan/go.rice"
|
||||
openaigo "github.com/otiai10/openaigo"
|
||||
"github.com/sashabaranov/go-openai"
|
||||
"github.com/sashabaranov/go-openai/jsonschema"
|
||||
@@ -264,8 +264,15 @@ func getRequest(url string, header http.Header) (error, int, []byte) {
|
||||
|
||||
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
|
||||
|
||||
//go:embed backend-assets/*
|
||||
var backendAssets embed.FS
|
||||
var backendAssets *rice.Box
|
||||
|
||||
func init() {
|
||||
var err error
|
||||
backendAssets, err = rice.FindBox("backend-assets")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
var _ = Describe("API test", func() {
|
||||
|
||||
@@ -629,8 +636,7 @@ var _ = Describe("API test", func() {
|
||||
application, err := application.New(
|
||||
append(commonOpts,
|
||||
config.WithContext(c),
|
||||
config.WithAudioDir(tmpdir),
|
||||
config.WithImageDir(tmpdir),
|
||||
config.WithGeneratedContentDir(tmpdir),
|
||||
config.WithGalleries(galleries),
|
||||
config.WithModelPath(modelDir),
|
||||
config.WithBackendAssets(backendAssets),
|
||||
|
||||
205
core/http/endpoints/localai/video.go
Normal file
205
core/http/endpoints/localai/video.go
Normal file
@@ -0,0 +1,205 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func downloadFile(url string) (string, error) {
|
||||
// Get the data
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Create the file
|
||||
out, err := os.CreateTemp("", "video")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
// Write the body to file
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
return out.Name(), err
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
/*
|
||||
*
|
||||
|
||||
curl http://localhost:8080/v1/images/generations \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"prompt": "A cute baby sea otter",
|
||||
"n": 1,
|
||||
"size": "512x512"
|
||||
}'
|
||||
|
||||
*
|
||||
*/
|
||||
// VideoEndpoint
|
||||
// @Summary Creates a video given a prompt.
|
||||
// @Param request body schema.OpenAIRequest true "query params"
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /video [post]
|
||||
func VideoEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VideoRequest)
|
||||
if !ok || input.Model == "" {
|
||||
log.Error().Msg("Video Endpoint - Invalid Input")
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
log.Error().Msg("Video Endpoint - Invalid Config")
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
src := ""
|
||||
if input.StartImage != "" {
|
||||
|
||||
var fileData []byte
|
||||
var err error
|
||||
// check if input.File is an URL, if so download it and save it
|
||||
// to a temporary file
|
||||
if strings.HasPrefix(input.StartImage, "http://") || strings.HasPrefix(input.StartImage, "https://") {
|
||||
out, err := downloadFile(input.StartImage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed downloading file:%w", err)
|
||||
}
|
||||
defer os.RemoveAll(out)
|
||||
|
||||
fileData, err = os.ReadFile(out)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading file:%w", err)
|
||||
}
|
||||
|
||||
} else {
|
||||
// base 64 decode the file and write it somewhere
|
||||
// that we will cleanup
|
||||
fileData, err = base64.StdEncoding.DecodeString(input.StartImage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write the base64 result
|
||||
writer := bufio.NewWriter(outputFile)
|
||||
_, err = writer.Write(fileData)
|
||||
if err != nil {
|
||||
outputFile.Close()
|
||||
return err
|
||||
}
|
||||
outputFile.Close()
|
||||
src = outputFile.Name()
|
||||
defer os.RemoveAll(src)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
switch config.Backend {
|
||||
case "stablediffusion":
|
||||
config.Backend = model.StableDiffusionGGMLBackend
|
||||
case "":
|
||||
config.Backend = model.StableDiffusionGGMLBackend
|
||||
}
|
||||
|
||||
width := input.Width
|
||||
height := input.Height
|
||||
|
||||
if width == 0 {
|
||||
width = 512
|
||||
}
|
||||
if height == 0 {
|
||||
height = 512
|
||||
}
|
||||
|
||||
b64JSON := input.ResponseFormat == "b64_json"
|
||||
|
||||
tempDir := ""
|
||||
if !b64JSON {
|
||||
tempDir = filepath.Join(appConfig.GeneratedContentDir, "videos")
|
||||
}
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(tempDir, "b64")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
outputFile.Close()
|
||||
|
||||
// TODO: use mime type to determine the extension
|
||||
output := outputFile.Name() + ".mp4"
|
||||
|
||||
// Rename the temporary file
|
||||
err = os.Rename(outputFile.Name(), output)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseURL := c.BaseURL()
|
||||
|
||||
fn, err := backend.VideoGeneration(height, width, input.Prompt, src, input.EndImage, output, ml, *config, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fn(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
item := &schema.Item{}
|
||||
|
||||
if b64JSON {
|
||||
defer os.RemoveAll(output)
|
||||
data, err := os.ReadFile(output)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
item.B64JSON = base64.StdEncoding.EncodeToString(data)
|
||||
} else {
|
||||
base := filepath.Base(output)
|
||||
item.URL = baseURL + "/generated-videos/" + base
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Data: []schema.Item{*item},
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
log.Error().Msg("Image Endpoint - Invalid Input")
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
log.Error().Msg("Image Endpoint - Invalid Config")
|
||||
@@ -108,7 +108,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
}
|
||||
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
|
||||
outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -184,7 +184,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
|
||||
tempDir := ""
|
||||
if !b64JSON {
|
||||
tempDir = appConfig.ImageDir
|
||||
tempDir = filepath.Join(appConfig.GeneratedContentDir, "images")
|
||||
}
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(tempDir, "b64")
|
||||
@@ -192,6 +192,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
return err
|
||||
}
|
||||
outputFile.Close()
|
||||
|
||||
output := outputFile.Name() + ".png"
|
||||
// Rename the temporary file
|
||||
err = os.Rename(outputFile.Name(), output)
|
||||
|
||||
@@ -203,18 +203,10 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
@@ -59,6 +59,11 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
}
|
||||
|
||||
router.Post("/video",
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VideoRequest) }),
|
||||
localai.VideoEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Backend Statistics Module
|
||||
// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
|
||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||
|
||||
@@ -115,6 +115,7 @@ async function sendTextToChatGPT(text) {
|
||||
|
||||
const response = await fetch('v1/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: getModel(),
|
||||
messages: conversationHistory
|
||||
|
||||
@@ -24,6 +24,20 @@ type GalleryResponse struct {
|
||||
StatusURL string `json:"status"`
|
||||
}
|
||||
|
||||
type VideoRequest struct {
|
||||
BasicModelRequest
|
||||
Prompt string `json:"prompt" yaml:"prompt"`
|
||||
StartImage string `json:"start_image" yaml:"start_image"`
|
||||
EndImage string `json:"end_image" yaml:"end_image"`
|
||||
Width int32 `json:"width" yaml:"width"`
|
||||
Height int32 `json:"height" yaml:"height"`
|
||||
NumFrames int32 `json:"num_frames" yaml:"num_frames"`
|
||||
FPS int32 `json:"fps" yaml:"fps"`
|
||||
Seed int32 `json:"seed" yaml:"seed"`
|
||||
CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"`
|
||||
ResponseFormat string `json:"response_format" yaml:"response_format"`
|
||||
}
|
||||
|
||||
// @Description TTS request body
|
||||
type TTSRequest struct {
|
||||
BasicModelRequest
|
||||
|
||||
@@ -202,7 +202,6 @@ type OpenAIRequest struct {
|
||||
|
||||
Backend string `json:"backend" yaml:"backend"`
|
||||
|
||||
// AutoGPTQ
|
||||
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
|
||||
}
|
||||
|
||||
|
||||
@@ -41,8 +41,6 @@ type PredictionOptions struct {
|
||||
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
|
||||
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
|
||||
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
|
||||
// AutoGPTQ
|
||||
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
|
||||
|
||||
// Diffusers
|
||||
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
|
||||
|
||||
@@ -268,14 +268,6 @@ yarn_ext_factor: 0
|
||||
yarn_attn_factor: 0
|
||||
yarn_beta_fast: 0
|
||||
yarn_beta_slow: 0
|
||||
|
||||
# AutoGPT-Q settings, for configurations specific to GPT models.
|
||||
autogptq:
|
||||
model_base_name: "" # Base name of the model.
|
||||
device: "" # Device to run the model on.
|
||||
triton: false # Whether to use Triton Inference Server.
|
||||
use_fast_tokenizer: false # Whether to use a fast tokenizer for quicker processing.
|
||||
|
||||
# configuration for diffusers model
|
||||
diffusers:
|
||||
cuda: false # Whether to use CUDA
|
||||
@@ -489,8 +481,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
|
||||
|-----------|---------|-------------|----------------------|
|
||||
| --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
|
||||
| --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH |
|
||||
| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH |
|
||||
| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH |
|
||||
| --generated-content-path | /tmp/generated/content | Location for assets generated by backends (e.g. stablediffusion) | $LOCALAI_GENERATED_CONTENT_PATH |
|
||||
| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
|
||||
| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
|
||||
| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
|
||||
@@ -523,6 +514,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
|
||||
| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT |
|
||||
| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY |
|
||||
| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME |
|
||||
| --disable-webui | false | Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface | $LOCALAI_DISABLE_WEBUI |
|
||||
| --machine-tag | | If not empty - put that string to Machine-Tag header in each response. Useful to track response from different machines using multiple P2P federated nodes | $LOCALAI_MACHINE_TAG |
|
||||
|
||||
#### Backend Flags
|
||||
|
||||
@@ -34,4 +34,12 @@ List of the Environment Variables:
|
||||
| **FEDERATED** | Set to "true" to share the instance with the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
|
||||
| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
|
||||
|
||||
## Uninstallation
|
||||
|
||||
To uninstall, run:
|
||||
|
||||
```
|
||||
curl https://localai.io/install.sh | sh -s -- --uninstall
|
||||
```
|
||||
|
||||
We are looking into improving the installer, and as this is a first iteration any feedback is welcome! Open up an [issue](https://github.com/mudler/LocalAI/issues/new/choose) if something doesn't work for you!
|
||||
@@ -57,12 +57,14 @@ diffusers:
|
||||
|
||||
Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
|
||||
|
||||
To check what CUDA version do you need, you can either run `nvidia-smi` or `nvcc --version`.
|
||||
If using a system with SELinux, ensure you have the policies installed, such as those [provided by nvidia](https://github.com/NVIDIA/dgx-selinux/)
|
||||
|
||||
To check what CUDA version do you need, you can either run `nvidia-smi` or `nvcc --version`.
|
||||
|
||||
Alternatively, you can also check nvidia-smi with docker:
|
||||
|
||||
```
|
||||
docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
|
||||
docker run --runtime=nvidia --rm nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi
|
||||
```
|
||||
|
||||
To use CUDA, use the images with the `cublas` tag, for example.
|
||||
@@ -112,7 +114,7 @@ llama_init_from_file: kv self size = 512.00 MB
|
||||
|
||||
## ROCM(AMD) acceleration
|
||||
|
||||
There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation.
|
||||
There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation.
|
||||
|
||||
Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatability and package versions for dependencies across all variations of OS must be tested independently if disired, please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
|
||||
|
||||
@@ -137,7 +139,7 @@ LocalAI hipblas images are built against the following targets: gfx900,gfx906,gf
|
||||
|
||||
If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below.
|
||||
|
||||
### Verified
|
||||
### Verified
|
||||
|
||||
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
|
||||
|
||||
@@ -147,7 +149,6 @@ The devices in the following list have been tested with `hipblas` images running
|
||||
| diffusers | yes | Radeon VII (gfx906) |
|
||||
| piper | yes | Radeon VII (gfx906) |
|
||||
| whisper | no | none |
|
||||
| autogptq | no | none |
|
||||
| bark | no | none |
|
||||
| coqui | no | none |
|
||||
| transformers | no | none |
|
||||
@@ -166,7 +167,7 @@ The devices in the following list have been tested with `hipblas` images running
|
||||
1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
|
||||
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatability for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
|
||||
3. Install you chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html))
|
||||
4. Deploy. Yes it's that easy.
|
||||
4. Deploy. Yes it's that easy.
|
||||
|
||||
#### Setup Example (Docker/containerd)
|
||||
|
||||
@@ -248,7 +249,7 @@ This configuration has been tested on a 'custom' cluster managed by SUSE Rancher
|
||||
|
||||
- When installing the ROCM kernel driver on your system ensure that you are installing an equal or newer version that that which is currently implemented in LocalAI (6.0.0 at time of writing).
|
||||
- AMD documentation indicates that this will ensure functionality however your milage may vary depending on the GPU and distro you are using.
|
||||
- If you encounter an `Error 413` on attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annontation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
|
||||
- If you encounter an `Error 413` on attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annontation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
|
||||
|
||||
## Intel acceleration (sycl)
|
||||
|
||||
@@ -279,3 +280,36 @@ docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=
|
||||
```
|
||||
|
||||
Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
|
||||
|
||||
## Vulkan acceleration
|
||||
|
||||
### Requirements
|
||||
|
||||
If using nvidia, follow the steps in the [CUDA](#cudanvidia-acceleration) section to configure your docker runtime to allow access to the GPU.
|
||||
|
||||
### Container images
|
||||
|
||||
To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}}-vulkan-ffmpeg-core`.
|
||||
|
||||
#### Example
|
||||
|
||||
To run LocalAI with Docker and Vulkan, you can use the following command as an example:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
In addition to the commands to run LocalAI normally, you need to specify additonal flags to pass the GPU hardware to the container.
|
||||
|
||||
These flags are the same as the sections above, depending on the hardware, for [nvidia](#cudanvidia-acceleration), [AMD](#rocmamd-acceleration) or [Intel](#intel-acceleration-sycl).
|
||||
|
||||
If you have mixed hardware, you can pass flags for multiple GPUs, for example:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
|
||||
--gpus=all \ # nvidia passthrough
|
||||
--device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
|
||||
localai/localai:latest-vulkan-ffmpeg-core
|
||||
```
|
||||
@@ -74,49 +74,9 @@ curl http://localhost:8080/v1/models
|
||||
|
||||
## Backends
|
||||
|
||||
### AutoGPTQ
|
||||
|
||||
[AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) is an easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm.
|
||||
|
||||
#### Prerequisites
|
||||
|
||||
This is an extra backend - in the container images is already available and there is nothing to do for the setup.
|
||||
|
||||
If you are building LocalAI locally, you need to install [AutoGPTQ manually](https://github.com/PanQiWei/AutoGPTQ#quick-installation).
|
||||
|
||||
|
||||
#### Model setup
|
||||
|
||||
The models are automatically downloaded from `huggingface` if not present the first time. It is possible to define models via `YAML` config file, or just by querying the endpoint with the `huggingface` repository model name. For example, create a `YAML` config file in `models/`:
|
||||
|
||||
```
|
||||
name: orca
|
||||
backend: autogptq
|
||||
model_base_name: "orca_mini_v2_13b-GPTQ-4bit-128g.no-act.order"
|
||||
parameters:
|
||||
model: "TheBloke/orca_mini_v2_13b-GPTQ"
|
||||
# ...
|
||||
```
|
||||
|
||||
Test with:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "orca",
|
||||
"messages": [{"role": "user", "content": "How are you?"}],
|
||||
"temperature": 0.1
|
||||
}'
|
||||
```
|
||||
### RWKV
|
||||
|
||||
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
|
||||
|
||||
Note: rwkv models needs to specify the backend `rwkv` in the YAML config files and have an associated tokenizer along that needs to be provided with it:
|
||||
|
||||
```
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
|
||||
```
|
||||
RWKV support is available through llama.cpp (see below)
|
||||
|
||||
### llama.cpp
|
||||
|
||||
|
||||
@@ -14,18 +14,19 @@ icon = "rocket_launch"
|
||||
|
||||
If you are exposing LocalAI remotely, make sure you protect the API endpoints adequately with a mechanism which allows to protect from the incoming traffic or alternatively, run LocalAI with `API_KEY` to gate the access with an API key. The API key guarantees a total access to the features (there is no role separation), and it is to be considered as likely as an admin role.
|
||||
|
||||
To access the WebUI with an API_KEY, browser extensions such as [Requestly](https://requestly.com/) can be used (see also https://github.com/mudler/LocalAI/issues/2227#issuecomment-2093333752). See also [API flags]({{% relref "docs/advanced/advanced-usage#api-flags" %}}) for the flags / options available when starting LocalAI.
|
||||
|
||||
{{% /alert %}}
|
||||
|
||||
## Quickstart
|
||||
|
||||
|
||||
### Using the Bash Installer
|
||||
|
||||
```bash
|
||||
# Basic installation
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options
|
||||
|
||||
### Run with docker:
|
||||
```bash
|
||||
# CPU only image:
|
||||
@@ -100,6 +101,57 @@ The AIO images come pre-configured with the following features:
|
||||
|
||||
For instructions on using AIO images, see [Using container images]({{% relref "docs/getting-started/container-images#all-in-one-images" %}}).
|
||||
|
||||
## Using LocalAI and the full stack with LocalAGI
|
||||
|
||||
LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall.
|
||||
|
||||
[LocalAGI](https://github.com/mudler/LocalAGI) is a powerful, self-hostable AI Agent platform designed for maximum privacy and flexibility which encompassess and uses all the softwre stack. It provides a complete drop-in replacement for OpenAI's Responses APIs with advanced agentic capabilities, working entirely locally on consumer-grade hardware (CPU and GPU).
|
||||
|
||||
### Quick Start
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/mudler/LocalAGI
|
||||
cd LocalAGI
|
||||
|
||||
# CPU setup (default)
|
||||
docker compose up
|
||||
|
||||
# NVIDIA GPU setup
|
||||
docker compose -f docker-compose.nvidia.yaml up
|
||||
|
||||
# Intel GPU setup (for Intel Arc and integrated GPUs)
|
||||
docker compose -f docker-compose.intel.yaml up
|
||||
|
||||
# Start with a specific model (see available models in models.localai.io, or localai.io to use any model in huggingface)
|
||||
MODEL_NAME=gemma-3-12b-it docker compose up
|
||||
|
||||
# NVIDIA GPU setup with custom multimodal and image models
|
||||
MODEL_NAME=gemma-3-12b-it \
|
||||
MULTIMODAL_MODEL=minicpm-v-2_6 \
|
||||
IMAGE_MODEL=flux.1-dev-ggml \
|
||||
docker compose -f docker-compose.nvidia.yaml up
|
||||
```
|
||||
|
||||
### Key Features
|
||||
|
||||
- **Privacy-Focused**: All processing happens locally, ensuring your data never leaves your machine
|
||||
- **Flexible Deployment**: Supports CPU, NVIDIA GPU, and Intel GPU configurations
|
||||
- **Multiple Model Support**: Compatible with various models from Hugging Face and other sources
|
||||
- **Web Interface**: User-friendly chat interface for interacting with AI agents
|
||||
- **Advanced Capabilities**: Supports multimodal models, image generation, and more
|
||||
- **Docker Integration**: Easy deployment using Docker Compose
|
||||
|
||||
### Environment Variables
|
||||
|
||||
You can customize your LocalAGI setup using the following environment variables:
|
||||
|
||||
- `MODEL_NAME`: Specify the model to use (e.g., `gemma-3-12b-it`)
|
||||
- `MULTIMODAL_MODEL`: Set a custom multimodal model
|
||||
- `IMAGE_MODEL`: Configure an image generation model
|
||||
|
||||
For more advanced configuration and API documentation, visit the [LocalAGI GitHub repository](https://github.com/mudler/LocalAGI).
|
||||
|
||||
## What's Next?
|
||||
|
||||
There is much more to explore with LocalAI! You can run any model from Hugging Face, perform video generation, and also voice cloning. For a comprehensive overview, check out the [features]({{% relref "docs/features" %}}) section.
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v2.27.0"
|
||||
"version": "v2.28.0"
|
||||
}
|
||||
|
||||
340
docs/static/install.sh
vendored
Normal file → Executable file
340
docs/static/install.sh
vendored
Normal file → Executable file
@@ -1,30 +1,74 @@
|
||||
#!/bin/sh
|
||||
# This script installs LocalAI on Linux.
|
||||
# It detects the current operating system architecture and installs the appropriate version of LocalAI.
|
||||
# LocalAI Installer Script
|
||||
# This script installs LocalAI on Linux and macOS systems.
|
||||
# It automatically detects the system architecture and installs the appropriate version.
|
||||
|
||||
# Usage:
|
||||
# curl ... | ENV_VAR=... sh -
|
||||
# or
|
||||
# ENV_VAR=... ./install.sh
|
||||
# Basic installation:
|
||||
# curl https://localai.io/install.sh | sh
|
||||
#
|
||||
# With environment variables:
|
||||
# DOCKER_INSTALL=true USE_AIO=true API_KEY=your-key PORT=8080 THREADS=4 curl https://localai.io/install.sh | sh
|
||||
#
|
||||
# To uninstall:
|
||||
# curl https://localai.io/install.sh | sh -s -- --uninstall
|
||||
#
|
||||
# Environment Variables:
|
||||
# DOCKER_INSTALL - Set to "true" to install Docker images (default: auto-detected)
|
||||
# USE_AIO - Set to "true" to use the all-in-one LocalAI image (default: false)
|
||||
# API_KEY - API key for securing LocalAI access (default: none)
|
||||
# PORT - Port to run LocalAI on (default: 8080)
|
||||
# THREADS - Number of CPU threads to use (default: auto-detected)
|
||||
# MODELS_PATH - Path to store models (default: /usr/share/local-ai/models)
|
||||
# CORE_IMAGES - Set to "true" to download core LocalAI images (default: false)
|
||||
# P2P_TOKEN - Token for P2P federation/worker mode (default: none)
|
||||
# WORKER - Set to "true" to run as a worker node (default: false)
|
||||
# FEDERATED - Set to "true" to enable federation mode (default: false)
|
||||
# FEDERATED_SERVER - Set to "true" to run as a federation server (default: false)
|
||||
|
||||
set -e
|
||||
set -o noglob
|
||||
#set -x
|
||||
|
||||
# --- helper functions for logs ---
|
||||
# ANSI escape codes
|
||||
LIGHT_BLUE='\033[38;5;117m'
|
||||
ORANGE='\033[38;5;214m'
|
||||
RED='\033[38;5;196m'
|
||||
BOLD='\033[1m'
|
||||
RESET='\033[0m'
|
||||
|
||||
info()
|
||||
{
|
||||
echo ' ' "$@"
|
||||
echo -e "${BOLD}${LIGHT_BLUE}" '[INFO] ' "$@" "${RESET}"
|
||||
}
|
||||
|
||||
warn()
|
||||
{
|
||||
echo '[WARN] ' "$@" >&2
|
||||
echo -e "${BOLD}${ORANGE}" '[WARN] ' "$@" "${RESET}" >&2
|
||||
}
|
||||
|
||||
fatal()
|
||||
{
|
||||
echo '[ERROR] ' "$@" >&2
|
||||
echo -e "${BOLD}${RED}" '[ERROR] ' "$@" "${RESET}" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# --- custom choice functions ---
|
||||
# like the logging functions, but with the -n flag to prevent the new line and keep the cursor in line for choices inputs like y/n
|
||||
choice_info()
|
||||
{
|
||||
echo -e -n "${BOLD}${LIGHT_BLUE}" '[INFO] ' "$@" "${RESET}"
|
||||
}
|
||||
|
||||
choice_warn()
|
||||
{
|
||||
echo -e -n "${BOLD}${ORANGE}" '[WARN] ' "$@" "${RESET}" >&2
|
||||
}
|
||||
|
||||
choice_fatal()
|
||||
{
|
||||
echo -e -n "${BOLD}${RED}" '[ERROR] ' "$@" "${RESET}" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -57,6 +101,59 @@ require() {
|
||||
echo $MISSING
|
||||
}
|
||||
|
||||
# Function to uninstall LocalAI
|
||||
uninstall_localai() {
|
||||
info "Starting LocalAI uninstallation..."
|
||||
|
||||
# Stop and remove Docker container if it exists
|
||||
if available docker && $SUDO docker ps -a --format '{{.Names}}' | grep -q local-ai; then
|
||||
info "Stopping and removing LocalAI Docker container..."
|
||||
$SUDO docker stop local-ai || true
|
||||
$SUDO docker rm local-ai || true
|
||||
$SUDO docker volume rm local-ai-data || true
|
||||
fi
|
||||
|
||||
# Remove systemd service if it exists
|
||||
if [ -f "/etc/systemd/system/local-ai.service" ]; then
|
||||
info "Removing systemd service..."
|
||||
$SUDO systemctl stop local-ai || true
|
||||
$SUDO systemctl disable local-ai || true
|
||||
$SUDO rm -f /etc/systemd/system/local-ai.service
|
||||
$SUDO systemctl daemon-reload
|
||||
fi
|
||||
|
||||
# Remove environment file
|
||||
if [ -f "/etc/localai.env" ]; then
|
||||
info "Removing environment file..."
|
||||
$SUDO rm -f /etc/localai.env
|
||||
fi
|
||||
|
||||
# Remove binary
|
||||
for BINDIR in /usr/local/bin /usr/bin /bin; do
|
||||
if [ -f "$BINDIR/local-ai" ]; then
|
||||
info "Removing binary from $BINDIR..."
|
||||
$SUDO rm -f "$BINDIR/local-ai"
|
||||
fi
|
||||
done
|
||||
|
||||
# Remove models directory
|
||||
if [ -d "/usr/share/local-ai" ]; then
|
||||
info "Removing LocalAI data directory..."
|
||||
$SUDO rm -rf /usr/share/local-ai
|
||||
fi
|
||||
|
||||
# Remove local-ai user if it exists
|
||||
if id local-ai >/dev/null 2>&1; then
|
||||
info "Removing local-ai user..."
|
||||
$SUDO userdel -r local-ai || true
|
||||
fi
|
||||
|
||||
info "LocalAI has been successfully uninstalled."
|
||||
exit 0
|
||||
}
|
||||
|
||||
|
||||
|
||||
## VARIABLES
|
||||
|
||||
# DOCKER_INSTALL - set to "true" to install Docker images
|
||||
@@ -89,7 +186,7 @@ else
|
||||
fi
|
||||
THREADS=${THREADS:-$procs}
|
||||
LATEST_VERSION=$(curl -s "https://api.github.com/repos/mudler/LocalAI/releases/latest" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
|
||||
VERSION="${VERSION:-$LATEST_VERSION}"
|
||||
LOCALAI_VERSION="${LOCALAI_VERSION:-$LATEST_VERSION}" #changed due to VERSION beign already defined in Fedora 42 Cloud Edition
|
||||
MODELS_PATH=${MODELS_PATH:-/usr/share/local-ai/models}
|
||||
|
||||
|
||||
@@ -156,7 +253,7 @@ WorkingDirectory=/usr/share/local-ai
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
EOF
|
||||
|
||||
|
||||
$SUDO touch /etc/localai.env
|
||||
$SUDO echo "ADDRESS=0.0.0.0:$PORT" | $SUDO tee /etc/localai.env >/dev/null
|
||||
$SUDO echo "API_KEY=$API_KEY" | $SUDO tee -a /etc/localai.env >/dev/null
|
||||
@@ -189,23 +286,74 @@ EOF
|
||||
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-yum-or-dnf
|
||||
install_container_toolkit_yum() {
|
||||
info 'Installing NVIDIA repository...'
|
||||
info 'Installing NVIDIA container toolkit repository...'
|
||||
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
|
||||
$SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
|
||||
if [ "$PACKAGE_MANAGER" = "dnf" ]; then
|
||||
$SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
|
||||
else
|
||||
DNF_VERSION=$($PACKAGE_MANAGER --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n1 | cut -d. -f1)
|
||||
if [ "$DNF_VERSION" -ge 5 ]; then
|
||||
# DNF5: Use 'setopt' to enable the repository
|
||||
$SUDO $PACKAGE_MANAGER config-manager setopt nvidia-container-toolkit-experimental.enabled=1
|
||||
else
|
||||
# DNF4: Use '--set-enabled' to enable the repository
|
||||
$SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
|
||||
fi
|
||||
else
|
||||
$SUDO $PACKAGE_MANAGER -y install yum-utils
|
||||
$SUDO $PACKAGE_MANAGER-config-manager --enable nvidia-container-toolkit-experimental
|
||||
fi
|
||||
$SUDO $PACKAGE_MANAGER install -y nvidia-container-toolkit
|
||||
}
|
||||
|
||||
# Fedora, Rhel and other distro ships tunable SELinux booleans in the container-selinux policy to control device access.
|
||||
# In particular, enabling container_use_devices allows containers to use arbitrary host device labels (including GPU devices)
|
||||
# ref: https://github.com/containers/ramalama/blob/main/docs/ramalama-cuda.7.md#expected-output
|
||||
enable_selinux_container_booleans() {
|
||||
|
||||
# Check SELinux mode
|
||||
SELINUX_MODE=$(getenforce)
|
||||
|
||||
if [ "$SELINUX_MODE" == "Enforcing" ]; then
|
||||
# Check the status of container_use_devices
|
||||
CONTAINER_USE_DEVICES=$(getsebool container_use_devices | awk '{print $3}')
|
||||
|
||||
if [ "$CONTAINER_USE_DEVICES" == "off" ]; then
|
||||
|
||||
#We want to give the user the choice to enable the SE booleans since it is a security config
|
||||
warn "+-----------------------------------------------------------------------------------------------------------+"
|
||||
warn "| WARNING: |"
|
||||
warn "| Your distribution ships tunable SELinux booleans in the container-selinux policy to control device access.|"
|
||||
warn "| In particular, enabling \"container_use_devices\" allows containers to use arbitrary host device labels |"
|
||||
warn "| (including GPU devices). |"
|
||||
warn "| This script can try to enable them enabling the \"container_use_devices\" flag. |"
|
||||
warn "| |"
|
||||
warn "| Otherwise you can exit the install script and enable them yourself. |"
|
||||
warn "+-----------------------------------------------------------------------------------------------------------+"
|
||||
|
||||
while true; do
|
||||
choice_warn "I understand that this script is going to change my SELinux configs, which is a security risk: (yes/exit) ";
|
||||
read Answer
|
||||
|
||||
if [ "$Answer" = "yes" ]; then
|
||||
warn "Enabling \"container_use_devices\" persistently..."
|
||||
$SUDO setsebool -P container_use_devices 1
|
||||
|
||||
break
|
||||
elif [ "$Answer" = "exit" ]; then
|
||||
aborted
|
||||
else
|
||||
warn "Invalid choice. Please enter 'yes' or 'exit'."
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt
|
||||
install_container_toolkit_apt() {
|
||||
info 'Installing NVIDIA repository...'
|
||||
info 'Installing NVIDIA container toolkit repository...'
|
||||
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | $SUDO gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
|
||||
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
|
||||
@@ -217,7 +365,7 @@ install_container_toolkit_apt() {
|
||||
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-zypper
|
||||
install_container_toolkit_zypper() {
|
||||
info 'Installing NVIDIA repository...'
|
||||
info 'Installing NVIDIA zypper repository...'
|
||||
$SUDO zypper ar https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
|
||||
$SUDO zypper modifyrepo --enable nvidia-container-toolkit-experimental
|
||||
$SUDO zypper --gpg-auto-import-keys install -y nvidia-container-toolkit
|
||||
@@ -246,6 +394,29 @@ install_container_toolkit() {
|
||||
opensuse*|suse*) install_container_toolkit_zypper ;;
|
||||
*) echo "Could not install nvidia container toolkit - unknown OS" ;;
|
||||
esac
|
||||
|
||||
# after installing the toolkit we need to add it to the docker runtimes, otherwise even with --gpu all
|
||||
# the container would still run with runc and would not have access to nvidia-smi
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-docker
|
||||
info "Adding NVIDIA Container Runtime to Docker runtimes..."
|
||||
$SUDO nvidia-ctk runtime configure --runtime=docker
|
||||
|
||||
info "Restarting Docker Daemon"
|
||||
$SUDO systemctl restart docker
|
||||
|
||||
# The NVML error arises because SELinux blocked the container’s attempts to open the GPU devices or related libraries.
|
||||
# Without relaxing SELinux for the container, GPU commands like nvidia-smi report “Insufficient Permissions”
|
||||
# This has been noted in NVIDIA’s documentation:
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.13.5/install-guide.html#id2
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/troubleshooting.html#nvml-insufficient-permissions-and-selinux
|
||||
case $OS_NAME in
|
||||
fedora|rhel|centos|rocky)
|
||||
enable_selinux_container_booleans
|
||||
;;
|
||||
opensuse-tumbleweed)
|
||||
enable_selinux_container_booleans
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
|
||||
@@ -253,14 +424,21 @@ install_container_toolkit() {
|
||||
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9
|
||||
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora
|
||||
install_cuda_driver_yum() {
|
||||
info 'Installing NVIDIA repository...'
|
||||
info 'Installing NVIDIA CUDA repository...'
|
||||
case $PACKAGE_MANAGER in
|
||||
yum)
|
||||
$SUDO $PACKAGE_MANAGER -y install yum-utils
|
||||
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
|
||||
;;
|
||||
dnf)
|
||||
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
|
||||
DNF_VERSION=$($PACKAGE_MANAGER --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n1 | cut -d. -f1)
|
||||
if [ "$DNF_VERSION" -ge 5 ]; then
|
||||
# DNF5: Use 'addrepo' to add the repository
|
||||
$SUDO $PACKAGE_MANAGER config-manager addrepo --id=nvidia-cuda --set=name="nvidia-cuda" --set=baseurl="https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo"
|
||||
else
|
||||
# DNF4: Use '--add-repo' to add the repository
|
||||
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -281,10 +459,68 @@ install_cuda_driver_yum() {
|
||||
$SUDO $PACKAGE_MANAGER -y install cuda-drivers
|
||||
}
|
||||
|
||||
install_fedora_nvidia_kernel_drivers(){
|
||||
|
||||
#We want to give the user the choice to install the akmod kernel drivers or not, since it could break some setups
|
||||
warn "+------------------------------------------------------------------------------------------------+"
|
||||
warn "| WARNING: |"
|
||||
warn "| Looks like the NVIDIA Kernel modules are not installed. |"
|
||||
warn "| |"
|
||||
warn "| This script can try to install them using akmod-nvidia. |"
|
||||
warn "| - The script need the rpmfusion free and nonfree repos and will install them if not available. |"
|
||||
warn "| - The akmod installation can sometimes inhibit the reboot command. |"
|
||||
warn "| |"
|
||||
warn "| Otherwise you can exit the install script and install them yourself. |"
|
||||
warn "| NOTE: you will need to reboot after the installation. |"
|
||||
warn "+------------------------------------------------------------------------------------------------+"
|
||||
|
||||
while true; do
|
||||
choice_warn "Do you wish for the script to try and install them? (akmod/exit) ";
|
||||
read Answer
|
||||
|
||||
if [ "$Answer" = "akmod" ]; then
|
||||
|
||||
DNF_VERSION=$($PACKAGE_MANAGER --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n1 | cut -d. -f1)
|
||||
|
||||
OS_NAME=$ID
|
||||
OS_VERSION=$VERSION_ID
|
||||
FREE_URL="https://mirrors.rpmfusion.org/free/fedora/rpmfusion-free-release-${OS_VERSION}.noarch.rpm"
|
||||
NONFREE_URL="https://mirrors.rpmfusion.org/nonfree/fedora/rpmfusion-nonfree-release-${OS_VERSION}.noarch.rpm"
|
||||
|
||||
curl -LO "$FREE_URL"
|
||||
curl -LO "$NONFREE_URL"
|
||||
|
||||
if [ "$DNF_VERSION" -ge 5 ]; then
|
||||
# DNF5:
|
||||
$SUDO $PACKAGE_MANAGER install -y "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm" "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm"
|
||||
$SUDO $PACKAGE_MANAGER install -y akmod-nvidia
|
||||
else
|
||||
# DNF4:
|
||||
$SUDO $PACKAGE_MANAGER install -y "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm" "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm"
|
||||
$SUDO $PACKAGE_MANAGER install -y akmod-nvidia
|
||||
fi
|
||||
|
||||
$SUDO rm "rpmfusion-free-release-$(rpm -E %fedora).noarch.rpm"
|
||||
$SUDO rm "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm"
|
||||
|
||||
install_cuda_driver_yum $OS_NAME '41'
|
||||
|
||||
info "Nvidia driver installation complete, please reboot now and run the Install script again to complete the setup."
|
||||
exit
|
||||
|
||||
elif [ "$Answer" = "exit" ]; then
|
||||
|
||||
aborted
|
||||
else
|
||||
warn "Invalid choice. Please enter 'akmod' or 'exit'."
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu
|
||||
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
|
||||
install_cuda_driver_apt() {
|
||||
info 'Installing NVIDIA repository...'
|
||||
info 'Installing NVIDIA CUDA repository...'
|
||||
curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb
|
||||
|
||||
case $1 in
|
||||
@@ -323,7 +559,7 @@ install_cuda() {
|
||||
case $OS_NAME in
|
||||
centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
|
||||
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
|
||||
fedora) [ $OS_VERSION -lt '37' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '37';;
|
||||
fedora) [ $OS_VERSION -lt '41' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '41';;
|
||||
amzn) install_cuda_driver_yum 'fedora' '37' ;;
|
||||
debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
|
||||
ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
|
||||
@@ -399,7 +635,7 @@ install_docker() {
|
||||
$SUDO systemctl start docker
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
info "Creating LocalAI Docker volume..."
|
||||
# Create volume if doesn't exist already
|
||||
if ! $SUDO docker volume inspect local-ai-data > /dev/null 2>&1; then
|
||||
$SUDO docker volume create local-ai-data
|
||||
@@ -413,7 +649,7 @@ install_docker() {
|
||||
# if $SUDO docker ps --format '{{.Names}}' | grep -q local-ai; then
|
||||
# info "LocalAI Docker container is already running."
|
||||
# exit 0
|
||||
# fi
|
||||
# fi
|
||||
|
||||
# info "Starting LocalAI Docker container..."
|
||||
# $SUDO docker start local-ai
|
||||
@@ -430,22 +666,28 @@ install_docker() {
|
||||
|
||||
IMAGE_TAG=
|
||||
if [ "$HAS_CUDA" ]; then
|
||||
IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12-ffmpeg
|
||||
# CORE
|
||||
if [ "$CORE_IMAGES" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg-core
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12-ffmpeg-core
|
||||
fi
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-aio-gpu-nvidia-cuda-12
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12
|
||||
fi
|
||||
|
||||
info "Checking Nvidia Kernel Drivers presence..."
|
||||
if ! available nvidia-smi; then
|
||||
info "Installing nvidia-cuda-toolkit..."
|
||||
# TODO:
|
||||
$SUDO apt-get -y install nvidia-cuda-toolkit
|
||||
OS_NAME=$ID
|
||||
OS_VERSION=$VERSION_ID
|
||||
|
||||
case $OS_NAME in
|
||||
debian|ubuntu) $SUDO apt-get -y install nvidia-cuda-toolkit;;
|
||||
fedora) install_fedora_nvidia_kernel_drivers;;
|
||||
esac
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
--gpus all \
|
||||
--restart=always \
|
||||
@@ -454,16 +696,17 @@ install_docker() {
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_AMD" ]; then
|
||||
IMAGE_TAG=${VERSION}-hipblas-ffmpeg
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-hipblas-ffmpeg
|
||||
# CORE
|
||||
if [ "$CORE_IMAGES" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-hipblas-ffmpeg-core
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-hipblas-ffmpeg-core
|
||||
fi
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-aio-gpu-hipblas
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-hipblas
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
@@ -473,16 +716,17 @@ install_docker() {
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_INTEL" ]; then
|
||||
IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32-ffmpeg
|
||||
# CORE
|
||||
if [ "$CORE_IMAGES" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg-core
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32-ffmpeg-core
|
||||
fi
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-aio-gpu-intel-f32
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
--device /dev/dri \
|
||||
--restart=always \
|
||||
@@ -491,15 +735,17 @@ install_docker() {
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
else
|
||||
IMAGE_TAG=${VERSION}-ffmpeg
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-ffmpeg
|
||||
# CORE
|
||||
if [ "$CORE_IMAGES" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-ffmpeg-core
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-ffmpeg-core
|
||||
fi
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${VERSION}-aio-cpu
|
||||
fi
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-cpu
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/models \
|
||||
--restart=always \
|
||||
-e MODELS_PATH=/models \
|
||||
@@ -516,10 +762,10 @@ install_docker() {
|
||||
install_binary_darwin() {
|
||||
[ "$(uname -s)" = "Darwin" ] || fatal 'This script is intended to run on macOS only.'
|
||||
|
||||
info "Downloading local-ai..."
|
||||
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${VERSION}/local-ai-Darwin-${ARCH}"
|
||||
info "Downloading LocalAI ${LOCALAI_VERSION}..."
|
||||
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Darwin-${ARCH}"
|
||||
|
||||
info "Installing local-ai..."
|
||||
info "Installing to /usr/local/bin/local-ai"
|
||||
install -o0 -g0 -m755 $TEMP_DIR/local-ai /usr/local/bin/local-ai
|
||||
|
||||
install_success
|
||||
@@ -548,14 +794,14 @@ install_binary() {
|
||||
exit 1
|
||||
fi
|
||||
|
||||
info "Downloading local-ai..."
|
||||
curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${VERSION}/local-ai-Linux-${ARCH}"
|
||||
info "Downloading LocalAI ${LOCALAI_VERSION}..."
|
||||
curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Linux-${ARCH}"
|
||||
|
||||
for BINDIR in /usr/local/bin /usr/bin /bin; do
|
||||
echo $PATH | grep -q $BINDIR && break || continue
|
||||
done
|
||||
|
||||
info "Installing local-ai to $BINDIR..."
|
||||
info "Installing LocalAI as local-ai to $BINDIR..."
|
||||
$SUDO install -o0 -g0 -m755 -d $BINDIR
|
||||
$SUDO install -o0 -g0 -m755 $TEMP_DIR/local-ai $BINDIR/local-ai
|
||||
|
||||
@@ -603,7 +849,7 @@ detect_start_command() {
|
||||
if [ "$WORKER" = true ]; then
|
||||
if [ -n "$P2P_TOKEN" ]; then
|
||||
STARTCOMMAND="worker p2p-llama-cpp-rpc"
|
||||
else
|
||||
else
|
||||
STARTCOMMAND="worker llama-cpp-rpc"
|
||||
fi
|
||||
elif [ "$FEDERATED" = true ]; then
|
||||
@@ -617,6 +863,10 @@ detect_start_command() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Check if uninstall flag is provided
|
||||
if [ "$1" = "--uninstall" ]; then
|
||||
uninstall_localai
|
||||
fi
|
||||
|
||||
detect_start_command
|
||||
|
||||
@@ -664,10 +914,12 @@ for PACKAGE_MANAGER in dnf yum apt-get; do
|
||||
done
|
||||
|
||||
if [ "$DOCKER_INSTALL" = "true" ]; then
|
||||
info "Installing LocalAI from container images"
|
||||
if [ "$HAS_CUDA" = true ]; then
|
||||
install_container_toolkit
|
||||
fi
|
||||
install_docker
|
||||
else
|
||||
info "Installing LocalAI from binaries"
|
||||
install_binary
|
||||
fi
|
||||
|
||||
1516
gallery/index.yaml
1516
gallery/index.yaml
File diff suppressed because it is too large
Load Diff
39
gallery/qwen3.yaml
Normal file
39
gallery/qwen3.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
---
|
||||
name: "qwen3"
|
||||
|
||||
config_file: |
|
||||
mmap: true
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{ end -}}<|im_end|>
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 8192
|
||||
f16: true
|
||||
stopwords:
|
||||
- '<|im_end|>'
|
||||
- '<dummy32000>'
|
||||
- '</s>'
|
||||
- '<|endoftext|>'
|
||||
27
go.mod
27
go.mod
@@ -6,6 +6,7 @@ toolchain go1.23.1
|
||||
|
||||
require (
|
||||
dario.cat/mergo v1.0.1
|
||||
github.com/GeertJohan/go.rice v1.0.3
|
||||
github.com/Masterminds/sprig/v3 v3.3.0
|
||||
github.com/alecthomas/kong v0.9.0
|
||||
github.com/census-instrumentation/opencensus-proto v0.4.1
|
||||
@@ -27,6 +28,7 @@ require (
|
||||
github.com/golang/protobuf v1.5.4
|
||||
github.com/google/go-containerregistry v0.19.2
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/gpustack/gguf-parser-go v0.17.0
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.5.0
|
||||
github.com/hpcloud/tail v1.0.0
|
||||
github.com/ipfs/go-log v1.0.5
|
||||
@@ -42,7 +44,6 @@ require (
|
||||
github.com/onsi/ginkgo/v2 v2.22.2
|
||||
github.com/onsi/gomega v1.36.2
|
||||
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e
|
||||
github.com/ory/dockertest/v3 v3.10.0
|
||||
github.com/otiai10/openaigo v1.7.0
|
||||
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
|
||||
github.com/prometheus/client_golang v1.20.5
|
||||
@@ -54,7 +55,7 @@ require (
|
||||
github.com/streamer45/silero-vad-go v0.2.1
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/swaggo/swag v1.16.3
|
||||
github.com/thxcode/gguf-parser-go v0.1.0
|
||||
github.com/testcontainers/testcontainers-go v0.35.0
|
||||
github.com/tmc/langchaingo v0.1.12
|
||||
github.com/valyala/fasthttp v1.55.0
|
||||
go.opentelemetry.io/otel v1.34.0
|
||||
@@ -74,19 +75,26 @@ require (
|
||||
cloud.google.com/go/auth v0.4.1 // indirect
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.5.0 // indirect
|
||||
github.com/containerd/platforms v0.2.1 // indirect
|
||||
github.com/cpuguy83/dockercfg v0.3.2 // indirect
|
||||
github.com/daaku/go.zipexe v1.0.2 // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
|
||||
github.com/fasthttp/websocket v1.5.3 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.0.0 // indirect
|
||||
github.com/google/s2a-go v0.1.7 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/magiconair/properties v1.8.7 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
github.com/moby/patternmatcher v0.6.0 // indirect
|
||||
github.com/moby/sys/user v0.1.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/morikuni/aec v1.0.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/pion/datachannel v1.5.10 // indirect
|
||||
github.com/pion/dtls/v2 v2.2.12 // indirect
|
||||
@@ -110,6 +118,7 @@ require (
|
||||
github.com/pion/turn/v2 v2.1.6 // indirect
|
||||
github.com/pion/turn/v4 v4.0.0 // indirect
|
||||
github.com/pion/webrtc/v4 v4.0.9 // indirect
|
||||
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
|
||||
github.com/shirou/gopsutil/v4 v4.24.7 // indirect
|
||||
github.com/wlynxg/anet v0.0.5 // indirect
|
||||
@@ -128,7 +137,6 @@ require (
|
||||
github.com/Masterminds/semver/v3 v3.3.0 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/Microsoft/hcsshim v0.11.7 // indirect
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/alecthomas/chroma/v2 v2.8.0 // indirect
|
||||
github.com/andybalholm/brotli v1.1.0 // indirect
|
||||
@@ -152,7 +160,7 @@ require (
|
||||
github.com/dlclark/regexp2 v1.10.0 // indirect
|
||||
github.com/docker/cli v27.0.3+incompatible // indirect
|
||||
github.com/docker/distribution v2.8.2+incompatible // indirect
|
||||
github.com/docker/docker v27.0.3+incompatible
|
||||
github.com/docker/docker v27.1.1+incompatible
|
||||
github.com/docker/docker-credential-helpers v0.7.0 // indirect
|
||||
github.com/docker/go-connections v0.5.0 // indirect
|
||||
github.com/docker/go-units v0.5.0 // indirect
|
||||
@@ -181,14 +189,13 @@ require (
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/gopacket v1.1.19 // indirect
|
||||
github.com/google/pprof v0.0.0-20250208200701-d0013a598941 // indirect
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
|
||||
github.com/gorilla/css v1.0.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru v1.0.2 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/henvic/httpretty v0.1.3 // indirect
|
||||
github.com/henvic/httpretty v0.1.4 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/huin/goupnp v1.3.0 // indirect
|
||||
github.com/ipfs/boxo v0.27.4 // indirect
|
||||
@@ -255,7 +262,6 @@ require (
|
||||
github.com/olekukonko/tablewriter v0.0.5 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0
|
||||
github.com/opencontainers/runc v1.1.12 // indirect
|
||||
github.com/opencontainers/runtime-spec v1.2.0 // indirect
|
||||
github.com/opentracing/opentracing-go v1.2.0 // indirect
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
|
||||
@@ -278,7 +284,7 @@ require (
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
|
||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||
github.com/spf13/cast v1.7.0 // indirect
|
||||
@@ -293,9 +299,6 @@ require (
|
||||
github.com/vishvananda/netlink v1.3.0 // indirect
|
||||
github.com/vishvananda/netns v0.0.5 // indirect
|
||||
github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
|
||||
github.com/yuin/goldmark v1.5.4 // indirect
|
||||
github.com/yuin/goldmark-emoji v1.0.2 // indirect
|
||||
|
||||
83
go.sum
83
go.sum
@@ -17,10 +17,15 @@ dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBr
|
||||
dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4=
|
||||
dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D6DFvNNtx+9ybjezNCa8XF0xaYcETyp6rHWU=
|
||||
git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
|
||||
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
|
||||
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0=
|
||||
github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZSmI=
|
||||
github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4=
|
||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
||||
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
|
||||
@@ -35,10 +40,9 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ=
|
||||
github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU=
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
|
||||
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
|
||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
||||
github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c=
|
||||
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
|
||||
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
|
||||
github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264=
|
||||
@@ -93,12 +97,16 @@ github.com/containerd/errdefs v0.1.0 h1:m0wCRBiu1WJT/Fr+iOoQHMQS/eP5myQ8lCv4Dz5Z
|
||||
github.com/containerd/errdefs v0.1.0/go.mod h1:YgWiiHtLmSeBrvpw+UfPijzbLaB77mEG1WwJTDETIV0=
|
||||
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
|
||||
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
|
||||
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
|
||||
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
|
||||
github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k=
|
||||
github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o=
|
||||
github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
|
||||
github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
|
||||
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
@@ -108,6 +116,8 @@ github.com/creachadair/otp v0.5.0 h1:q3Th7CXm2zlmCdBjw5tEPFOj4oWJMnVL5HXlq0sNKS0
|
||||
github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA=
|
||||
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
|
||||
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
|
||||
github.com/daaku/go.zipexe v1.0.2 h1:Zg55YLYTr7M9wjKn8SY/WcpuuEi+kR2u4E8RhvpyXmk=
|
||||
github.com/daaku/go.zipexe v1.0.2/go.mod h1:5xWogtqlYnfBXkSB1o9xysukNP9GTvaNkqzUZbt3Bw8=
|
||||
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
|
||||
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@@ -119,14 +129,16 @@ github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5il
|
||||
github.com/decred/dcrd/crypto/blake256 v1.0.1/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
|
||||
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
|
||||
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/docker/cli v27.0.3+incompatible h1:usGs0/BoBW8MWxGeEtqPMkzOY56jZ6kYlSN5BLDioCQ=
|
||||
github.com/docker/cli v27.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
|
||||
github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
|
||||
github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
|
||||
github.com/docker/docker v27.0.3+incompatible h1:aBGI9TeQ4MPlhquTQKq9XbK79rKFVwXNUAYz9aXyEBE=
|
||||
github.com/docker/docker v27.0.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/docker v27.1.1+incompatible h1:hO/M4MtV36kzKldqnA37IWhebRA+LnqqcqDja6kVaKY=
|
||||
github.com/docker/docker v27.1.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
|
||||
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
|
||||
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
|
||||
@@ -165,8 +177,6 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad h1:dQ93Vd6i25o+zH9vvnZ8mu7jtJQ6jT3D+zE3V8Q49n0=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
|
||||
@@ -196,12 +206,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
|
||||
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46 h1:lALhXzDkqtp12udlDLLg+ybXVMmL7Ox9tybqVLWxjPE=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
|
||||
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
|
||||
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
|
||||
github.com/go-viper/mapstructure/v2 v2.0.0 h1:dhn8MZ1gZ0mzeodTG3jt5Vj/o87xZKuNAprG2mQfMfc=
|
||||
github.com/go-viper/mapstructure/v2 v2.0.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
||||
github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0=
|
||||
github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
@@ -276,8 +282,6 @@ github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAx
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
|
||||
github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw=
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
|
||||
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
@@ -295,9 +299,13 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
|
||||
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
|
||||
github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
|
||||
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
|
||||
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
@@ -307,8 +315,8 @@ github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iP
|
||||
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||
github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8=
|
||||
github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg=
|
||||
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
||||
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
@@ -387,8 +395,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394=
|
||||
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
|
||||
github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8=
|
||||
github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg=
|
||||
github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c=
|
||||
@@ -428,6 +434,8 @@ github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i
|
||||
github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 h1:5RK988zAqB3/AN3opGfRpoQgAVqr6/A5+qRTi67VUZY=
|
||||
github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k=
|
||||
github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
|
||||
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
|
||||
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
|
||||
github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
@@ -474,10 +482,14 @@ github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zx
|
||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
|
||||
github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
|
||||
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
|
||||
github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
|
||||
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
|
||||
github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc=
|
||||
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
|
||||
github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg=
|
||||
github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
|
||||
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
|
||||
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
@@ -486,13 +498,13 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
|
||||
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||
github.com/mudler/edgevpn v0.30.1 h1:4yyhNFJX62NpRp50sxiyZE5E/sdAqEZX+aE5Mv7QS60=
|
||||
github.com/mudler/edgevpn v0.30.1/go.mod h1:IAJkkJ0oH3rwsSGOGTFT4UBYFqYuD/QyaKzTLB3P/eU=
|
||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
|
||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
|
||||
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
|
||||
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU=
|
||||
github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 h1:Qh6ghkMgTu6siFbTf7L3IszJmshMhXxNL4V+t7IIA6w=
|
||||
@@ -529,6 +541,7 @@ github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJE
|
||||
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
|
||||
github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY=
|
||||
github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg=
|
||||
github.com/nkovacs/streamquote v1.0.0/go.mod h1:BN+NaZ2CmdKqUuTUXUEm9j95B2TRbpOWpxbJYzzgUsc=
|
||||
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
|
||||
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
|
||||
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
|
||||
@@ -545,8 +558,6 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
||||
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
|
||||
github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
|
||||
github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
|
||||
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
|
||||
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
@@ -555,8 +566,6 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr
|
||||
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
|
||||
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
|
||||
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
|
||||
github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4=
|
||||
github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg=
|
||||
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
|
||||
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
|
||||
github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg=
|
||||
@@ -660,6 +669,8 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
||||
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
||||
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8=
|
||||
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
@@ -712,8 +723,8 @@ github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
|
||||
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
|
||||
github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
|
||||
github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
|
||||
@@ -732,6 +743,8 @@ github.com/streamer45/silero-vad-go v0.2.1/go.mod h1:B+2FXs/5fZ6pzl6unUZYhZqkYdO
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
@@ -748,8 +761,8 @@ github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0J
|
||||
github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
|
||||
github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
|
||||
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
|
||||
github.com/thxcode/gguf-parser-go v0.1.0 h1:J4QruXyEQGjrAKeKZFlsD2na9l4XF5+bjR194d+wJS4=
|
||||
github.com/thxcode/gguf-parser-go v0.1.0/go.mod h1:Tn1PsO/YDEtLIxm1+QDCjIIH9L/9Sr7+KpxZKm0sEuE=
|
||||
github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo=
|
||||
github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4=
|
||||
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
|
||||
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
|
||||
github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU=
|
||||
@@ -768,6 +781,7 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
|
||||
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
|
||||
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
|
||||
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
|
||||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
||||
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
|
||||
@@ -786,13 +800,6 @@ github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h
|
||||
github.com/wlynxg/anet v0.0.3/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA=
|
||||
github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU=
|
||||
github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
|
||||
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
@@ -815,6 +822,10 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuH
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM=
|
||||
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
|
||||
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.50.0/go.mod h1:pMm5PkUo5YwbLiuEf7t2xg4wbP0/eSJrMxIMxKosynY=
|
||||
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
|
||||
@@ -825,6 +836,8 @@ go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnC
|
||||
go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg=
|
||||
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
|
||||
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
|
||||
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
|
||||
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
|
||||
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
|
||||
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
||||
go.uber.org/dig v1.18.0 h1:imUL1UiY0Mg4bqbFfsRQO5G4CGRBec/ZujWTvSVp3pw=
|
||||
@@ -1094,8 +1107,8 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
|
||||
gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
|
||||
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
|
||||
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
|
||||
grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o=
|
||||
gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259 h1:TbRPT0HtzFP3Cno1zZo7yPzEEnfu8EjLfl6IU9VfqkQ=
|
||||
gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259/go.mod h1:AVgIgHMwK63XvmAzWG9vLQ41YnVHN0du0tEC46fI7yY=
|
||||
|
||||
@@ -1,35 +1,37 @@
|
||||
package assets
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
rice "github.com/GeertJohan/go.rice"
|
||||
"github.com/mudler/LocalAI/pkg/library"
|
||||
)
|
||||
|
||||
const backendAssetsDir = "backend-assets"
|
||||
|
||||
func ResolvePath(dir string, paths ...string) string {
|
||||
return filepath.Join(append([]string{dir, "backend-assets"}, paths...)...)
|
||||
return filepath.Join(append([]string{dir, backendAssetsDir}, paths...)...)
|
||||
}
|
||||
|
||||
func ExtractFiles(content embed.FS, extractDir string) error {
|
||||
// Create the target directory if it doesn't exist
|
||||
err := os.MkdirAll(extractDir, 0750)
|
||||
func ExtractFiles(content *rice.Box, extractDir string) error {
|
||||
// Create the target directory with backend-assets subdirectory
|
||||
backendAssetsDir := filepath.Join(extractDir, backendAssetsDir)
|
||||
err := os.MkdirAll(backendAssetsDir, 0750)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
|
||||
// Walk through the embedded FS and extract files
|
||||
err = fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
|
||||
// Walk through the rice box and extract files
|
||||
err = content.Walk("", func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Reconstruct the directory structure in the target directory
|
||||
targetFile := filepath.Join(extractDir, path)
|
||||
if d.IsDir() {
|
||||
targetFile := filepath.Join(backendAssetsDir, path)
|
||||
if info.IsDir() {
|
||||
// Create the directory in the target directory
|
||||
err := os.MkdirAll(targetFile, 0750)
|
||||
if err != nil {
|
||||
@@ -38,8 +40,8 @@ func ExtractFiles(content embed.FS, extractDir string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read the file from the embedded FS
|
||||
fileData, err := content.ReadFile(path)
|
||||
// Read the file from the rice box
|
||||
fileData, err := content.Bytes(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read file: %v", err)
|
||||
}
|
||||
@@ -56,7 +58,7 @@ func ExtractFiles(content embed.FS, extractDir string) error {
|
||||
// If there is a lib directory, set LD_LIBRARY_PATH to include it
|
||||
// we might use this mechanism to carry over e.g. Nvidia CUDA libraries
|
||||
// from the embedded FS to the target directory
|
||||
library.LoadExtractedLibs(extractDir)
|
||||
library.LoadExtractedLibs(backendAssetsDir)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
package assets
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"io/fs"
|
||||
"os"
|
||||
|
||||
rice "github.com/GeertJohan/go.rice"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func ListFiles(content embed.FS) (files []string) {
|
||||
err := fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
|
||||
func ListFiles(content *rice.Box) (files []string) {
|
||||
err := content.Walk("", func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if d.IsDir() {
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ func ListFiles(content embed.FS) (files []string) {
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error walking the embedded filesystem")
|
||||
log.Error().Err(err).Msg("error walking the rice box")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ type Backend interface {
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
|
||||
|
||||
@@ -53,6 +53,10 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) GenerateVideo(*pb.GenerateVideoRequest) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) {
|
||||
return pb.TranscriptResult{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
@@ -57,7 +57,11 @@ func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@@ -89,7 +93,11 @@ func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -108,7 +116,11 @@ func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grp
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -127,7 +139,11 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -145,7 +161,11 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -182,7 +202,11 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -191,6 +215,28 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
|
||||
return client.GenerateImage(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.GenerateVideo(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
@@ -200,7 +246,11 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -218,7 +268,11 @@ func (c *Client) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequ
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -236,7 +290,11 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -254,7 +312,11 @@ func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -276,7 +338,11 @@ func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -294,7 +360,11 @@ func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ..
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -312,7 +382,11 @@ func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, o
|
||||
defer c.wdUnMark()
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -330,7 +404,11 @@ func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ..
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -348,7 +426,11 @@ func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -366,7 +448,11 @@ func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -384,7 +470,11 @@ func (c *Client) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opt
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -402,7 +492,11 @@ func (c *Client) VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOp
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithDefaultCallOptions(
|
||||
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||
))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -47,6 +47,10 @@ func (e *embedBackend) GenerateImage(ctx context.Context, in *pb.GenerateImageRe
|
||||
return e.s.GenerateImage(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
return e.s.GenerateVideo(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
return e.s.TTS(ctx, in)
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ type LLM interface {
|
||||
Load(*pb.ModelOptions) error
|
||||
Embeddings(*pb.PredictOptions) ([]float32, error)
|
||||
GenerateImage(*pb.GenerateImageRequest) error
|
||||
GenerateVideo(*pb.GenerateVideoRequest) error
|
||||
AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error)
|
||||
TTS(*pb.TTSRequest) error
|
||||
SoundGeneration(*pb.SoundGenerationRequest) error
|
||||
|
||||
@@ -75,6 +75,18 @@ func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest)
|
||||
return &pb.Result{Message: "Image generated", Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *server) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest) (*pb.Result, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
defer s.llm.Unlock()
|
||||
}
|
||||
err := s.llm.GenerateVideo(in)
|
||||
if err != nil {
|
||||
return &pb.Result{Message: fmt.Sprintf("Error generating video: %s", err.Error()), Success: false}, err
|
||||
}
|
||||
return &pb.Result{Message: "Video generated", Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
@@ -244,7 +256,10 @@ func StartServer(address string, model LLM) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s := grpc.NewServer()
|
||||
s := grpc.NewServer(
|
||||
grpc.MaxRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxSendMsgSize(50*1024*1024), // 50MB
|
||||
)
|
||||
pb.RegisterBackendServer(s, &server{llm: model})
|
||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||
if err := s.Serve(lis); err != nil {
|
||||
@@ -259,7 +274,10 @@ func RunServer(address string, model LLM) (func() error, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s := grpc.NewServer()
|
||||
s := grpc.NewServer(
|
||||
grpc.MaxRecvMsgSize(50*1024*1024), // 50MB
|
||||
grpc.MaxSendMsgSize(50*1024*1024), // 50MB
|
||||
)
|
||||
pb.RegisterBackendServer(s, &server{llm: model})
|
||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||
if err = s.Serve(lis); err != nil {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user