Compare commits

..

2 Commits

Author SHA1 Message Date
Ettore Di Giacinto
8fea82e68b wire to grpc
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-19 20:22:31 +02:00
Ettore Di Giacinto
01e2e3dbc3 wip reranking llama.cpp
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-19 19:52:02 +02:00
85 changed files with 1190 additions and 3312 deletions

2
.env
View File

@@ -76,7 +76,7 @@
### Define a list of GRPC Servers for llama-cpp workers to distribute the load ### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829 # https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md # https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# LLAMACPP_GRPC_SERVERS="" # LLAMACPP_GRPC_SERVERS=""
### Enable to run parallel requests ### Enable to run parallel requests

View File

@@ -12,7 +12,7 @@ jobs:
- repository: "ggml-org/llama.cpp" - repository: "ggml-org/llama.cpp"
variable: "CPPLLAMA_VERSION" variable: "CPPLLAMA_VERSION"
branch: "master" branch: "master"
- repository: "ggml-org/whisper.cpp" - repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION" variable: "WHISPER_CPP_VERSION"
branch: "master" branch: "master"
- repository: "PABannier/bark.cpp" - repository: "PABannier/bark.cpp"

View File

@@ -14,7 +14,7 @@ jobs:
steps: steps:
- name: Dependabot metadata - name: Dependabot metadata
id: metadata id: metadata
uses: dependabot/fetch-metadata@v2.4.0 uses: dependabot/fetch-metadata@v2.3.0
with: with:
github-token: "${{ secrets.GITHUB_TOKEN }}" github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true skip-commit-verification: true

View File

@@ -42,7 +42,7 @@ jobs:
script: | script: |
sudo rm -rf local-ai/ || true sudo rm -rf local-ai/ || true
- name: copy file via ssh - name: copy file via ssh
uses: appleboy/scp-action@v1.0.0 uses: appleboy/scp-action@v0.1.7
with: with:
host: ${{ secrets.EXPLORER_SSH_HOST }} host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }} username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -33,7 +33,6 @@ jobs:
# Pushing with all jobs in parallel # Pushing with all jobs in parallel
# eats the bandwidth of all the nodes # eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }} max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
fail-fast: false
matrix: matrix:
include: include:
# This is basically covered by the AIO test # This is basically covered by the AIO test
@@ -57,35 +56,26 @@ jobs:
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas' # - build-type: 'hipblas'
platforms: 'linux/amd64' # platforms: 'linux/amd64'
tag-latest: 'false' # tag-latest: 'false'
tag-suffix: '-hipblas' # tag-suffix: '-hipblas'
ffmpeg: 'false' # ffmpeg: 'false'
image-type: 'extras' # image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1" # base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04" # grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' # runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" # makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16' # - build-type: 'sycl_f16'
platforms: 'linux/amd64' # platforms: 'linux/amd64'
tag-latest: 'false' # tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" # base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" # grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg' # tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true' # ffmpeg: 'true'
image-type: 'extras' # image-type: 'extras'
runs-on: 'arc-runner-set' # runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" # makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
# core-image-build: # core-image-build:
# uses: ./.github/workflows/image_build.yml # uses: ./.github/workflows/image_build.yml
# with: # with:

View File

@@ -45,13 +45,13 @@ jobs:
- build-type: 'hipblas' - build-type: 'hipblas'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-hipblas-extras' tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
aio: "-aio-gpu-hipblas" aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1" base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas-extras' latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas' latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
@@ -59,13 +59,33 @@ jobs:
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-hipblas' tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1" base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-hipblas' latest-image: 'latest-gpu-hipblas-core'
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs: self-hosted-jobs:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
@@ -95,58 +115,110 @@ jobs:
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }} max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
matrix: matrix:
include: include:
# Extra images
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
cuda-minor-version: "7" cuda-minor-version: "7"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda11-extras' tag-suffix: '-cublas-cuda11'
ffmpeg: 'true' ffmpeg: ''
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
latest-image: 'latest-gpu-nvidia-cuda-11-extras'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "0"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12-extras' tag-suffix: '-cublas-cuda12'
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
latest-image: 'latest-gpu-nvidia-cuda-11'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-12" aio: "-aio-gpu-nvidia-cuda-12"
latest-image: 'latest-gpu-nvidia-cuda-12-extras' latest-image: 'latest-gpu-nvidia-cuda-12'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12' latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: ''
image-type: 'extras'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16' - build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-extras' tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16" aio: "-aio-gpu-intel-f16"
latest-image: 'latest-gpu-intel-f16-extras' latest-image: 'latest-gpu-intel-f16'
latest-image-aio: 'latest-aio-gpu-intel-f16' latest-image-aio: 'latest-aio-gpu-intel-f16'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32' - build-type: 'sycl_f32'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-extras' tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32" aio: "-aio-gpu-intel-f32"
latest-image: 'latest-gpu-intel-f32-extras' latest-image: 'latest-gpu-intel-f32'
latest-image-aio: 'latest-aio-gpu-intel-f32' latest-image-aio: 'latest-aio-gpu-intel-f32'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
# Core images # Core images
@@ -155,23 +227,43 @@ jobs:
tag-latest: 'false' tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16' tag-suffix: '-sycl-f16-core'
ffmpeg: 'true' ffmpeg: 'false'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f16'
- build-type: 'sycl_f32' - build-type: 'sycl_f32'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32' tag-suffix: '-sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f32' latest-image: 'latest-gpu-intel-f16-core'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f32-core'
core-image-build: core-image-build:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
@@ -204,7 +296,7 @@ jobs:
- build-type: '' - build-type: ''
platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '' tag-suffix: '-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
@@ -219,38 +311,62 @@ jobs:
cuda-minor-version: "7" cuda-minor-version: "7"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda11' tag-suffix: '-cublas-cuda11-core'
ffmpeg: 'true' ffmpeg: ''
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false' skip-drivers: 'false'
latest-image: 'latest-gpu-nvidia-cuda-12'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "0"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12' tag-suffix: '-cublas-cuda12-core'
ffmpeg: ''
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
latest-image: 'latest-gpu-nvidia-cuda-12-core'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-nvidia-cuda-12' latest-image: 'latest-gpu-nvidia-cuda-12-core'
- build-type: 'vulkan' - build-type: 'vulkan'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-vulkan' tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-vulkan' latest-image: 'latest-gpu-vulkan-core'
gh-runner: gh-runner:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
@@ -283,8 +399,8 @@ jobs:
cuda-minor-version: "0" cuda-minor-version: "0"
platforms: 'linux/arm64' platforms: 'linux/arm64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-nvidia-l4t-arm64' tag-suffix: '-nvidia-l4t-arm64-core'
latest-image: 'latest-nvidia-l4t-arm64' latest-image: 'latest-nvidia-l4t-arm64-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"

View File

@@ -79,7 +79,7 @@ jobs:
args: ${{ steps.summarize.outputs.message }} args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails - name: Setup tmate session if fails
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -161,7 +161,7 @@ jobs:
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails - name: Setup tmate session if fails
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -36,7 +36,6 @@ jobs:
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
make install-go-tools
- name: Install CUDA Dependencies - name: Install CUDA Dependencies
run: | run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -124,7 +123,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -152,7 +151,6 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
make install-go-tools
- name: Intel Dependencies - name: Intel Dependencies
run: | run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -234,7 +232,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -255,7 +253,8 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc brew install protobuf grpc
make install-go-tools go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build - name: Build
id: build id: build
run: | run: |
@@ -276,7 +275,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -296,7 +295,8 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc libomp llvm brew install protobuf grpc libomp llvm
make install-go-tools go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build - name: Build
id: build id: build
run: | run: |
@@ -317,7 +317,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }} if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner - name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }} if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.22.4 uses: securego/gosec@v2.22.3
with: with:
# we let the report trigger content trigger a failure using the GitHub Security features. # we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...' args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -78,26 +78,6 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/diffusers make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test make --jobs=5 --output-sync=target -C backend/python/diffusers test
#tests-vllm:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install -y build-essential ffmpeg
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test vllm backend
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
# make --jobs=5 --output-sync=target -C backend/python/vllm test
# tests-transformers-musicgen: # tests-transformers-musicgen:
# runs-on: ubuntu-latest # runs-on: ubuntu-latest
# steps: # steps:

View File

@@ -71,7 +71,7 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
sudo apt-get install -y libgmock-dev clang sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -96,7 +96,6 @@ jobs:
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
# The python3-grpc-tools package in 22.04 is too old # The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools pip install --user grpcio-tools
@@ -131,7 +130,7 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -184,7 +183,6 @@ jobs:
rm protoc.zip rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images - name: Build images
run: | run: |
@@ -196,7 +194,7 @@ jobs:
make run-e2e-aio make run-e2e-aio
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -224,7 +222,6 @@ jobs:
run: | run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools pip install --user --no-cache-dir grpcio-tools
go install github.com/GeertJohan/go.rice/rice@latest
- name: Test - name: Test
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
@@ -235,7 +232,7 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22 uses: mxschmitt/action-tmate@v3.19
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -46,10 +46,9 @@ EOT
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers and rice # Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates RUN update-ca-certificates
@@ -301,9 +300,10 @@ COPY .git .
RUN make prepare RUN make prepare
## Build the binary ## Build the binary
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space ## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
## Otherwise just run the normal build ## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \ ## (both will use CUDA or hipblas for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \ SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \ else \
make build; \ make build; \

View File

@@ -6,11 +6,11 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true DETECT_LIBS?=true
# llama.cpp versions # llama.cpp versions
CPPLLAMA_VERSION?=e5c834f718a32b7584f142799bbf508fddb9021c CPPLLAMA_VERSION?=6408210082cc0a61b992b487be7e2ff2efbb9e36
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=e41bc5c61ae66af6be2bd7011769bb821a83e8ae WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
# go-piper version # go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper PIPER_REPO?=https://github.com/mudler/go-piper
@@ -24,21 +24,14 @@ BARKCPP_VERSION?=v1.0.0
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
# ONEAPI variables for SYCL
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
ONNX_VERSION?=1.20.0 ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64 ONNX_ARCH?=x64
ONNX_OS?=linux ONNX_OS?=linux
export BUILD_TYPE?= export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF export CMAKE_ARGS?=
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export BACKEND_LIBS?= export BACKEND_LIBS?=
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
CGO_LDFLAGS?= CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?= CGO_LDFLAGS_WHISPER?=
@@ -88,7 +81,6 @@ endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS # IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false) ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF CMAKE_ARGS+=-DGGML_NATIVE=OFF
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif endif
# Detect if we are running on arm64 # Detect if we are running on arm64
@@ -116,31 +108,13 @@ ifeq ($(OS),Darwin)
# disable metal if on Darwin and any other value is explicitly passed. # disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal) else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF CMAKE_ARGS+=-DGGML_METAL=OFF
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1 export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1 export GGML_NO_METAL=1
GO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif endif
ifeq ($(BUILD_TYPE),metal) ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate CGO_LDFLAGS += -framework Accelerate
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DGGML_OPENMP=OFF
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
else
CGO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif endif
else else
CGO_LDFLAGS_WHISPER+=-lgomp CGO_LDFLAGS_WHISPER+=-lgomp
@@ -152,29 +126,21 @@ ifeq ($(BUILD_TYPE),openblas)
endif endif
ifeq ($(BUILD_TYPE),cublas) ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1 export GGML_CUDA=1
CMAKE_ARGS+=-DGGML_CUDA=ON CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
endif endif
ifeq ($(BUILD_TYPE),vulkan) ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1 CMAKE_ARGS+=-DGGML_VULKAN=1
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
endif endif
ifneq (,$(findstring sycl,$(BUILD_TYPE))) ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1 export GGML_SYCL=1
CMAKE_ARGS+=-DGGML_SYCL=ON
endif endif
ifeq ($(BUILD_TYPE),sycl_f16) ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1 export GGML_SYCL_F16=1
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
endif endif
ifeq ($(BUILD_TYPE),hipblas) ifeq ($(BUILD_TYPE),hipblas)
@@ -185,7 +151,7 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
export STABLE_BUILD_TYPE= export STABLE_BUILD_TYPE=
export GGML_HIP=1 export GGML_HIP=1
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102 GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)" AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
@@ -320,9 +286,8 @@ sources/whisper.cpp:
git checkout $(WHISPER_CPP_VERSION) && \ git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch git submodule update --init --recursive --depth 1 --single-branch
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
cd sources/whisper.cpp/build && cmake --build . --config Release
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
@@ -372,14 +337,8 @@ clean-tests:
clean-dc: clean clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets cp -r /build/backend-assets /workspace/backend-assets
## Install Go tools
install-go-tools:
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install github.com/GeertJohan/go.rice/rice@latest
## Build: ## Build:
build: prepare backend-assets grpcs install-go-tools ## Build the project build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
@@ -389,9 +348,7 @@ ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib $(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/ cp -f $(BACKEND_LIBS) backend-assets/lib/
endif endif
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
rice append --exec $(BINARY_NAME)
build-minimal: build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
@@ -463,7 +420,6 @@ prepare-test: grpcs
cp -rf backend-assets core/http cp -rf backend-assets core/http
cp tests/models_fixtures/* test-models cp tests/models_fixtures/* test-models
## Test targets
test: prepare test-models/testmodel.ggml grpcs test: prepare test-models/testmodel.ggml grpcs
@echo 'Running tests' @echo 'Running tests'
export GO_TAGS="tts debug" export GO_TAGS="tts debug"
@@ -538,7 +494,7 @@ protogen: protogen-go protogen-python
protogen-clean: protogen-go-clean protogen-python-clean protogen-clean: protogen-go-clean protogen-python-clean
.PHONY: protogen-go .PHONY: protogen-go
protogen-go: install-go-tools protogen-go:
mkdir -p pkg/grpc/proto mkdir -p pkg/grpc/proto
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto backend/backend.proto
@@ -642,12 +598,10 @@ prepare-extra-conda-environments: protogen-python
prepare-test-extra: protogen-python prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/diffusers $(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/vllm
test-extra: prepare-test-extra test-extra: prepare-test-extra
$(MAKE) -C backend/python/transformers test $(MAKE) -C backend/python/transformers test
$(MAKE) -C backend/python/diffusers test $(MAKE) -C backend/python/diffusers test
$(MAKE) -C backend/python/vllm test
backend-assets: backend-assets:
mkdir -p backend-assets mkdir -p backend-assets
@@ -789,8 +743,8 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad $(UPX) backend-assets/grpc/silero-vad
endif endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \ CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
ifneq ($(UPX),) ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper $(UPX) backend-assets/grpc/whisper

View File

@@ -30,7 +30,7 @@
<p align="center"> <p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank"> <a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/> <img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
</a> </a>
<a href="https://discord.gg/uJAeKSAGDy" target="blank"> <a href="https://discord.gg/uJAeKSAGDy" target="blank">
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/> <img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
@@ -43,8 +43,7 @@
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
> >
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on > [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -104,12 +103,9 @@
Run the installer script: Run the installer script:
```bash ```bash
# Basic installation
curl https://localai.io/install.sh | sh curl https://localai.io/install.sh | sh
``` ```
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
Or run with docker: Or run with docker:
### CPU only image: ### CPU only image:

View File

@@ -48,6 +48,6 @@ template:
<|im_start|>assistant <|im_start|>assistant
download_files: download_files:
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf - filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4 sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf

View File

@@ -1,15 +1,6 @@
package main package main
import ( import "embed"
rice "github.com/GeertJohan/go.rice"
)
var backendAssets *rice.Box //go:embed backend-assets/*
var backendAssets embed.FS
func init() {
var err error
backendAssets, err = rice.FindBox("backend-assets")
if err != nil {
panic(err)
}
}

View File

@@ -14,7 +14,6 @@ service Backend {
rpc PredictStream(PredictOptions) returns (stream Reply) {} rpc PredictStream(PredictOptions) returns (stream Reply) {}
rpc Embedding(PredictOptions) returns (EmbeddingResult) {} rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
rpc GenerateImage(GenerateImageRequest) returns (Result) {} rpc GenerateImage(GenerateImageRequest) returns (Result) {}
rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {} rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
rpc TTS(TTSRequest) returns (Result) {} rpc TTS(TTSRequest) returns (Result) {}
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {} rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
@@ -302,19 +301,6 @@ message GenerateImageRequest {
int32 CLIPSkip = 11; int32 CLIPSkip = 11;
} }
message GenerateVideoRequest {
string prompt = 1;
string start_image = 2; // Path or base64 encoded image for the start frame
string end_image = 3; // Path or base64 encoded image for the end frame
int32 width = 4;
int32 height = 5;
int32 num_frames = 6; // Number of frames to generate
int32 fps = 7; // Frames per second
int32 seed = 8;
float cfg_scale = 9; // Classifier-free guidance scale
string dst = 10; // Output path for the generated video
}
message TTSRequest { message TTSRequest {
string text = 1; string text = 1;
string model = 2; string model = 2;

View File

@@ -1,17 +1,17 @@
## XXX: In some versions of CMake clip wasn't being built before llama. ## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future. ## This is an hack for now, but it should be fixed in the future.
# set(TARGET myclip) set(TARGET myclip)
# add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h) add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h)
# install(TARGETS ${TARGET} LIBRARY) install(TARGETS ${TARGET} LIBRARY)
# target_include_directories(myclip PUBLIC .) target_include_directories(myclip PUBLIC .)
# target_include_directories(myclip PUBLIC ../..) target_include_directories(myclip PUBLIC ../..)
# target_include_directories(myclip PUBLIC ../../common) target_include_directories(myclip PUBLIC ../../common)
# target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
# target_compile_features(${TARGET} PRIVATE cxx_std_11) target_compile_features(${TARGET} PRIVATE cxx_std_11)
# if (NOT MSVC) if (NOT MSVC)
# target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
# endif() endif()
# END CLIP hack # END CLIP hack
@@ -75,11 +75,7 @@ add_library(hw_grpc_proto
${hw_proto_hdrs} ) ${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp) add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
target_include_directories(${TARGET} PRIVATE ../llava)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse absl::flags_parse
gRPC::${_REFLECTION} gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP} gRPC::${_GRPC_GRPCPP}

View File

@@ -59,8 +59,8 @@ llama.cpp:
git checkout -b build $(LLAMA_VERSION) && \ git checkout -b build $(LLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch git submodule update --init --recursive --depth 1 --single-branch
llama.cpp/tools/grpc-server: llama.cpp llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/tools/grpc-server mkdir -p llama.cpp/examples/grpc-server
bash prepare.sh bash prepare.sh
rebuild: rebuild:
@@ -70,13 +70,13 @@ rebuild:
purge: purge:
rm -rf llama.cpp/build rm -rf llama.cpp/build
rm -rf llama.cpp/tools/grpc-server rm -rf llama.cpp/examples/grpc-server
rm -rf grpc-server rm -rf grpc-server
clean: purge clean: purge
rm -rf llama.cpp rm -rf llama.cpp
grpc-server: llama.cpp llama.cpp/tools/grpc-server grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" @echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE))) ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \ +bash -c "source $(ONEAPI_VARS); \

View File

@@ -11,7 +11,8 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <getopt.h> #include <getopt.h>
#include "mtmd.h" #include "clip.h"
#include "llava.h"
#include "log.h" #include "log.h"
#include "stb_image.h" #include "stb_image.h"
#include "common.h" #include "common.h"
@@ -51,7 +52,7 @@ struct server_params
{ {
std::string hostname = "127.0.0.1"; std::string hostname = "127.0.0.1";
std::vector<std::string> api_keys; std::vector<std::string> api_keys;
std::string public_path = "tools/server/public"; std::string public_path = "examples/server/public";
std::string chat_template = ""; std::string chat_template = "";
int32_t port = 8080; int32_t port = 8080;
int32_t read_timeout = 600; int32_t read_timeout = 600;
@@ -209,8 +210,6 @@ struct llama_client_slot
int32_t num_prompt_tokens_processed = 0; int32_t num_prompt_tokens_processed = 0;
json prompt; json prompt;
json data;
std::string generated_text; std::string generated_text;
llama_token sampled; llama_token sampled;
std::vector<llama_token> cache_tokens; std::vector<llama_token> cache_tokens;
@@ -218,6 +217,7 @@ struct llama_client_slot
bool infill = false; bool infill = false;
bool embedding = false; bool embedding = false;
bool reranker = false;
bool has_next_token = true; bool has_next_token = true;
bool truncated = false; bool truncated = false;
bool stopped_eos = false; bool stopped_eos = false;
@@ -240,7 +240,7 @@ struct llama_client_slot
int32_t n_past_se = 0; // self-extend int32_t n_past_se = 0; // self-extend
// multimodal // multimodal
mtmd_context * mctx = nullptr; std::vector<slot_image> images;
// stats // stats
size_t sent_count = 0; size_t sent_count = 0;
@@ -271,6 +271,17 @@ struct llama_client_slot
n_past_se = 0; n_past_se = 0;
generated_token_probs.clear(); generated_token_probs.clear();
for (slot_image & img : images)
{
free(img.image_embedding);
if (img.img_data) {
clip_image_u8_free(img.img_data);
}
img.prefix_prompt = "";
}
images.clear();
} }
bool has_budget(common_params &global_params) { bool has_budget(common_params &global_params) {
@@ -446,9 +457,6 @@ struct llama_server_context
llama_context *ctx = nullptr; llama_context *ctx = nullptr;
const llama_vocab * vocab = nullptr; const llama_vocab * vocab = nullptr;
// multimodal
mtmd_context * mctx = nullptr;
clip_ctx *clp_ctx = nullptr; clip_ctx *clp_ctx = nullptr;
common_params params; common_params params;
@@ -487,10 +495,6 @@ struct llama_server_context
~llama_server_context() ~llama_server_context()
{ {
if (mctx) {
mtmd_free(mctx);
mctx = nullptr;
}
if (ctx) if (ctx)
{ {
llama_free(ctx); llama_free(ctx);
@@ -509,14 +513,12 @@ struct llama_server_context
if (!params.mmproj.path.empty()) { if (!params.mmproj.path.empty()) {
multimodal = true; multimodal = true;
LOG_INFO("Multi Modal Mode Enabled", {}); LOG_INFO("Multi Modal Mode Enabled", {});
mtmd_context_params mparams = mtmd_context_params_default(); clp_ctx = clip_init(params.mmproj.path.c_str(), clip_context_params {
mparams.use_gpu = has_gpu; /* use_gpu */ has_gpu,
mparams.print_timings = false; /*verbosity=*/ GGML_LOG_LEVEL_INFO,
mparams.n_threads = params.cpuparams.n_threads; });
mparams.verbosity = GGML_LOG_LEVEL_INFO; if(clp_ctx == nullptr) {
mctx = mtmd_init_from_file(params.mmproj.path.c_str(), model, mparams); LOG_ERR("unable to load clip model: %s", params.mmproj.path.c_str());
if (mctx == nullptr) {
LOG_ERR("failed to load multimodal model, '%s'\n", params.mmproj.path.c_str());
return false; return false;
} }
@@ -534,6 +536,12 @@ struct llama_server_context
return false; return false;
} }
// Enable reranking if embeddings are enabled - moved after context initialization
if (params.embedding) {
params.reranking = true;
LOG_INFO("Reranking enabled (embeddings are enabled)", {});
}
if (multimodal) { if (multimodal) {
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_model_n_embd(model); const int n_embd_llm = llama_model_n_embd(model);
@@ -578,8 +586,6 @@ struct llama_server_context
slot.id = i; slot.id = i;
slot.n_ctx = n_ctx_slot; slot.n_ctx = n_ctx_slot;
slot.n_predict = params.n_predict; slot.n_predict = params.n_predict;
slot.mctx = mctx;
//slot.cache_tokens.has_mtmd = mctx != nullptr;
LOG_INFO("new slot", { LOG_INFO("new slot", {
{"slot_id", slot.id}, {"slot_id", slot.id},
@@ -617,61 +623,54 @@ struct llama_server_context
batch = llama_batch_init(n_ctx, 0, params.n_parallel); batch = llama_batch_init(n_ctx, 0, params.n_parallel);
} }
std::vector<server_tokens> tokenize(json &data, const json & json_prompt, bool add_bos) const std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
{ {
mtmd::bitmaps bitmaps; // TODO: currently, we tokenize using special tokens by default
std::vector<server_tokens> inputs; // this is not always correct (see https://github.com/ggerganov/llama.cpp/pull/4160#issuecomment-1824826216)
// but it's better compared to completely ignoring ChatML and other chat templates
const bool TMP_FORCE_SPECIAL = true;
if (mctx != nullptr) // If `add_bos` is true, we only add BOS, when json_prompt is a string,
// or the first element of the json_prompt array is a string.
std::vector<llama_token> prompt_tokens;
if (json_prompt.is_array())
{ {
const auto &images_data = data.find("image_data"); bool first = true;
if (images_data != data.end() && images_data->is_array()) for (const auto& p : json_prompt)
{ {
for (const auto &img : *images_data) if (p.is_string())
{ {
const std::vector<uint8_t> image_buffer = base64_decode(img["data"].get<std::string>()); auto s = p.template get<std::string>();
std::vector<llama_token> p;
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(image_buffer.data(), image_buffer.size())); if (first)
if (!bmp.ptr) { {
throw std::runtime_error("Failed to load image"); p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
first = false;
} }
// calculate bitmap hash (for KV caching) else
std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3); {
bmp.set_id(hash.c_str()); p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
bitmaps.entries.push_back(std::move(bmp)); }
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
}
else
{
if (first)
{
first = false;
}
prompt_tokens.push_back(p.template get<llama_token>());
} }
} }
}
// multimodal else
std::string prompt_str = json_prompt.template get<std::string>(); {
mtmd_input_text inp_txt = { auto s = json_prompt.template get<std::string>();
prompt_str.c_str(), prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
/* add_special */ true,
/* parse_special */ true,
};
mtmd::input_chunks chunks(mtmd_input_chunks_init());
auto bitmaps_c_ptr = bitmaps.c_ptr();
int32_t tokenized = mtmd_tokenize(mctx,
chunks.ptr.get(),
&inp_txt,
bitmaps_c_ptr.data(),
bitmaps_c_ptr.size());
if (tokenized != 0) {
throw std::runtime_error("Failed to tokenize prompt");
}
server_tokens tmp(chunks, true);
inputs.push_back(std::move(tmp));
} else {
// non-multimodal version
auto tokenized_prompts = tokenize_input_prompts(vocab, json_prompt, true, true);
for (auto & p : tokenized_prompts) {
auto tmp = server_tokens(p, mctx != nullptr);
inputs.push_back(std::move(tmp));
}
} }
return inputs; return prompt_tokens;
} }
llama_client_slot* get_slot(int id) { llama_client_slot* get_slot(int id) {
@@ -724,8 +723,6 @@ struct llama_server_context
slot->sparams.grammar_triggers = grammar_triggers; slot->sparams.grammar_triggers = grammar_triggers;
slot->sparams.grammar_lazy = grammar_lazy; slot->sparams.grammar_lazy = grammar_lazy;
slot->data = data;
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) { if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
// Might be better to reject the request with a 400 ? // Might be better to reject the request with a 400 ?
LOG_WARNING("Max tokens to predict exceeds server configuration", { LOG_WARNING("Max tokens to predict exceeds server configuration", {
@@ -767,7 +764,43 @@ struct llama_server_context
if (json_value(data, "ignore_eos", false) && has_eos_token) { if (json_value(data, "ignore_eos", false) && has_eos_token) {
slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
} }
/*
slot->sparams.penalty_prompt_tokens.clear();
slot->sparams.use_penalty_prompt_tokens = false;
const auto &penalty_prompt = data.find("penalty_prompt");
if (penalty_prompt != data.end())
{
if (penalty_prompt->is_string())
{
const auto penalty_prompt_string = penalty_prompt->get<std::string>();
auto penalty_tokens = llama_tokenize(model, penalty_prompt_string, false);
slot->sparams.penalty_prompt_tokens.swap(penalty_tokens);
if (slot->params.n_predict > 0)
{
slot->sparams.penalty_prompt_tokens.reserve(slot->sparams.penalty_prompt_tokens.size() + slot->params.n_predict);
}
slot->sparams.use_penalty_prompt_tokens = true;
}
else if (penalty_prompt->is_array())
{
const auto n_tokens = penalty_prompt->size();
slot->sparams.penalty_prompt_tokens.reserve(n_tokens + std::max(0, slot->params.n_predict));
const int n_vocab = llama_n_vocab(model);
for (const auto &penalty_token : *penalty_prompt)
{
if (penalty_token.is_number_integer())
{
const auto tok = penalty_token.get<llama_token>();
if (tok >= 0 && tok < n_vocab)
{
slot->sparams.penalty_prompt_tokens.push_back(tok);
}
}
}
slot->sparams.use_penalty_prompt_tokens = true;
}
}
*/
slot->sparams.logit_bias.clear(); slot->sparams.logit_bias.clear();
const auto &logit_bias = data.find("logit_bias"); const auto &logit_bias = data.find("logit_bias");
@@ -843,6 +876,79 @@ struct llama_server_context
} }
if (multimodal)
{
const auto &images_data = data.find("image_data");
if (images_data != data.end() && images_data->is_array())
{
for (const auto &img : *images_data)
{
const std::vector<uint8_t> image_buffer = base64_decode(img["data"].get<std::string>());
slot_image img_sl;
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
__func__,
slot->id,
img_sl.id
);
return false;
}
LOG_VERBOSE("image loaded", {
{"slot_id", slot->id},
{"img_sl_id", img_sl.id}
});
img_sl.request_encode_image = true;
slot->images.push_back(img_sl);
}
// process prompt
// example: system prompt [img-102] user [img-103] describe [img-134] -> [{id: 102, prefix: 'system prompt '}, {id: 103, prefix: ' user '}, {id: 134, prefix: ' describe '}]}
if (slot->images.size() > 0 && !slot->prompt.is_array())
{
std::string prompt = slot->prompt.get<std::string>();
size_t pos = 0, begin_prefix = 0;
std::string pattern = "[img-";
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
size_t end_prefix = pos;
pos += pattern.length();
size_t end_pos = prompt.find(']', pos);
if (end_pos != std::string::npos)
{
std::string image_id = prompt.substr(pos, end_pos - pos);
try
{
int img_id = std::stoi(image_id);
bool found = false;
for (slot_image &img : slot->images)
{
if (img.id == img_id) {
found = true;
img.prefix_prompt = prompt.substr(begin_prefix, end_prefix - begin_prefix);
begin_prefix = end_pos + 1;
break;
}
}
if (!found) {
LOG("ERROR: Image with id: %i, not found.\n", img_id);
slot->images.clear();
return false;
}
} catch (const std::invalid_argument& e) {
LOG("Invalid image number id in prompt\n");
slot->images.clear();
return false;
}
}
}
slot->prompt = "";
slot->params.input_suffix = prompt.substr(begin_prefix);
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
}
}
}
if (slot->ctx_sampling != nullptr) if (slot->ctx_sampling != nullptr)
{ {
@@ -1090,6 +1196,26 @@ struct llama_server_context
return slot.has_next_token; // continue return slot.has_next_token; // continue
} }
bool process_images(llama_client_slot &slot) const
{
for (slot_image &img : slot.images)
{
if (!img.request_encode_image)
{
continue;
}
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG("Error processing the given image");
return false;
}
img.request_encode_image = false;
}
return slot.images.size() > 0;
}
void send_error(task_server& task, const std::string &error) void send_error(task_server& task, const std::string &error)
{ {
LOG("task %i - error: %s\n", task.id, error.c_str()); LOG("task %i - error: %s\n", task.id, error.c_str());
@@ -1294,7 +1420,59 @@ struct llama_server_context
queue_results.send(res); queue_results.send(res);
} }
void request_completion(int task_id, json data, bool infill, bool embedding, int multitask_id) void send_rerank(llama_client_slot &slot, const llama_batch & batch)
{
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
res.error = false;
res.stop = true;
float score = -1e6f; // Default score if we fail to get embeddings
if (!params.reranking)
{
LOG_WARNING("reranking disabled", {
{"params.reranking", params.reranking},
});
}
else if (ctx == nullptr)
{
LOG_ERR("context is null, cannot perform reranking");
res.error = true;
}
else
{
for (int i = 0; i < batch.n_tokens; ++i) {
if (!batch.logits[i] || batch.seq_id[i][0] != slot.id) {
continue;
}
const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
if (embd == NULL) {
embd = llama_get_embeddings_ith(ctx, i);
}
if (embd == NULL) {
LOG("failed to get embeddings");
continue;
}
score = embd[0];
}
}
// Format result as JSON similar to the embedding function
res.result_json = json
{
{"score", score},
{"tokens", slot.num_prompt_tokens}
};
queue_results.send(res);
}
void request_completion(int task_id, json data, bool infill, bool embedding, bool rerank, int multitask_id)
{ {
task_server task; task_server task;
task.id = task_id; task.id = task_id;
@@ -1302,6 +1480,7 @@ struct llama_server_context
task.data = std::move(data); task.data = std::move(data);
task.infill_mode = infill; task.infill_mode = infill;
task.embedding_mode = embedding; task.embedding_mode = embedding;
task.reranking_mode = rerank;
task.type = TASK_TYPE_COMPLETION; task.type = TASK_TYPE_COMPLETION;
task.multitask_id = multitask_id; task.multitask_id = multitask_id;
@@ -1332,6 +1511,74 @@ struct llama_server_context
} }
} }
// for multiple images processing
bool ingest_images(llama_client_slot &slot, int n_batch)
{
int image_idx = 0;
while (image_idx < (int) slot.images.size())
{
slot_image &img = slot.images[image_idx];
// process prefix prompt
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
{
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
llama_batch batch_view = {
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
};
if (llama_decode(ctx, batch_view))
{
LOG("%s : failed to eval\n", __func__);
return false;
}
}
// process image with llm
for (int i = 0; i < img.image_tokens; i += n_batch)
{
int n_eval = img.image_tokens - i;
if (n_eval > n_batch)
{
n_eval = n_batch;
}
const int n_embd = llama_model_n_embd(model);
float * embd = img.image_embedding + i * n_embd;
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
if (llama_decode(ctx, llava_batch.batch))
{
LOG("%s : failed to eval image\n", __func__);
return false;
}
slot.n_past += n_eval;
}
image_idx++;
common_batch_clear(batch);
// append prefix of next image
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
slot.params.input_suffix : // no more images, then process suffix prompt
(json)(slot.images[image_idx].prefix_prompt);
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
for (int i = 0; i < (int) append_tokens.size(); ++i)
{
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
slot.n_past += 1;
}
}
return true;
}
void request_cancel(int task_id) void request_cancel(int task_id)
{ {
task_server task; task_server task;
@@ -1365,7 +1612,7 @@ struct llama_server_context
subtask_data["prompt"] = subtask_data["prompt"][i]; subtask_data["prompt"] = subtask_data["prompt"][i];
// subtasks inherit everything else (infill mode, embedding mode, etc.) // subtasks inherit everything else (infill mode, embedding mode, etc.)
request_completion(subtask_ids[i], subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multitask_id); request_completion(subtask_ids[i], subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multiprompt_task.reranking_mode, multitask_id);
} }
} }
@@ -1404,6 +1651,7 @@ struct llama_server_context
slot->infill = task.infill_mode; slot->infill = task.infill_mode;
slot->embedding = task.embedding_mode; slot->embedding = task.embedding_mode;
slot->reranker = task.reranking_mode;
slot->task_id = task.id; slot->task_id = task.id;
slot->multitask_id = task.multitask_id; slot->multitask_id = task.multitask_id;
@@ -1546,7 +1794,7 @@ struct llama_server_context
{ {
for (auto & slot : slots) for (auto & slot : slots)
{ {
const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()); const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
// empty prompt passed -> release the slot and send empty response // empty prompt passed -> release the slot and send empty response
// note: infill mode allows empty prompt // note: infill mode allows empty prompt
@@ -1563,7 +1811,7 @@ struct llama_server_context
{ {
slot.state = PROCESSING; slot.state = PROCESSING;
slot.command = NONE; slot.command = NONE;
std::vector<server_tokens> prompt_tokens; std::vector<llama_token> prompt_tokens;
slot.t_start_process_prompt = ggml_time_us(); slot.t_start_process_prompt = ggml_time_us();
slot.t_start_genereration = 0; slot.t_start_genereration = 0;
@@ -1575,41 +1823,24 @@ struct llama_server_context
params.input_suffix.erase(0, 1); params.input_suffix.erase(0, 1);
suff_rm_leading_spc = false; suff_rm_leading_spc = false;
} }
auto prefix_tokens = tokenize(slot.data, slot.params.input_prefix, false); auto prefix_tokens = tokenize(slot.params.input_prefix, false);
auto suffix_tokens = tokenize(slot.data, slot.params.input_suffix, false); auto suffix_tokens = tokenize(slot.params.input_suffix, false);
const int space_token = 29871; // TODO: this should not be hardcoded const int space_token = 29871; // TODO: this should not be hardcoded
if (suff_rm_leading_spc && !suffix_tokens.empty() && suffix_tokens[0][0] == space_token) { if (suff_rm_leading_spc && !suffix_tokens.empty() && suffix_tokens[0] == space_token) {
suffix_tokens.erase(suffix_tokens.begin()); suffix_tokens.erase(suffix_tokens.begin());
} }
// Create llama_tokens vectors for the special tokens prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab));
llama_tokens fim_pre_tokens; prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS
fim_pre_tokens.push_back(llama_vocab_fim_pre(vocab)); prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab));
llama_tokens bos_tokens; prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
bos_tokens.push_back(llama_vocab_bos(vocab)); prefix_tokens.push_back(llama_vocab_fim_mid(vocab));
llama_tokens fim_suf_tokens;
fim_suf_tokens.push_back(llama_vocab_fim_suf(vocab));
llama_tokens fim_mid_tokens;
fim_mid_tokens.push_back(llama_vocab_fim_mid(vocab));
// Create server_tokens objects
server_tokens fim_pre_token(fim_pre_tokens, mctx != nullptr);
server_tokens bos_token(bos_tokens, mctx != nullptr);
server_tokens fim_suf_token(fim_suf_tokens, mctx != nullptr);
server_tokens fim_mid_token(fim_mid_tokens, mctx != nullptr);
// Insert tokens in the correct order
prefix_tokens.insert(prefix_tokens.begin(), fim_pre_token);
prefix_tokens.insert(prefix_tokens.begin(), bos_token); // always add BOS
prefix_tokens.insert(prefix_tokens.end(), fim_suf_token);
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
prefix_tokens.push_back(fim_mid_token);
prompt_tokens = prefix_tokens; prompt_tokens = prefix_tokens;
} }
else else
{ {
prompt_tokens = tokenize(slot.data, slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt
} }
slot.num_prompt_tokens = prompt_tokens.size(); slot.num_prompt_tokens = prompt_tokens.size();
@@ -1637,12 +1868,7 @@ struct llama_server_context
{"new_tokens", tokens_to_str(ctx, new_tokens.cbegin(), new_tokens.cend())}, {"new_tokens", tokens_to_str(ctx, new_tokens.cbegin(), new_tokens.cend())},
}); });
slot.truncated = true; slot.truncated = true;
prompt_tokens = new_tokens;
// Convert new_tokens to server_tokens
std::vector<server_tokens> new_prompt_tokens;
server_tokens new_server_tokens(new_tokens, mctx != nullptr);
new_prompt_tokens.push_back(std::move(new_server_tokens));
prompt_tokens = std::move(new_prompt_tokens);
slot.num_prompt_tokens = prompt_tokens.size(); slot.num_prompt_tokens = prompt_tokens.size();
GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx); GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
@@ -1662,17 +1888,10 @@ struct llama_server_context
// push the prompt into the sampling context (do not apply grammar) // push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens) for (auto &token : prompt_tokens)
{ {
// Convert server_tokens to llama_token for sampling common_sampler_accept(slot.ctx_sampling, token, false);
llama_token tok = token[0]; // Get first token
common_sampler_accept(slot.ctx_sampling, tok, false);
} }
// Convert server_tokens to llama_tokens for comparison slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
std::vector<llama_token> prompt_llama_tokens;
for (const auto &token : prompt_tokens) {
prompt_llama_tokens.push_back(token[0]);
}
slot.n_past = common_part(slot.cache_tokens, prompt_llama_tokens);
// the last token of the cache is not in the KV cache until the next call to llama_decode // the last token of the cache is not in the KV cache until the next call to llama_decode
// (it was sampled, pushed into the "cache_tokens", but not yet put in the context) // (it was sampled, pushed into the "cache_tokens", but not yet put in the context)
@@ -1710,12 +1929,7 @@ struct llama_server_context
}); });
} }
// Convert server_tokens to llama_tokens for cache slot.cache_tokens = prompt_tokens;
std::vector<llama_token> cache_llama_tokens;
for (const auto &token : prompt_tokens) {
cache_llama_tokens.push_back(token[0]);
}
slot.cache_tokens = cache_llama_tokens;
if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0) if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
{ {
@@ -1739,36 +1953,18 @@ struct llama_server_context
}); });
llama_kv_cache_seq_rm(ctx, slot.id, p0, -1); llama_kv_cache_seq_rm(ctx, slot.id, p0, -1);
// process the prefix of first image
std::vector<server_tokens> prefix_tokens = prompt_tokens;
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
// check if we should process the image
if (slot.n_past < slot.n_prompt_tokens
&& slot.prompt_tokens[slot.n_past] == LLAMA_TOKEN_NULL) {
// process the image
int32_t new_n_past;
int32_t res = prompt_tokens.process_chunk(ctx, mctx, slot.n_past, slot.id, new_n_past);
int32_t n_pos = new_n_past - slot.n_past;
if (res != 0) {
slot.release();
LOG_ERR("failed to process image, res = %d\n", res);
continue;
}
slot.n_past += n_pos;
// slot.n_prompt_tokens_processed += n_pos;
}
LOG_VERBOSE("prompt ingested", { LOG_VERBOSE("prompt ingested", {
{"n_past", slot.n_past}, {"n_past", slot.n_past},
{"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)}, {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
{"to_eval", tokens_to_str(ctx, slot.cache_tokens.cbegin() + slot.n_past, slot.cache_tokens.cend())}, {"to_eval", tokens_to_str(ctx, slot.cache_tokens.cbegin() + slot.n_past, slot.cache_tokens.cend())},
}); });
const bool has_images = process_images(slot);
// process the prefix of first image
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
int32_t ga_i = slot.ga_i; int32_t ga_i = slot.ga_i;
int32_t ga_n = slot.ga_n; int32_t ga_n = slot.ga_n;
@@ -1788,6 +1984,19 @@ struct llama_server_context
slot_npast++; slot_npast++;
} }
if (has_images && !ingest_images(slot, n_batch))
{
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
__func__,
slot.id,
slot.task_id
);
// FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot for ever
// no one at the moment is checking the return value
return false;
}
// extract the logits only for the last token // extract the logits only for the last token
if (batch.n_tokens > 0) if (batch.n_tokens > 0)
{ {
@@ -1886,6 +2095,14 @@ struct llama_server_context
continue; continue;
} }
if (slot.reranker)
{
send_rerank(slot, batch_view);
slot.release();
slot.i_batch = -1;
continue;
}
completion_token_output result; completion_token_output result;
const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
@@ -2016,6 +2233,26 @@ static void start_llama_server() {
json parse_options(bool streaming, const backend::PredictOptions* predict, llama_server_context &llama) json parse_options(bool streaming, const backend::PredictOptions* predict, llama_server_context &llama)
{ {
// This is for example a slot data from the json data
// slot->params.stream = json_value(data, "stream", false);
// slot->params.cache_prompt = json_value(data, "cache_prompt", false);
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
// slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
// slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
// slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
// slot->params.seed = json_value(data, "seed", default_params.seed);
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
// slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
// Create now a json data from the prediction options instead // Create now a json data from the prediction options instead
// //
json data; json data;
@@ -2060,6 +2297,69 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
return data; return data;
} }
// static void parse_options_completion(bool streaming,const backend::PredictOptions* predict, llama_server_context &llama)
// {
// // https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L673
// gpt_params default_params;
// llama.stream = streaming;
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
// llama.params.sparams.top_k = predict->topk();
// llama.params.sparams.top_p = predict->topp();
// llama.params.sparams.typical_p = predict->typicalp();
// llama.params.sparams.penalty_last_n = predict->repeat();
// llama.params.sparams.temp = predict->temperature();
// llama.params.sparams.penalty_repeat = predict->penalty();
// llama.params.sparams.penalty_present = predict->presencepenalty();
// llama.params.sparams.penalty_freq = predict->frequencypenalty();
// llama.params.sparams.mirostat = predict->mirostat();
// llama.params.sparams.mirostat_tau = predict->mirostattau();
// llama.params.sparams.mirostat_eta = predict->mirostateta();
// llama.params.n_keep = predict->nkeep();
// llama.params.seed = predict->seed();
// llama.params.sparams.grammar = predict->grammar();
// // llama.params.n_probs = predict->
// llama.params.prompt = predict->prompt();
// llama.params.sparams.logit_bias.clear();
// if (predict->ignoreeos())
// {
// llama.params.sparams.logit_bias[llama_token_eos(llama.model)] = -INFINITY;
// }
// // const auto &logit_bias = body.find("logit_bias");
// // if (logit_bias != body.end() && logit_bias->is_array())
// // {
// // const int n_vocab = llama_n_vocab(llama.model);
// // for (const auto &el : *logit_bias)
// // {
// // if (el.is_array() && el.size() == 2 && el[0].is_number_integer())
// // {
// // llama_token tok = el[0].get<llama_token>();
// // if (tok >= 0 && tok < n_vocab)
// // {
// // if (el[1].is_number())
// // {
// // llama.params.logit_bias[tok] = el[1].get<float>();
// // }
// // else if (el[1].is_boolean() && !el[1].get<bool>())
// // {
// // llama.params.logit_bias[tok] = -INFINITY;
// // }
// // }
// // }
// // }
// // }
// llama.params.antiprompt.clear();
// for (const std::string& stopPrompt : predict->stopprompts()) {
// if (!stopPrompt.empty())
// {
// llama.params.antiprompt.push_back(stopPrompt);
// }
// }
// }
const std::vector<ggml_type> kv_cache_types = { const std::vector<ggml_type> kv_cache_types = {
GGML_TYPE_F32, GGML_TYPE_F32,
@@ -2258,7 +2558,7 @@ public:
json data = parse_options(true, request, llama); json data = parse_options(true, request, llama);
const int task_id = llama.queue_tasks.get_new_id(); const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id); llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, data, false, false, -1); llama.request_completion(task_id, data, false, false, false, -1);
while (true) while (true)
{ {
task_result result = llama.queue_results.recv(task_id); task_result result = llama.queue_results.recv(task_id);
@@ -2312,7 +2612,7 @@ public:
json data = parse_options(false, request, llama); json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id(); const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id); llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, data, false, false, -1); llama.request_completion(task_id, data, false, false, false, -1);
std::string completion_text; std::string completion_text;
task_result result = llama.queue_results.recv(task_id); task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) { if (!result.error && result.stop) {
@@ -2349,7 +2649,7 @@ public:
json data = parse_options(false, request, llama); json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id(); const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id); llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1); llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, false, -1);
// get the result // get the result
task_result result = llama.queue_results.recv(task_id); task_result result = llama.queue_results.recv(task_id);
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl; //std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
@@ -2372,15 +2672,55 @@ public:
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){ grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
json data = parse_options(false, request, llama); json data = parse_options(false, request, llama);
std::vector<server_tokens> tokens = llama.tokenize(data, data["prompt"],false); std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
for (int i=0 ; i< tokens.size(); i++){ for (int i=0 ; i< tokens.size(); i++){
response->add_tokens(tokens[i].llama_token); response->add_tokens(tokens[i]);
} }
return grpc::Status::OK; return grpc::Status::OK;
} }
grpc::Status Rerank(ServerContext* context, const backend::RerankRequest* request, backend::RerankResult* rerankResult) {
// Create a JSON object with the query and documents
json data = {
{"prompt", request->query()},
{"documents", request->documents()},
{"top_n", request->top_n()}
};
// Generate a new task ID
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
// Queue the task with reranking mode enabled
llama.request_completion(task_id, data, false, false, true, -1);
// Get the result
task_result result = llama.queue_results.recv(task_id);
llama.queue_results.remove_waiting_task_id(task_id);
if (!result.error && result.stop) {
// Set usage information
backend::Usage* usage = rerankResult->mutable_usage();
usage->set_total_tokens(result.result_json.value("tokens", 0));
usage->set_prompt_tokens(result.result_json.value("tokens", 0));
// Get the score from the result
float score = result.result_json.value("score", 0.0f);
// Create document results for each input document
for (int i = 0; i < request->documents_size(); i++) {
backend::DocumentResult* doc_result = rerankResult->add_results();
doc_result->set_index(i);
doc_result->set_text(request->documents(i));
doc_result->set_relevance_score(score);
}
}
return grpc::Status::OK;
}
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) { grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot(); llama_client_slot* active_slot = llama.get_active_slot();

View File

@@ -1,7 +1,7 @@
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 3cd0d2fa..6c5e811a 100644 index 3cd0d2fa..6c5e811a 100644
--- a/tools/mtmd/clip.cpp --- a/examples/llava/clip.cpp
+++ b/tools/mtmd/clip.cpp +++ b/examples/llava/clip.cpp
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima @@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches)); int* patches_data = (int*)malloc(ggml_nbytes(patches));

View File

@@ -7,22 +7,22 @@ for patch in $(ls patches); do
patch -d llama.cpp/ -p1 < patches/$patch patch -d llama.cpp/ -p1 < patches/$patch
done done
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/ cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/ cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/tools/grpc-server/ cp -rfv json.hpp llama.cpp/examples/grpc-server/
cp -rfv utils.hpp llama.cpp/tools/grpc-server/ cp -rfv utils.hpp llama.cpp/examples/grpc-server/
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
echo "grpc-server already added" echo "grpc-server already added"
else else
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
fi fi
## XXX: In some versions of CMake clip wasn't being built before llama. ## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future. ## This is an hack for now, but it should be fixed in the future.
# cp -rfv llama.cpp/tools/mtmd/clip.h llama.cpp/tools/grpc-server/clip.h cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
# cp -rfv llama.cpp/tools/mtmd/clip-impl.h llama.cpp/tools/grpc-server/clip-impl.h cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
# cp -rfv llama.cpp/tools/mtmd/llava.cpp llama.cpp/tools/grpc-server/llava.cpp cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
# echo '#include "llama.h"' > llama.cpp/tools/grpc-server/llava.h echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
# cat llama.cpp/tools/mtmd/llava.h >> llama.cpp/tools/grpc-server/llava.h cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
# cp -rfv llama.cpp/tools/mtmd/clip.cpp llama.cpp/tools/grpc-server/clip.cpp cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp

View File

@@ -1,4 +1,4 @@
// https://github.com/ggerganov/llama.cpp/blob/master/tools/server/utils.hpp // https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
#pragma once #pragma once
@@ -11,7 +11,7 @@
#include "json.hpp" #include "json.hpp"
#include "../mtmd/clip.h" #include "../llava/clip.h"
using json = nlohmann::json; using json = nlohmann::json;
@@ -61,6 +61,7 @@ struct task_server {
json data; json data;
bool infill_mode = false; bool infill_mode = false;
bool embedding_mode = false; bool embedding_mode = false;
bool reranking_mode = false;
int multitask_id = -1; int multitask_id = -1;
}; };
@@ -480,431 +481,4 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
} }
return ret; return ret;
}
//
// tokenizer and input processing utils
//
static bool json_is_array_of_numbers(const json & data) {
if (data.is_array()) {
for (const auto & e : data) {
if (!e.is_number_integer()) {
return false;
}
}
return true;
}
return false;
}
// is array having BOTH numbers & strings?
static bool json_is_array_of_mixed_numbers_strings(const json & data) {
bool seen_string = false;
bool seen_number = false;
if (data.is_array()) {
for (const auto & e : data) {
seen_string |= e.is_string();
seen_number |= e.is_number_integer();
if (seen_number && seen_string) {
return true;
}
}
}
return false;
}
// get value by path(key1 / key2)
static json json_get_nested_values(const std::vector<std::string> & paths, const json & js) {
json result = json::object();
for (const std::string & path : paths) {
json current = js;
const auto keys = string_split<std::string>(path, /*separator*/ '/');
bool valid_path = true;
for (const std::string & k : keys) {
if (valid_path && current.is_object() && current.contains(k)) {
current = current[k];
} else {
valid_path = false;
}
}
if (valid_path) {
result[path] = current;
}
}
return result;
}
/**
* this handles 2 cases:
* - only string, example: "string"
* - mixed string and tokens, example: [12, 34, "string", 56, 78]
*/
static llama_tokens tokenize_mixed(const llama_vocab * vocab, const json & json_prompt, bool add_special, bool parse_special) {
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
// or the first element of the json_prompt array is a string.
llama_tokens prompt_tokens;
if (json_prompt.is_array()) {
bool first = true;
for (const auto & p : json_prompt) {
if (p.is_string()) {
auto s = p.template get<std::string>();
llama_tokens p;
if (first) {
p = common_tokenize(vocab, s, add_special, parse_special);
first = false;
} else {
p = common_tokenize(vocab, s, false, parse_special);
}
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
} else {
if (first) {
first = false;
}
prompt_tokens.push_back(p.template get<llama_token>());
}
}
} else {
auto s = json_prompt.template get<std::string>();
prompt_tokens = common_tokenize(vocab, s, add_special, parse_special);
}
return prompt_tokens;
}
/**
* break the input "prompt" object into multiple prompt if needed, then tokenize them
* this supports these cases:
* - "prompt": "string"
* - "prompt": [12, 34, 56]
* - "prompt": [12, 34, "string", 56, 78]
* and multiple prompts (multi-tasks):
* - "prompt": ["string1", "string2"]
* - "prompt": ["string1", [12, 34, 56]]
* - "prompt": [[12, 34, 56], [78, 90, 12]]
* - "prompt": [[12, 34, "string", 56, 78], [12, 34, 56]]
*/
static std::vector<llama_tokens> tokenize_input_prompts(const llama_vocab * vocab, const json & json_prompt, bool add_special, bool parse_special) {
std::vector<llama_tokens> result;
if (json_prompt.is_string() || json_is_array_of_mixed_numbers_strings(json_prompt)) {
// string or mixed
result.push_back(tokenize_mixed(vocab, json_prompt, add_special, parse_special));
} else if (json_is_array_of_numbers(json_prompt)) {
// array of tokens
result.push_back(json_prompt.get<llama_tokens>());
} else if (json_prompt.is_array()) {
// array of prompts
result.reserve(json_prompt.size());
for (const auto & p : json_prompt) {
if (p.is_string() || json_is_array_of_mixed_numbers_strings(p)) {
result.push_back(tokenize_mixed(vocab, p, add_special, parse_special));
} else if (json_is_array_of_numbers(p)) {
// array of tokens
result.push_back(p.get<llama_tokens>());
} else {
throw std::runtime_error("element of \"prompt\" must be a string, an list of tokens, or a list of mixed strings & tokens");
}
}
} else {
throw std::runtime_error("\"prompt\" must be a string, an list of tokens, a list of mixed strings & tokens, or a list of prompts");
}
if (result.empty()) {
throw std::runtime_error("\"prompt\" must not be empty");
}
return result;
}
//
// utils for interacting with libmtmd
// (may need to refactor in near future)
//
/**
* server_tokens is a helper to manage the input tokens and image for the server.
* it is made this way to simplify the logic of KV cache management.
*/
struct server_tokens {
bool has_mtmd = false;
private: // disallow accessing these members directly, risking out-of-sync
// map a **start** position in tokens to the image chunk
std::unordered_map<llama_pos, mtmd::input_chunk_ptr> map_pos_to_image;
// list of tokens
// it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
// a mtmd_input_chunk can occupy multiple tokens, one llama_token per **position**
// important: for models using mrope, an image can contain multiple tokens but will use only one **position**
llama_tokens tokens;
// for ex. with input of 5 text tokens and 2 images:
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
// pos 0 1 2 3 4 5 6 7 8 9
// map_pos_to_image will contain: {5, img0}, {8, img1}
public:
server_tokens() = default;
~server_tokens() = default;
// Prevent copying
server_tokens(const server_tokens&) = delete;
server_tokens& operator=(const server_tokens&) = delete;
// Allow moving (usually implicitly generated if members are movable)
server_tokens(server_tokens&&) = default;
server_tokens& operator=(server_tokens&&) = default;
// Allow accessing elements using [] operator
llama_token operator[](size_t index) { return tokens[index]; }
const llama_token& operator[](size_t index) const { return tokens[index]; }
server_tokens(mtmd::input_chunks & mtmd_chunks, bool has_mtmd) : has_mtmd(has_mtmd) {
for (size_t i = 0; i < mtmd_chunks.size(); ++i) {
push_back(mtmd_chunks[i]);
}
}
server_tokens(llama_tokens & tokens, bool has_mtmd) : has_mtmd(has_mtmd), tokens(tokens) {}
// for debugging
std::string str() const {
std::ostringstream oss;
oss << "tokens: ";
for (const auto & t : tokens) {
if (t == LLAMA_TOKEN_NULL) {
oss << "<embd> ";
} else {
oss << t << " ";
}
}
oss << "\n";
oss << "image pos: ";
for (const auto & it : map_pos_to_image) {
oss << it.first << ", ";
}
return oss.str();
}
const mtmd::input_chunk_ptr & find_chunk(llama_pos pos) const {
auto it = map_pos_to_image.find(pos);
if (it != map_pos_to_image.end()) {
return it->second;
} else {
throw std::runtime_error("Chunk not found");
}
}
void push_back(llama_token tok) {
if (tok == LLAMA_TOKEN_NULL) {
throw std::runtime_error("Invalid token");
}
tokens.emplace_back(tok);
}
// will create a copy of the chunk if it contains non-text data
void push_back(const mtmd_input_chunk * chunk) {
auto type = mtmd_input_chunk_get_type(chunk);
if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
GGML_ASSERT(has_mtmd);
auto img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
const int n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
llama_pos start_pos = tokens.size();
for (int i = 0; i < n_pos; ++i) {
tokens.emplace_back(LLAMA_TOKEN_NULL);
}
mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
map_pos_to_image[start_pos] = std::move(new_chunk);
} else if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
size_t n_tokens;
auto text_tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
for (size_t i = 0; i < n_tokens; ++i) {
push_back(text_tokens[i]);
}
} else {
GGML_ABORT("Invalid chunk type");
}
}
// for compatibility with context shift and prompt truncation
void insert(const llama_tokens & inp_tokens) {
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
tokens.insert(tokens.end(), inp_tokens.begin(), inp_tokens.end());
}
// for compatibility with speculative decoding, ctx shift, slot save/load
const llama_tokens & get_text_tokens() const {
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
return tokens;
}
// for compatibility with speculative decoding
void set_token(llama_pos pos, llama_token id) {
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
tokens[pos] = id;
}
size_t size() const {
return tokens.size();
}
bool empty() const {
return tokens.empty();
}
void clear() {
tokens.clear();
}
void resize(size_t n) {
GGML_ASSERT(n <= tokens.size());
if (has_mtmd) {
// we throw an error if we try to remove a token in the middle of an image
// for ex. with input of 5 text tokens and 2 images:
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
// n 1 2 3 4 5 6 7 8 9 10
// allowed to resize ^ ^
// disallowed to resize ^ ^ ^
if (n > 0) {
llama_token last_token = tokens[n - 1];
// make sure we never remove tokens in the middle of an image
if (last_token == LLAMA_TOKEN_NULL) {
find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
}
}
// remove all image chunks that are not used anymore
for (auto it = map_pos_to_image.begin(); it != map_pos_to_image.end(); ) {
llama_pos pos = it->first;
if (pos >= (llama_pos)n) {
it = map_pos_to_image.erase(it);
} else {
++it;
}
}
}
tokens.resize(n);
}
std::string detokenize(const llama_context * ctx, bool special) const {
llama_tokens text_tokens;
text_tokens.reserve(tokens.size());
for (const auto & t : tokens) {
if (t != LLAMA_TOKEN_NULL) {
text_tokens.push_back(t);
}
}
return common_detokenize(ctx, text_tokens, special);
}
size_t get_common_prefix(const server_tokens & b) const {
size_t max_idx = std::min(tokens.size(), b.tokens.size());
for (size_t i = 0; i < max_idx; ++i) {
auto & ai = tokens[i];
auto & bi = b.tokens[i];
if (ai == LLAMA_TOKEN_NULL && bi == LLAMA_TOKEN_NULL) {
GGML_ASSERT(has_mtmd);
const auto & a_chunk = find_chunk(i);
const auto & b_chunk = b.find_chunk(i);
GGML_ASSERT(a_chunk && b_chunk);
const auto * a_img = mtmd_input_chunk_get_tokens_image(a_chunk.get());
const auto * b_img = mtmd_input_chunk_get_tokens_image(b_chunk.get());
std::string ai_id = mtmd_image_tokens_get_id(a_img);
std::string bi_id = mtmd_image_tokens_get_id(b_img);
size_t a_pos = mtmd_image_tokens_get_n_pos(a_img);
size_t b_pos = mtmd_image_tokens_get_n_pos(b_img);
if (ai_id == bi_id && a_pos == b_pos) {
GGML_ASSERT(a_pos > 0 && "Invalid image token"); // should never happen
i += a_pos - 1; // will be +1 by the for loop
continue;
} else {
return i;
}
} else if (ai == bi) {
continue;
} else {
return i;
}
}
return max_idx; // all tokens are equal
}
// make sure all text tokens are within the vocab range
bool validate(const struct llama_context * ctx) const {
const llama_model * model = llama_get_model(ctx);
const llama_vocab * vocab = llama_model_get_vocab(model);
const int32_t n_vocab = llama_vocab_n_tokens(vocab);
for (size_t i = 0; i < tokens.size(); ++i) {
auto & t = tokens[i];
if (t == LLAMA_TOKEN_NULL) {
try {
const auto & chunk = find_chunk(i);
const auto * img_tokens = mtmd_input_chunk_get_tokens_image(chunk.get());
size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
i += n_pos - 1; // will be +1 by the for loop
} catch (const std::exception & e) {
return false;
}
} else if (t < 0 || t >= n_vocab) {
return false;
}
}
return true;
}
// encode and decode the image chunk
int32_t process_chunk(
llama_context * ctx,
mtmd_context * mctx,
llama_pos n_past,
int32_t seq_id,
llama_pos & n_pos_out) {
auto it = map_pos_to_image.find(n_past);
if (it == map_pos_to_image.end()) {
throw std::runtime_error("Chunk not found");
}
// SRV_INF("%s\n", "processing image...");
int32_t n_batch = llama_n_batch(ctx);
int64_t t0 = ggml_time_ms();
llama_pos new_n_past = n_past;
int32_t result = mtmd_helper_eval_chunk_single(mctx, ctx,
it->second.get(), // chunk
n_past,
seq_id,
n_batch,
true, // logits last
&new_n_past);
//SRV_INF("image processed in %" PRId64 " ms\n", ggml_time_ms() - t0);
if (result != 0) {
LOG_ERR("mtmd_helper_eval failed with status %d", result);
n_pos_out = n_past;
return result;
}
n_pos_out = new_n_past;
return 0;
}
};
// Computes FNV-1a hash of the data
static std::string fnv_hash(const uint8_t * data, size_t len) {
const uint64_t fnv_prime = 0x100000001b3ULL;
uint64_t hash = 0xcbf29ce484222325ULL;
for (size_t i = 0; i < len; ++i) {
hash ^= data[i];
hash *= fnv_prime;
}
return std::to_string(hash);
} }

View File

@@ -20,7 +20,7 @@ CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas) ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DSD_CUDA=ON CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS # If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically # to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas) else ifeq ($(BUILD_TYPE),openblas)
@@ -30,14 +30,14 @@ else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas) else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DSD_HIPBLAS=ON CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here # But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin) else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal) ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DSD_METAL=OFF CMAKE_ARGS+=-DGGML_METAL=OFF
else else
CMAKE_ARGS+=-DSD_METAL=ON CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal TARGET+=--target ggml-metal
endif endif

View File

@@ -74,7 +74,7 @@ func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.Transcript
context.SetTranslate(true) context.SetTranslate(true)
} }
if err := context.Process(data, nil, nil, nil); err != nil { if err := context.Process(data, nil, nil); err != nil {
return pb.TranscriptResult{}, err return pb.TranscriptResult{}, err
} }

View File

@@ -1,4 +1,4 @@
bark==0.1.5 bark==0.1.5
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
certifi certifi

View File

@@ -1,3 +1,3 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
grpcio-tools grpcio-tools

View File

@@ -1,4 +1,4 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
certifi certifi
packaging==24.1 packaging==24.1

View File

@@ -168,13 +168,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# We are storing all the options in a dict so we can use it later when # We are storing all the options in a dict so we can use it later when
# generating the images # generating the images
for opt in options: for opt in options:
if ":" not in opt:
continue
key, value = opt.split(":") key, value = opt.split(":")
self.options[key] = value self.options[key] = value
print(f"Options: {self.options}", file=sys.stderr)
local = False local = False
modelFile = request.Model modelFile = request.Model

View File

@@ -1,5 +1,5 @@
setuptools setuptools
grpcio==1.72.0 grpcio==1.71.0
pillow pillow
protobuf protobuf
certifi certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
certifi certifi
wheel wheel

View File

@@ -1,3 +1,3 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
grpcio-tools grpcio-tools

View File

@@ -1,4 +1,4 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
phonemizer phonemizer
scipy scipy

View File

@@ -1,3 +1,3 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
certifi certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
certifi certifi
setuptools setuptools

View File

@@ -194,40 +194,27 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
await iterations.aclose() await iterations.aclose()
async def _predict(self, request, context, streaming=False): async def _predict(self, request, context, streaming=False):
# Build the sampling parameters
# NOTE: this must stay in sync with the vllm backend
request_to_sampling_params = {
"N": "n",
"PresencePenalty": "presence_penalty",
"FrequencyPenalty": "frequency_penalty",
"RepetitionPenalty": "repetition_penalty",
"Temperature": "temperature",
"TopP": "top_p",
"TopK": "top_k",
"MinP": "min_p",
"Seed": "seed",
"StopPrompts": "stop",
"StopTokenIds": "stop_token_ids",
"BadWords": "bad_words",
"IncludeStopStrInOutput": "include_stop_str_in_output",
"IgnoreEOS": "ignore_eos",
"Tokens": "max_tokens",
"MinTokens": "min_tokens",
"Logprobs": "logprobs",
"PromptLogprobs": "prompt_logprobs",
"SkipSpecialTokens": "skip_special_tokens",
"SpacesBetweenSpecialTokens": "spaces_between_special_tokens",
"TruncatePromptTokens": "truncate_prompt_tokens",
"GuidedDecoding": "guided_decoding",
}
# Build sampling parameters
sampling_params = SamplingParams(top_p=0.9, max_tokens=200) sampling_params = SamplingParams(top_p=0.9, max_tokens=200)
if request.TopP != 0:
for request_field, param_field in request_to_sampling_params.items(): sampling_params.top_p = request.TopP
if hasattr(request, request_field): if request.Tokens > 0:
value = getattr(request, request_field) sampling_params.max_tokens = request.Tokens
if value not in (None, 0, [], False): if request.Temperature != 0:
setattr(sampling_params, param_field, value) sampling_params.temperature = request.Temperature
if request.TopK != 0:
sampling_params.top_k = request.TopK
if request.PresencePenalty != 0:
sampling_params.presence_penalty = request.PresencePenalty
if request.FrequencyPenalty != 0:
sampling_params.frequency_penalty = request.FrequencyPenalty
if request.StopPrompts:
sampling_params.stop = request.StopPrompts
if request.IgnoreEOS:
sampling_params.ignore_eos = request.IgnoreEOS
if request.Seed != 0:
sampling_params.seed = request.Seed
# Extract image paths and process images # Extract image paths and process images
prompt = request.Prompt prompt = request.Prompt

View File

@@ -1,4 +1,4 @@
grpcio==1.72.0 grpcio==1.71.0
protobuf protobuf
certifi certifi
setuptools setuptools

View File

@@ -75,53 +75,6 @@ class TestBackendServicer(unittest.TestCase):
finally: finally:
self.tearDown() self.tearDown()
def test_sampling_params(self):
"""
This method tests if all sampling parameters are correctly processed
NOTE: this does NOT test for correctness, just that we received a compatible response
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
self.assertTrue(response.success)
req = backend_pb2.PredictOptions(
Prompt="The capital of France is",
TopP=0.8,
Tokens=50,
Temperature=0.7,
TopK=40,
PresencePenalty=0.1,
FrequencyPenalty=0.2,
RepetitionPenalty=1.1,
MinP=0.05,
Seed=42,
StopPrompts=["\n"],
StopTokenIds=[50256],
BadWords=["badword"],
IncludeStopStrInOutput=True,
IgnoreEOS=True,
MinTokens=5,
Logprobs=5,
PromptLogprobs=5,
SkipSpecialTokens=True,
SpacesBetweenSpecialTokens=True,
TruncatePromptTokens=10,
GuidedDecoding=True,
N=2,
)
resp = stub.Predict(req)
self.assertIsNotNone(resp.message)
self.assertIsNotNone(resp.logprobs)
except Exception as err:
print(err)
self.fail("sampling params service failed")
finally:
self.tearDown()
def test_embedding(self): def test_embedding(self):
""" """
This method tests if the embeddings are generated successfully This method tests if the embeddings are generated successfully

View File

@@ -43,12 +43,18 @@ func New(opts ...config.AppOption) (*Application, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to create ModelPath: %q", err) return nil, fmt.Errorf("unable to create ModelPath: %q", err)
} }
if options.GeneratedContentDir != "" { if options.ImageDir != "" {
err := os.MkdirAll(options.GeneratedContentDir, 0750) err := os.MkdirAll(options.ImageDir, 0750)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to create ImageDir: %q", err) return nil, fmt.Errorf("unable to create ImageDir: %q", err)
} }
} }
if options.AudioDir != "" {
err := os.MkdirAll(options.AudioDir, 0750)
if err != nil {
return nil, fmt.Errorf("unable to create AudioDir: %q", err)
}
}
if options.UploadDir != "" { if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0750) err := os.MkdirAll(options.UploadDir, 0750)
if err != nil { if err != nil {

View File

@@ -99,7 +99,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
mmap = *c.MMap mmap = *c.MMap
} }
ctxSize := 4096 ctxSize := 1024
if c.ContextSize != nil { if c.ContextSize != nil {
ctxSize = *c.ContextSize ctxSize = *c.ContextSize
} }

View File

@@ -35,17 +35,12 @@ func SoundGeneration(
return "", nil, fmt.Errorf("could not load sound generation model") return "", nil, fmt.Errorf("could not load sound generation model")
} }
if err := os.MkdirAll(appConfig.GeneratedContentDir, 0750); err != nil { if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err) return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
} }
audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio") fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
if err := os.MkdirAll(audioDir, 0750); err != nil { filePath := filepath.Join(appConfig.AudioDir, fileName)
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := utils.GenerateUniqueFileName(audioDir, "sound_generation", ".wav")
filePath := filepath.Join(audioDir, fileName)
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{ res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text, Text: text,

View File

@@ -32,13 +32,12 @@ func ModelTTS(
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model) return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
} }
audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio") if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
if err := os.MkdirAll(audioDir, 0750); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err) return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
} }
fileName := utils.GenerateUniqueFileName(audioDir, "tts", ".wav") fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(audioDir, fileName) filePath := filepath.Join(appConfig.AudioDir, fileName)
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect. // We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
// This should be addressed in a follow up PR soon. // This should be addressed in a follow up PR soon.

View File

@@ -1,36 +0,0 @@
package backend
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
)
func VideoGeneration(height, width int32, prompt, startImage, endImage, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
opts := ModelOptions(backendConfig, appConfig)
inferenceModel, err := loader.Load(
opts...,
)
if err != nil {
return nil, err
}
defer loader.Close()
fn := func() error {
_, err := inferenceModel.GenerateVideo(
appConfig.Context,
&proto.GenerateVideoRequest{
Height: height,
Width: width,
Prompt: prompt,
StartImage: startImage,
EndImage: endImage,
Dst: dst,
})
return err
}
return fn, nil
}

View File

@@ -1,13 +1,11 @@
package cliContext package cliContext
import ( import "embed"
rice "github.com/GeertJohan/go.rice"
)
type Context struct { type Context struct {
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"` Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"` LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
BackendAssets *rice.Box `kong:"-"` BackendAssets embed.FS `kong:"-"`
} }

View File

@@ -21,7 +21,8 @@ type RunCMD struct {
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"` ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
@@ -46,7 +47,7 @@ type RunCMD struct {
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"` DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"` DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"` UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
@@ -80,7 +81,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithModelPath(r.ModelsPath), config.WithModelPath(r.ModelsPath),
config.WithContextSize(r.ContextSize), config.WithContextSize(r.ContextSize),
config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel), config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
config.WithGeneratedContentDir(r.GeneratedContentPath), config.WithImageDir(r.ImagePath),
config.WithAudioDir(r.AudioPath),
config.WithUploadDir(r.UploadPath), config.WithUploadDir(r.UploadPath),
config.WithConfigsDir(r.ConfigPath), config.WithConfigsDir(r.ConfigPath),
config.WithDynamicConfigDir(r.LocalaiConfigDir), config.WithDynamicConfigDir(r.LocalaiConfigDir),

View File

@@ -70,7 +70,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
opts := &config.ApplicationConfig{ opts := &config.ApplicationConfig{
ModelPath: t.ModelsPath, ModelPath: t.ModelsPath,
Context: context.Background(), Context: context.Background(),
GeneratedContentDir: outputDir, AudioDir: outputDir,
AssetsDestination: t.BackendAssetsPath, AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends, ExternalGRPCBackends: externalBackends,
} }

View File

@@ -36,10 +36,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
text := strings.Join(t.Text, " ") text := strings.Join(t.Text, " ")
opts := &config.ApplicationConfig{ opts := &config.ApplicationConfig{
ModelPath: t.ModelsPath, ModelPath: t.ModelsPath,
Context: context.Background(), Context: context.Background(),
GeneratedContentDir: outputDir, AudioDir: outputDir,
AssetsDestination: t.BackendAssetsPath, AssetsDestination: t.BackendAssetsPath,
} }
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend) ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)

View File

@@ -7,11 +7,11 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
gguf "github.com/gpustack/gguf-parser-go"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
gguf "github.com/thxcode/gguf-parser-go"
) )
type UtilCMD struct { type UtilCMD struct {
@@ -51,7 +51,7 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
log.Info(). log.Info().
Any("eosTokenID", f.Tokenizer().EOSTokenID). Any("eosTokenID", f.Tokenizer().EOSTokenID).
Any("bosTokenID", f.Tokenizer().BOSTokenID). Any("bosTokenID", f.Tokenizer().BOSTokenID).
Any("modelName", f.Metadata().Name). Any("modelName", f.Model().Name).
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0]) Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer") log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")

View File

@@ -2,11 +2,11 @@ package config
import ( import (
"context" "context"
"embed"
"encoding/json" "encoding/json"
"regexp" "regexp"
"time" "time"
rice "github.com/GeertJohan/go.rice"
"github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@@ -19,21 +19,20 @@ type ApplicationConfig struct {
UploadLimitMB, Threads, ContextSize int UploadLimitMB, Threads, ContextSize int
F16 bool F16 bool
Debug bool Debug bool
GeneratedContentDir string ImageDir string
AudioDir string
ConfigsDir string UploadDir string
UploadDir string ConfigsDir string
DynamicConfigsDir string
DynamicConfigsDir string DynamicConfigsDirPollInterval time.Duration
DynamicConfigsDirPollInterval time.Duration CORS bool
CORS bool CSRF bool
CSRF bool PreloadJSONModels string
PreloadJSONModels string PreloadModelsFromPath string
PreloadModelsFromPath string CORSAllowOrigins string
CORSAllowOrigins string ApiKeys []string
ApiKeys []string P2PToken string
P2PToken string P2PNetworkID string
P2PNetworkID string
DisableWebUI bool DisableWebUI bool
EnforcePredownloadScans bool EnforcePredownloadScans bool
@@ -47,7 +46,7 @@ type ApplicationConfig struct {
Galleries []Gallery Galleries []Gallery
BackendAssets *rice.Box BackendAssets embed.FS
AssetsDestination string AssetsDestination string
ExternalGRPCBackends map[string]string ExternalGRPCBackends map[string]string
@@ -198,7 +197,7 @@ func WithBackendAssetsOutput(out string) AppOption {
} }
} }
func WithBackendAssets(f *rice.Box) AppOption { func WithBackendAssets(f embed.FS) AppOption {
return func(o *ApplicationConfig) { return func(o *ApplicationConfig) {
o.BackendAssets = f o.BackendAssets = f
} }
@@ -280,9 +279,15 @@ func WithDebug(debug bool) AppOption {
} }
} }
func WithGeneratedContentDir(generatedContentDir string) AppOption { func WithAudioDir(audioDir string) AppOption {
return func(o *ApplicationConfig) { return func(o *ApplicationConfig) {
o.GeneratedContentDir = generatedContentDir o.AudioDir = audioDir
}
}
func WithImageDir(imageDir string) AppOption {
return func(o *ApplicationConfig) {
o.ImageDir = imageDir
} }
} }

View File

@@ -304,6 +304,9 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultTFZ := 1.0 defaultTFZ := 1.0
defaultZero := 0 defaultZero := 0
// Try to offload all GPU layers (if GPU is found)
defaultHigh := 99999999
trueV := true trueV := true
falseV := false falseV := false
@@ -363,6 +366,9 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
if cfg.MirostatTAU == nil { if cfg.MirostatTAU == nil {
cfg.MirostatTAU = &defaultMirostatTAU cfg.MirostatTAU = &defaultMirostatTAU
} }
if cfg.NGPULayers == nil {
cfg.NGPULayers = &defaultHigh
}
if cfg.LowVRAM == nil { if cfg.LowVRAM == nil {
cfg.LowVRAM = &falseV cfg.LowVRAM = &falseV
@@ -430,19 +436,18 @@ func (c *BackendConfig) HasTemplate() bool {
type BackendConfigUsecases int type BackendConfigUsecases int
const ( const (
FLAG_ANY BackendConfigUsecases = 0b000000000000 FLAG_ANY BackendConfigUsecases = 0b00000000000
FLAG_CHAT BackendConfigUsecases = 0b000000000001 FLAG_CHAT BackendConfigUsecases = 0b00000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000000010 FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
FLAG_EDIT BackendConfigUsecases = 0b000000000100 FLAG_EDIT BackendConfigUsecases = 0b00000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000000001000 FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
FLAG_RERANK BackendConfigUsecases = 0b000000010000 FLAG_RERANK BackendConfigUsecases = 0b00000010000
FLAG_IMAGE BackendConfigUsecases = 0b000000100000 FLAG_IMAGE BackendConfigUsecases = 0b00000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b000001000000 FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
FLAG_TTS BackendConfigUsecases = 0b000010000000 FLAG_TTS BackendConfigUsecases = 0b00010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b000100000000 FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
FLAG_TOKENIZE BackendConfigUsecases = 0b001000000000 FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
FLAG_VAD BackendConfigUsecases = 0b010000000000 FLAG_VAD BackendConfigUsecases = 0b10000000000
FLAG_VIDEO BackendConfigUsecases = 0b100000000000
// Common Subsets // Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
@@ -463,7 +468,6 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
"FLAG_TOKENIZE": FLAG_TOKENIZE, "FLAG_TOKENIZE": FLAG_TOKENIZE,
"FLAG_VAD": FLAG_VAD, "FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM, "FLAG_LLM": FLAG_LLM,
"FLAG_VIDEO": FLAG_VIDEO,
} }
} }
@@ -528,17 +532,6 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
return false return false
} }
}
if (u & FLAG_VIDEO) == FLAG_VIDEO {
videoBackends := []string{"diffusers", "stablediffusion"}
if !slices.Contains(videoBackends, c.Backend) {
return false
}
if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
return false
}
} }
if (u & FLAG_RERANK) == FLAG_RERANK { if (u & FLAG_RERANK) == FLAG_RERANK {
if c.Backend != "rerankers" { if c.Backend != "rerankers" {

View File

@@ -3,10 +3,9 @@ package config
import ( import (
"strings" "strings"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
gguf "github.com/gpustack/gguf-parser-go" gguf "github.com/thxcode/gguf-parser-go"
) )
type familyType uint8 type familyType uint8
@@ -24,7 +23,6 @@ const (
const ( const (
defaultContextSize = 1024 defaultContextSize = 1024
defaultNGPULayers = 99999999
) )
type settingsConfig struct { type settingsConfig struct {
@@ -149,7 +147,7 @@ var knownTemplates = map[string]familyType{
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) { func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
if defaultCtx == 0 && cfg.ContextSize == nil { if defaultCtx == 0 && cfg.ContextSize == nil {
ctxSize := f.EstimateLLaMACppRun().ContextSize ctxSize := f.EstimateLLaMACppUsage().ContextSize
if ctxSize > 0 { if ctxSize > 0 {
cSize := int(ctxSize) cSize := int(ctxSize)
cfg.ContextSize = &cSize cfg.ContextSize = &cSize
@@ -159,46 +157,6 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
} }
} }
// GPU options
if cfg.Options == nil {
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
cfg.Options = []string{"gpu"}
}
}
// vram estimation
vram, err := xsysinfo.TotalAvailableVRAM()
if err != nil {
log.Error().Msgf("guessDefaultsFromFile(TotalAvailableVRAM): %s", err)
} else if vram > 0 {
estimate, err := xsysinfo.EstimateGGUFVRAMUsage(f, vram)
if err != nil {
log.Error().Msgf("guessDefaultsFromFile(EstimateGGUFVRAMUsage): %s", err)
} else {
if estimate.IsFullOffload {
log.Warn().Msgf("guessDefaultsFromFile: %s", "full offload is recommended")
}
if estimate.EstimatedVRAM > vram {
log.Warn().Msgf("guessDefaultsFromFile: %s", "estimated VRAM usage is greater than available VRAM")
}
if cfg.NGPULayers == nil && estimate.EstimatedLayers > 0 {
log.Debug().Msgf("guessDefaultsFromFile: %d layers estimated", estimate.EstimatedLayers)
cfg.NGPULayers = &estimate.EstimatedLayers
}
}
}
if cfg.NGPULayers == nil {
// we assume we want to offload all layers
defaultHigh := defaultNGPULayers
cfg.NGPULayers = &defaultHigh
}
log.Debug().Any("NGPULayers", cfg.NGPULayers).Msgf("guessDefaultsFromFile: %s", "NGPULayers set")
// template estimations
if cfg.HasTemplate() { if cfg.HasTemplate() {
// nothing to guess here // nothing to guess here
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set") log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
@@ -208,12 +166,12 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
log.Debug(). log.Debug().
Any("eosTokenID", f.Tokenizer().EOSTokenID). Any("eosTokenID", f.Tokenizer().EOSTokenID).
Any("bosTokenID", f.Tokenizer().BOSTokenID). Any("bosTokenID", f.Tokenizer().BOSTokenID).
Any("modelName", f.Metadata().Name). Any("modelName", f.Model().Name).
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName()) Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
// guess the name // guess the name
if cfg.Name == "" { if cfg.Name == "" {
cfg.Name = f.Metadata().Name cfg.Name = f.Model().Name
} }
family := identifyFamily(f) family := identifyFamily(f)
@@ -249,7 +207,6 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
cfg.TemplateConfig.JinjaTemplate = true cfg.TemplateConfig.JinjaTemplate = true
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString() cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
} }
} }
func identifyFamily(f *gguf.GGUFFile) familyType { func identifyFamily(f *gguf.GGUFFile) familyType {
@@ -274,7 +231,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
commandR := arch == "command-r" && eosTokenID == 255001 commandR := arch == "command-r" && eosTokenID == 255001
qwen2 := arch == "qwen2" qwen2 := arch == "qwen2"
phi3 := arch == "phi-3" phi3 := arch == "phi-3"
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma") gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
deepseek2 := arch == "deepseek2" deepseek2 := arch == "deepseek2"
switch { switch {

View File

@@ -4,8 +4,8 @@ import (
"os" "os"
"path/filepath" "path/filepath"
gguf "github.com/gpustack/gguf-parser-go"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
gguf "github.com/thxcode/gguf-parser-go"
) )
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) { func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {

View File

@@ -5,8 +5,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"net/http" "net/http"
"os"
"path/filepath"
"github.com/dave-gray101/v2keyauth" "github.com/dave-gray101/v2keyauth"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
@@ -155,19 +153,12 @@ func API(application *application.Application) (*fiber.App, error) {
Browse: true, Browse: true,
})) }))
if application.ApplicationConfig().GeneratedContentDir != "" { if application.ApplicationConfig().ImageDir != "" {
os.MkdirAll(application.ApplicationConfig().GeneratedContentDir, 0750) router.Static("/generated-images", application.ApplicationConfig().ImageDir)
audioPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "audio") }
imagePath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "images")
videoPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "videos")
os.MkdirAll(audioPath, 0750) if application.ApplicationConfig().AudioDir != "" {
os.MkdirAll(imagePath, 0750) router.Static("/generated-audio", application.ApplicationConfig().AudioDir)
os.MkdirAll(videoPath, 0750)
router.Static("/generated-audio", audioPath)
router.Static("/generated-images", imagePath)
router.Static("/generated-videos", videoPath)
} }
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration

View File

@@ -3,6 +3,7 @@ package http_test
import ( import (
"bytes" "bytes"
"context" "context"
"embed"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@@ -23,7 +24,6 @@ import (
. "github.com/onsi/gomega" . "github.com/onsi/gomega"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
rice "github.com/GeertJohan/go.rice"
openaigo "github.com/otiai10/openaigo" openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai" "github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema" "github.com/sashabaranov/go-openai/jsonschema"
@@ -264,15 +264,8 @@ func getRequest(url string, header http.Header) (error, int, []byte) {
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
var backendAssets *rice.Box //go:embed backend-assets/*
var backendAssets embed.FS
func init() {
var err error
backendAssets, err = rice.FindBox("backend-assets")
if err != nil {
panic(err)
}
}
var _ = Describe("API test", func() { var _ = Describe("API test", func() {
@@ -636,7 +629,8 @@ var _ = Describe("API test", func() {
application, err := application.New( application, err := application.New(
append(commonOpts, append(commonOpts,
config.WithContext(c), config.WithContext(c),
config.WithGeneratedContentDir(tmpdir), config.WithAudioDir(tmpdir),
config.WithImageDir(tmpdir),
config.WithGalleries(galleries), config.WithGalleries(galleries),
config.WithModelPath(modelDir), config.WithModelPath(modelDir),
config.WithBackendAssets(backendAssets), config.WithBackendAssets(backendAssets),

View File

@@ -1,205 +0,0 @@
package localai
import (
"bufio"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/backend"
"github.com/gofiber/fiber/v2"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
func downloadFile(url string) (string, error) {
// Get the data
resp, err := http.Get(url)
if err != nil {
return "", err
}
defer resp.Body.Close()
// Create the file
out, err := os.CreateTemp("", "video")
if err != nil {
return "", err
}
defer out.Close()
// Write the body to file
_, err = io.Copy(out, resp.Body)
return out.Name(), err
}
//
/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
// VideoEndpoint
// @Summary Creates a video given a prompt.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /video [post]
func VideoEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VideoRequest)
if !ok || input.Model == "" {
log.Error().Msg("Video Endpoint - Invalid Input")
return fiber.ErrBadRequest
}
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
log.Error().Msg("Video Endpoint - Invalid Config")
return fiber.ErrBadRequest
}
src := ""
if input.StartImage != "" {
var fileData []byte
var err error
// check if input.File is an URL, if so download it and save it
// to a temporary file
if strings.HasPrefix(input.StartImage, "http://") || strings.HasPrefix(input.StartImage, "https://") {
out, err := downloadFile(input.StartImage)
if err != nil {
return fmt.Errorf("failed downloading file:%w", err)
}
defer os.RemoveAll(out)
fileData, err = os.ReadFile(out)
if err != nil {
return fmt.Errorf("failed reading file:%w", err)
}
} else {
// base 64 decode the file and write it somewhere
// that we will cleanup
fileData, err = base64.StdEncoding.DecodeString(input.StartImage)
if err != nil {
return err
}
}
// Create a temporary file
outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
if err != nil {
return err
}
// write the base64 result
writer := bufio.NewWriter(outputFile)
_, err = writer.Write(fileData)
if err != nil {
outputFile.Close()
return err
}
outputFile.Close()
src = outputFile.Name()
defer os.RemoveAll(src)
}
log.Debug().Msgf("Parameter Config: %+v", config)
switch config.Backend {
case "stablediffusion":
config.Backend = model.StableDiffusionGGMLBackend
case "":
config.Backend = model.StableDiffusionGGMLBackend
}
width := input.Width
height := input.Height
if width == 0 {
width = 512
}
if height == 0 {
height = 512
}
b64JSON := input.ResponseFormat == "b64_json"
tempDir := ""
if !b64JSON {
tempDir = filepath.Join(appConfig.GeneratedContentDir, "videos")
}
// Create a temporary file
outputFile, err := os.CreateTemp(tempDir, "b64")
if err != nil {
return err
}
outputFile.Close()
// TODO: use mime type to determine the extension
output := outputFile.Name() + ".mp4"
// Rename the temporary file
err = os.Rename(outputFile.Name(), output)
if err != nil {
return err
}
baseURL := c.BaseURL()
fn, err := backend.VideoGeneration(height, width, input.Prompt, src, input.EndImage, output, ml, *config, appConfig)
if err != nil {
return err
}
if err := fn(); err != nil {
return err
}
item := &schema.Item{}
if b64JSON {
defer os.RemoveAll(output)
data, err := os.ReadFile(output)
if err != nil {
return err
}
item.B64JSON = base64.StdEncoding.EncodeToString(data)
} else {
base := filepath.Base(output)
item.URL = baseURL + "/generated-videos/" + base
}
id := uuid.New().String()
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Data: []schema.Item{*item},
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}

View File

@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
log.Error().Msg("Image Endpoint - Invalid Input") log.Error().Msg("Image Endpoint - Invalid Input")
return fiber.ErrBadRequest return fiber.ErrBadRequest
} }
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig) config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil { if !ok || config == nil {
log.Error().Msg("Image Endpoint - Invalid Config") log.Error().Msg("Image Endpoint - Invalid Config")
@@ -108,7 +108,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
} }
// Create a temporary file // Create a temporary file
outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64") outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
if err != nil { if err != nil {
return err return err
} }
@@ -184,7 +184,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
tempDir := "" tempDir := ""
if !b64JSON { if !b64JSON {
tempDir = filepath.Join(appConfig.GeneratedContentDir, "images") tempDir = appConfig.ImageDir
} }
// Create a temporary file // Create a temporary file
outputFile, err := os.CreateTemp(tempDir, "b64") outputFile, err := os.CreateTemp(tempDir, "b64")
@@ -192,7 +192,6 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
return err return err
} }
outputFile.Close() outputFile.Close()
output := outputFile.Name() + ".png" output := outputFile.Name() + ".png"
// Rename the temporary file // Rename the temporary file
err = os.Rename(outputFile.Name(), output) err = os.Rename(outputFile.Name(), output)

View File

@@ -59,11 +59,6 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Get("/metrics", localai.LocalAIMetricsEndpoint()) router.Get("/metrics", localai.LocalAIMetricsEndpoint())
} }
router.Post("/video",
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VideoRequest) }),
localai.VideoEndpoint(cl, ml, appConfig))
// Backend Statistics Module // Backend Statistics Module
// TODO: Should these use standard middlewares? Refactor later, they are extremely simple. // TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now

View File

@@ -24,20 +24,6 @@ type GalleryResponse struct {
StatusURL string `json:"status"` StatusURL string `json:"status"`
} }
type VideoRequest struct {
BasicModelRequest
Prompt string `json:"prompt" yaml:"prompt"`
StartImage string `json:"start_image" yaml:"start_image"`
EndImage string `json:"end_image" yaml:"end_image"`
Width int32 `json:"width" yaml:"width"`
Height int32 `json:"height" yaml:"height"`
NumFrames int32 `json:"num_frames" yaml:"num_frames"`
FPS int32 `json:"fps" yaml:"fps"`
Seed int32 `json:"seed" yaml:"seed"`
CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"`
ResponseFormat string `json:"response_format" yaml:"response_format"`
}
// @Description TTS request body // @Description TTS request body
type TTSRequest struct { type TTSRequest struct {
BasicModelRequest BasicModelRequest

View File

@@ -481,7 +481,8 @@ In the help text below, BASEPATH is the location that local-ai is being executed
|-----------|---------|-------------|----------------------| |-----------|---------|-------------|----------------------|
| --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | | --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
| --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH | | --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH |
| --generated-content-path | /tmp/generated/content | Location for assets generated by backends (e.g. stablediffusion) | $LOCALAI_GENERATED_CONTENT_PATH | | --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH |
| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH |
| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | | --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
@@ -514,7 +515,6 @@ In the help text below, BASEPATH is the location that local-ai is being executed
| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT | | --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT |
| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY | | --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY |
| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME | | --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME |
| --disable-webui | false | Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface | $LOCALAI_DISABLE_WEBUI |
| --machine-tag | | If not empty - put that string to Machine-Tag header in each response. Useful to track response from different machines using multiple P2P federated nodes | $LOCALAI_MACHINE_TAG | | --machine-tag | | If not empty - put that string to Machine-Tag header in each response. Useful to track response from different machines using multiple P2P federated nodes | $LOCALAI_MACHINE_TAG |
#### Backend Flags #### Backend Flags

View File

@@ -34,12 +34,4 @@ List of the Environment Variables:
| **FEDERATED** | Set to "true" to share the instance with the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) | | **FEDERATED** | Set to "true" to share the instance with the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) | | **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
## Uninstallation
To uninstall, run:
```
curl https://localai.io/install.sh | sh -s -- --uninstall
```
We are looking into improving the installer, and as this is a first iteration any feedback is welcome! Open up an [issue](https://github.com/mudler/LocalAI/issues/new/choose) if something doesn't work for you! We are looking into improving the installer, and as this is a first iteration any feedback is welcome! Open up an [issue](https://github.com/mudler/LocalAI/issues/new/choose) if something doesn't work for you!

View File

@@ -57,14 +57,12 @@ diffusers:
Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)) Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
If using a system with SELinux, ensure you have the policies installed, such as those [provided by nvidia](https://github.com/NVIDIA/dgx-selinux/) To check what CUDA version do you need, you can either run `nvidia-smi` or `nvcc --version`.
To check what CUDA version do you need, you can either run `nvidia-smi` or `nvcc --version`.
Alternatively, you can also check nvidia-smi with docker: Alternatively, you can also check nvidia-smi with docker:
``` ```
docker run --runtime=nvidia --rm nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
``` ```
To use CUDA, use the images with the `cublas` tag, for example. To use CUDA, use the images with the `cublas` tag, for example.
@@ -114,7 +112,7 @@ llama_init_from_file: kv self size = 512.00 MB
## ROCM(AMD) acceleration ## ROCM(AMD) acceleration
There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation. There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation.
Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatability and package versions for dependencies across all variations of OS must be tested independently if disired, please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation. Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatability and package versions for dependencies across all variations of OS must be tested independently if disired, please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
@@ -139,7 +137,7 @@ LocalAI hipblas images are built against the following targets: gfx900,gfx906,gf
If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below. If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below.
### Verified ### Verified
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0` The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
@@ -167,7 +165,7 @@ The devices in the following list have been tested with `hipblas` images running
1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html). 1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatability for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)) 2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatability for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
3. Install you chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html)) 3. Install you chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html))
4. Deploy. Yes it's that easy. 4. Deploy. Yes it's that easy.
#### Setup Example (Docker/containerd) #### Setup Example (Docker/containerd)
@@ -249,7 +247,7 @@ This configuration has been tested on a 'custom' cluster managed by SUSE Rancher
- When installing the ROCM kernel driver on your system ensure that you are installing an equal or newer version that that which is currently implemented in LocalAI (6.0.0 at time of writing). - When installing the ROCM kernel driver on your system ensure that you are installing an equal or newer version that that which is currently implemented in LocalAI (6.0.0 at time of writing).
- AMD documentation indicates that this will ensure functionality however your milage may vary depending on the GPU and distro you are using. - AMD documentation indicates that this will ensure functionality however your milage may vary depending on the GPU and distro you are using.
- If you encounter an `Error 413` on attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annontation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart. - If you encounter an `Error 413` on attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annontation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
## Intel acceleration (sycl) ## Intel acceleration (sycl)
@@ -280,36 +278,3 @@ docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=
``` ```
Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled. Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
## Vulkan acceleration
### Requirements
If using nvidia, follow the steps in the [CUDA](#cudanvidia-acceleration) section to configure your docker runtime to allow access to the GPU.
### Container images
To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}}-vulkan-ffmpeg-core`.
#### Example
To run LocalAI with Docker and Vulkan, you can use the following command as an example:
```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
```
### Notes
In addition to the commands to run LocalAI normally, you need to specify additonal flags to pass the GPU hardware to the container.
These flags are the same as the sections above, depending on the hardware, for [nvidia](#cudanvidia-acceleration), [AMD](#rocmamd-acceleration) or [Intel](#intel-acceleration-sycl).
If you have mixed hardware, you can pass flags for multiple GPUs, for example:
```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
--gpus=all \ # nvidia passthrough
--device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
localai/localai:latest-vulkan-ffmpeg-core
```

View File

@@ -18,15 +18,12 @@ If you are exposing LocalAI remotely, make sure you protect the API endpoints ad
## Quickstart ## Quickstart
### Using the Bash Installer
### Using the Bash Installer
```bash ```bash
# Basic installation
curl https://localai.io/install.sh | sh curl https://localai.io/install.sh | sh
``` ```
See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options
### Run with docker: ### Run with docker:
```bash ```bash
# CPU only image: # CPU only image:
@@ -101,57 +98,6 @@ The AIO images come pre-configured with the following features:
For instructions on using AIO images, see [Using container images]({{% relref "docs/getting-started/container-images#all-in-one-images" %}}). For instructions on using AIO images, see [Using container images]({{% relref "docs/getting-started/container-images#all-in-one-images" %}}).
## Using LocalAI and the full stack with LocalAGI
LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall.
[LocalAGI](https://github.com/mudler/LocalAGI) is a powerful, self-hostable AI Agent platform designed for maximum privacy and flexibility which encompassess and uses all the softwre stack. It provides a complete drop-in replacement for OpenAI's Responses APIs with advanced agentic capabilities, working entirely locally on consumer-grade hardware (CPU and GPU).
### Quick Start
```bash
# Clone the repository
git clone https://github.com/mudler/LocalAGI
cd LocalAGI
# CPU setup (default)
docker compose up
# NVIDIA GPU setup
docker compose -f docker-compose.nvidia.yaml up
# Intel GPU setup (for Intel Arc and integrated GPUs)
docker compose -f docker-compose.intel.yaml up
# Start with a specific model (see available models in models.localai.io, or localai.io to use any model in huggingface)
MODEL_NAME=gemma-3-12b-it docker compose up
# NVIDIA GPU setup with custom multimodal and image models
MODEL_NAME=gemma-3-12b-it \
MULTIMODAL_MODEL=minicpm-v-2_6 \
IMAGE_MODEL=flux.1-dev-ggml \
docker compose -f docker-compose.nvidia.yaml up
```
### Key Features
- **Privacy-Focused**: All processing happens locally, ensuring your data never leaves your machine
- **Flexible Deployment**: Supports CPU, NVIDIA GPU, and Intel GPU configurations
- **Multiple Model Support**: Compatible with various models from Hugging Face and other sources
- **Web Interface**: User-friendly chat interface for interacting with AI agents
- **Advanced Capabilities**: Supports multimodal models, image generation, and more
- **Docker Integration**: Easy deployment using Docker Compose
### Environment Variables
You can customize your LocalAGI setup using the following environment variables:
- `MODEL_NAME`: Specify the model to use (e.g., `gemma-3-12b-it`)
- `MULTIMODAL_MODEL`: Set a custom multimodal model
- `IMAGE_MODEL`: Configure an image generation model
For more advanced configuration and API documentation, visit the [LocalAGI GitHub repository](https://github.com/mudler/LocalAGI).
## What's Next? ## What's Next?
There is much more to explore with LocalAI! You can run any model from Hugging Face, perform video generation, and also voice cloning. For a comprehensive overview, check out the [features]({{% relref "docs/features" %}}) section. There is much more to explore with LocalAI! You can run any model from Hugging Face, perform video generation, and also voice cloning. For a comprehensive overview, check out the [features]({{% relref "docs/features" %}}) section.

View File

@@ -1,3 +1,3 @@
{ {
"version": "v2.29.0" "version": "v2.28.0"
} }

278
docs/static/install.sh vendored Executable file → Normal file
View File

@@ -1,74 +1,31 @@
#!/bin/sh #!/bin/sh
# LocalAI Installer Script # This script installs LocalAI on Linux.
# This script installs LocalAI on Linux and macOS systems. # It detects the current operating system architecture and installs the appropriate version of LocalAI.
# It automatically detects the system architecture and installs the appropriate version.
# Usage: # Usage:
# Basic installation: # curl ... | ENV_VAR=... sh -
# curl https://localai.io/install.sh | sh # or
# # ENV_VAR=... ./install.sh
# With environment variables: # To uninstall: ./install.sh --uninstall
# DOCKER_INSTALL=true USE_AIO=true API_KEY=your-key PORT=8080 THREADS=4 curl https://localai.io/install.sh | sh
#
# To uninstall:
# curl https://localai.io/install.sh | sh -s -- --uninstall
#
# Environment Variables:
# DOCKER_INSTALL - Set to "true" to install Docker images (default: auto-detected)
# USE_AIO - Set to "true" to use the all-in-one LocalAI image (default: false)
# API_KEY - API key for securing LocalAI access (default: none)
# PORT - Port to run LocalAI on (default: 8080)
# THREADS - Number of CPU threads to use (default: auto-detected)
# MODELS_PATH - Path to store models (default: /usr/share/local-ai/models)
# CORE_IMAGES - Set to "true" to download core LocalAI images (default: false)
# P2P_TOKEN - Token for P2P federation/worker mode (default: none)
# WORKER - Set to "true" to run as a worker node (default: false)
# FEDERATED - Set to "true" to enable federation mode (default: false)
# FEDERATED_SERVER - Set to "true" to run as a federation server (default: false)
set -e set -e
set -o noglob set -o noglob
#set -x #set -x
# --- helper functions for logs --- # --- helper functions for logs ---
# ANSI escape codes
LIGHT_BLUE='\033[38;5;117m'
ORANGE='\033[38;5;214m'
RED='\033[38;5;196m'
BOLD='\033[1m'
RESET='\033[0m'
info() info()
{ {
echo -e "${BOLD}${LIGHT_BLUE}" '[INFO] ' "$@" "${RESET}" echo ' ' "$@"
} }
warn() warn()
{ {
echo -e "${BOLD}${ORANGE}" '[WARN] ' "$@" "${RESET}" >&2 echo '[WARN] ' "$@" >&2
} }
fatal() fatal()
{ {
echo -e "${BOLD}${RED}" '[ERROR] ' "$@" "${RESET}" >&2 echo '[ERROR] ' "$@" >&2
exit 1
}
# --- custom choice functions ---
# like the logging functions, but with the -n flag to prevent the new line and keep the cursor in line for choices inputs like y/n
choice_info()
{
echo -e -n "${BOLD}${LIGHT_BLUE}" '[INFO] ' "$@" "${RESET}"
}
choice_warn()
{
echo -e -n "${BOLD}${ORANGE}" '[WARN] ' "$@" "${RESET}" >&2
}
choice_fatal()
{
echo -e -n "${BOLD}${RED}" '[ERROR] ' "$@" "${RESET}" >&2
exit 1 exit 1
} }
@@ -186,7 +143,7 @@ else
fi fi
THREADS=${THREADS:-$procs} THREADS=${THREADS:-$procs}
LATEST_VERSION=$(curl -s "https://api.github.com/repos/mudler/LocalAI/releases/latest" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') LATEST_VERSION=$(curl -s "https://api.github.com/repos/mudler/LocalAI/releases/latest" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
LOCALAI_VERSION="${LOCALAI_VERSION:-$LATEST_VERSION}" #changed due to VERSION beign already defined in Fedora 42 Cloud Edition VERSION="${VERSION:-$LATEST_VERSION}"
MODELS_PATH=${MODELS_PATH:-/usr/share/local-ai/models} MODELS_PATH=${MODELS_PATH:-/usr/share/local-ai/models}
@@ -253,7 +210,7 @@ WorkingDirectory=/usr/share/local-ai
[Install] [Install]
WantedBy=default.target WantedBy=default.target
EOF EOF
$SUDO touch /etc/localai.env $SUDO touch /etc/localai.env
$SUDO echo "ADDRESS=0.0.0.0:$PORT" | $SUDO tee /etc/localai.env >/dev/null $SUDO echo "ADDRESS=0.0.0.0:$PORT" | $SUDO tee /etc/localai.env >/dev/null
$SUDO echo "API_KEY=$API_KEY" | $SUDO tee -a /etc/localai.env >/dev/null $SUDO echo "API_KEY=$API_KEY" | $SUDO tee -a /etc/localai.env >/dev/null
@@ -286,74 +243,23 @@ EOF
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-yum-or-dnf # ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-yum-or-dnf
install_container_toolkit_yum() { install_container_toolkit_yum() {
info 'Installing NVIDIA container toolkit repository...' info 'Installing NVIDIA repository...'
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \ curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
$SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo $SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo
if [ "$PACKAGE_MANAGER" = "dnf" ]; then if [ "$PACKAGE_MANAGER" = "dnf" ]; then
DNF_VERSION=$($PACKAGE_MANAGER --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n1 | cut -d. -f1) $SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
if [ "$DNF_VERSION" -ge 5 ]; then else
# DNF5: Use 'setopt' to enable the repository
$SUDO $PACKAGE_MANAGER config-manager setopt nvidia-container-toolkit-experimental.enabled=1
else
# DNF4: Use '--set-enabled' to enable the repository
$SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
fi
else
$SUDO $PACKAGE_MANAGER -y install yum-utils $SUDO $PACKAGE_MANAGER -y install yum-utils
$SUDO $PACKAGE_MANAGER-config-manager --enable nvidia-container-toolkit-experimental $SUDO $PACKAGE_MANAGER-config-manager --enable nvidia-container-toolkit-experimental
fi fi
$SUDO $PACKAGE_MANAGER install -y nvidia-container-toolkit $SUDO $PACKAGE_MANAGER install -y nvidia-container-toolkit
} }
# Fedora, Rhel and other distro ships tunable SELinux booleans in the container-selinux policy to control device access.
# In particular, enabling container_use_devices allows containers to use arbitrary host device labels (including GPU devices)
# ref: https://github.com/containers/ramalama/blob/main/docs/ramalama-cuda.7.md#expected-output
enable_selinux_container_booleans() {
# Check SELinux mode
SELINUX_MODE=$(getenforce)
if [ "$SELINUX_MODE" == "Enforcing" ]; then
# Check the status of container_use_devices
CONTAINER_USE_DEVICES=$(getsebool container_use_devices | awk '{print $3}')
if [ "$CONTAINER_USE_DEVICES" == "off" ]; then
#We want to give the user the choice to enable the SE booleans since it is a security config
warn "+-----------------------------------------------------------------------------------------------------------+"
warn "| WARNING: |"
warn "| Your distribution ships tunable SELinux booleans in the container-selinux policy to control device access.|"
warn "| In particular, enabling \"container_use_devices\" allows containers to use arbitrary host device labels |"
warn "| (including GPU devices). |"
warn "| This script can try to enable them enabling the \"container_use_devices\" flag. |"
warn "| |"
warn "| Otherwise you can exit the install script and enable them yourself. |"
warn "+-----------------------------------------------------------------------------------------------------------+"
while true; do
choice_warn "I understand that this script is going to change my SELinux configs, which is a security risk: (yes/exit) ";
read Answer
if [ "$Answer" = "yes" ]; then
warn "Enabling \"container_use_devices\" persistently..."
$SUDO setsebool -P container_use_devices 1
break
elif [ "$Answer" = "exit" ]; then
aborted
else
warn "Invalid choice. Please enter 'yes' or 'exit'."
fi
done
fi
fi
}
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt # ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt
install_container_toolkit_apt() { install_container_toolkit_apt() {
info 'Installing NVIDIA container toolkit repository...' info 'Installing NVIDIA repository...'
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | $SUDO gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | $SUDO gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
@@ -365,7 +271,7 @@ install_container_toolkit_apt() {
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-zypper # ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-zypper
install_container_toolkit_zypper() { install_container_toolkit_zypper() {
info 'Installing NVIDIA zypper repository...' info 'Installing NVIDIA repository...'
$SUDO zypper ar https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo $SUDO zypper ar https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
$SUDO zypper modifyrepo --enable nvidia-container-toolkit-experimental $SUDO zypper modifyrepo --enable nvidia-container-toolkit-experimental
$SUDO zypper --gpg-auto-import-keys install -y nvidia-container-toolkit $SUDO zypper --gpg-auto-import-keys install -y nvidia-container-toolkit
@@ -394,29 +300,6 @@ install_container_toolkit() {
opensuse*|suse*) install_container_toolkit_zypper ;; opensuse*|suse*) install_container_toolkit_zypper ;;
*) echo "Could not install nvidia container toolkit - unknown OS" ;; *) echo "Could not install nvidia container toolkit - unknown OS" ;;
esac esac
# after installing the toolkit we need to add it to the docker runtimes, otherwise even with --gpu all
# the container would still run with runc and would not have access to nvidia-smi
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-docker
info "Adding NVIDIA Container Runtime to Docker runtimes..."
$SUDO nvidia-ctk runtime configure --runtime=docker
info "Restarting Docker Daemon"
$SUDO systemctl restart docker
# The NVML error arises because SELinux blocked the containers attempts to open the GPU devices or related libraries.
# Without relaxing SELinux for the container, GPU commands like nvidia-smi report “Insufficient Permissions”
# This has been noted in NVIDIAs documentation:
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.13.5/install-guide.html#id2
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/troubleshooting.html#nvml-insufficient-permissions-and-selinux
case $OS_NAME in
fedora|rhel|centos|rocky)
enable_selinux_container_booleans
;;
opensuse-tumbleweed)
enable_selinux_container_booleans
;;
esac
} }
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7 # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
@@ -424,21 +307,14 @@ install_container_toolkit() {
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9 # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora
install_cuda_driver_yum() { install_cuda_driver_yum() {
info 'Installing NVIDIA CUDA repository...' info 'Installing NVIDIA repository...'
case $PACKAGE_MANAGER in case $PACKAGE_MANAGER in
yum) yum)
$SUDO $PACKAGE_MANAGER -y install yum-utils $SUDO $PACKAGE_MANAGER -y install yum-utils
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo $SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
;; ;;
dnf) dnf)
DNF_VERSION=$($PACKAGE_MANAGER --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n1 | cut -d. -f1) $SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
if [ "$DNF_VERSION" -ge 5 ]; then
# DNF5: Use 'addrepo' to add the repository
$SUDO $PACKAGE_MANAGER config-manager addrepo --id=nvidia-cuda --set=name="nvidia-cuda" --set=baseurl="https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo"
else
# DNF4: Use '--add-repo' to add the repository
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
fi
;; ;;
esac esac
@@ -459,68 +335,10 @@ install_cuda_driver_yum() {
$SUDO $PACKAGE_MANAGER -y install cuda-drivers $SUDO $PACKAGE_MANAGER -y install cuda-drivers
} }
install_fedora_nvidia_kernel_drivers(){
#We want to give the user the choice to install the akmod kernel drivers or not, since it could break some setups
warn "+------------------------------------------------------------------------------------------------+"
warn "| WARNING: |"
warn "| Looks like the NVIDIA Kernel modules are not installed. |"
warn "| |"
warn "| This script can try to install them using akmod-nvidia. |"
warn "| - The script need the rpmfusion free and nonfree repos and will install them if not available. |"
warn "| - The akmod installation can sometimes inhibit the reboot command. |"
warn "| |"
warn "| Otherwise you can exit the install script and install them yourself. |"
warn "| NOTE: you will need to reboot after the installation. |"
warn "+------------------------------------------------------------------------------------------------+"
while true; do
choice_warn "Do you wish for the script to try and install them? (akmod/exit) ";
read Answer
if [ "$Answer" = "akmod" ]; then
DNF_VERSION=$($PACKAGE_MANAGER --version | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -n1 | cut -d. -f1)
OS_NAME=$ID
OS_VERSION=$VERSION_ID
FREE_URL="https://mirrors.rpmfusion.org/free/fedora/rpmfusion-free-release-${OS_VERSION}.noarch.rpm"
NONFREE_URL="https://mirrors.rpmfusion.org/nonfree/fedora/rpmfusion-nonfree-release-${OS_VERSION}.noarch.rpm"
curl -LO "$FREE_URL"
curl -LO "$NONFREE_URL"
if [ "$DNF_VERSION" -ge 5 ]; then
# DNF5:
$SUDO $PACKAGE_MANAGER install -y "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm" "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm"
$SUDO $PACKAGE_MANAGER install -y akmod-nvidia
else
# DNF4:
$SUDO $PACKAGE_MANAGER install -y "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm" "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm"
$SUDO $PACKAGE_MANAGER install -y akmod-nvidia
fi
$SUDO rm "rpmfusion-free-release-$(rpm -E %fedora).noarch.rpm"
$SUDO rm "rpmfusion-nonfree-release-$(rpm -E %fedora).noarch.rpm"
install_cuda_driver_yum $OS_NAME '41'
info "Nvidia driver installation complete, please reboot now and run the Install script again to complete the setup."
exit
elif [ "$Answer" = "exit" ]; then
aborted
else
warn "Invalid choice. Please enter 'akmod' or 'exit'."
fi
done
}
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
install_cuda_driver_apt() { install_cuda_driver_apt() {
info 'Installing NVIDIA CUDA repository...' info 'Installing NVIDIA repository...'
curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb
case $1 in case $1 in
@@ -559,7 +377,7 @@ install_cuda() {
case $OS_NAME in case $OS_NAME in
centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;; centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;; rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
fedora) [ $OS_VERSION -lt '41' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '41';; fedora) [ $OS_VERSION -lt '37' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '37';;
amzn) install_cuda_driver_yum 'fedora' '37' ;; amzn) install_cuda_driver_yum 'fedora' '37' ;;
debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;; debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;; ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
@@ -635,7 +453,7 @@ install_docker() {
$SUDO systemctl start docker $SUDO systemctl start docker
fi fi
info "Creating LocalAI Docker volume..." info "Starting LocalAI Docker container..."
# Create volume if doesn't exist already # Create volume if doesn't exist already
if ! $SUDO docker volume inspect local-ai-data > /dev/null 2>&1; then if ! $SUDO docker volume inspect local-ai-data > /dev/null 2>&1; then
$SUDO docker volume create local-ai-data $SUDO docker volume create local-ai-data
@@ -649,7 +467,7 @@ install_docker() {
# if $SUDO docker ps --format '{{.Names}}' | grep -q local-ai; then # if $SUDO docker ps --format '{{.Names}}' | grep -q local-ai; then
# info "LocalAI Docker container is already running." # info "LocalAI Docker container is already running."
# exit 0 # exit 0
# fi # fi
# info "Starting LocalAI Docker container..." # info "Starting LocalAI Docker container..."
# $SUDO docker start local-ai # $SUDO docker start local-ai
@@ -666,28 +484,22 @@ install_docker() {
IMAGE_TAG= IMAGE_TAG=
if [ "$HAS_CUDA" ]; then if [ "$HAS_CUDA" ]; then
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12-ffmpeg IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg
# CORE # CORE
if [ "$CORE_IMAGES" = true ]; then if [ "$CORE_IMAGES" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12-ffmpeg-core IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg-core
fi fi
# AIO # AIO
if [ "$USE_AIO" = true ]; then if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12 IMAGE_TAG=${VERSION}-aio-gpu-nvidia-cuda-12
fi fi
info "Checking Nvidia Kernel Drivers presence..."
if ! available nvidia-smi; then if ! available nvidia-smi; then
OS_NAME=$ID info "Installing nvidia-cuda-toolkit..."
OS_VERSION=$VERSION_ID # TODO:
$SUDO apt-get -y install nvidia-cuda-toolkit
case $OS_NAME in
debian|ubuntu) $SUDO apt-get -y install nvidia-cuda-toolkit;;
fedora) install_fedora_nvidia_kernel_drivers;;
esac
fi fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/build/models \
--gpus all \ --gpus all \
--restart=always \ --restart=always \
@@ -696,17 +508,16 @@ install_docker() {
$envs \ $envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_AMD" ]; then elif [ "$HAS_AMD" ]; then
IMAGE_TAG=${LOCALAI_VERSION}-hipblas-ffmpeg IMAGE_TAG=${VERSION}-hipblas-ffmpeg
# CORE # CORE
if [ "$CORE_IMAGES" = true ]; then if [ "$CORE_IMAGES" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-hipblas-ffmpeg-core IMAGE_TAG=${VERSION}-hipblas-ffmpeg-core
fi fi
# AIO # AIO
if [ "$USE_AIO" = true ]; then if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-hipblas IMAGE_TAG=${VERSION}-aio-gpu-hipblas
fi fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/build/models \
--device /dev/dri \ --device /dev/dri \
--device /dev/kfd \ --device /dev/kfd \
@@ -716,17 +527,16 @@ install_docker() {
$envs \ $envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_INTEL" ]; then elif [ "$HAS_INTEL" ]; then
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32-ffmpeg IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg
# CORE # CORE
if [ "$CORE_IMAGES" = true ]; then if [ "$CORE_IMAGES" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32-ffmpeg-core IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg-core
fi fi
# AIO # AIO
if [ "$USE_AIO" = true ]; then if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32 IMAGE_TAG=${VERSION}-aio-gpu-intel-f32
fi fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/build/models \
--device /dev/dri \ --device /dev/dri \
--restart=always \ --restart=always \
@@ -735,17 +545,15 @@ install_docker() {
$envs \ $envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
else else
IMAGE_TAG=${LOCALAI_VERSION}-ffmpeg IMAGE_TAG=${VERSION}-ffmpeg
# CORE # CORE
if [ "$CORE_IMAGES" = true ]; then if [ "$CORE_IMAGES" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-ffmpeg-core IMAGE_TAG=${VERSION}-ffmpeg-core
fi fi
# AIO # AIO
if [ "$USE_AIO" = true ]; then if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-cpu IMAGE_TAG=${VERSION}-aio-cpu
fi fi
info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/models \ $SUDO docker run -v local-ai-data:/models \
--restart=always \ --restart=always \
-e MODELS_PATH=/models \ -e MODELS_PATH=/models \
@@ -762,8 +570,8 @@ install_docker() {
install_binary_darwin() { install_binary_darwin() {
[ "$(uname -s)" = "Darwin" ] || fatal 'This script is intended to run on macOS only.' [ "$(uname -s)" = "Darwin" ] || fatal 'This script is intended to run on macOS only.'
info "Downloading LocalAI ${LOCALAI_VERSION}..." info "Downloading LocalAI ${VERSION}..."
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Darwin-${ARCH}" curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${VERSION}/local-ai-Darwin-${ARCH}"
info "Installing to /usr/local/bin/local-ai" info "Installing to /usr/local/bin/local-ai"
install -o0 -g0 -m755 $TEMP_DIR/local-ai /usr/local/bin/local-ai install -o0 -g0 -m755 $TEMP_DIR/local-ai /usr/local/bin/local-ai
@@ -794,8 +602,8 @@ install_binary() {
exit 1 exit 1
fi fi
info "Downloading LocalAI ${LOCALAI_VERSION}..." info "Downloading LocalAI ${VERSION}..."
curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Linux-${ARCH}" curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${VERSION}/local-ai-Linux-${ARCH}"
for BINDIR in /usr/local/bin /usr/bin /bin; do for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue echo $PATH | grep -q $BINDIR && break || continue
@@ -849,7 +657,7 @@ detect_start_command() {
if [ "$WORKER" = true ]; then if [ "$WORKER" = true ]; then
if [ -n "$P2P_TOKEN" ]; then if [ -n "$P2P_TOKEN" ]; then
STARTCOMMAND="worker p2p-llama-cpp-rpc" STARTCOMMAND="worker p2p-llama-cpp-rpc"
else else
STARTCOMMAND="worker llama-cpp-rpc" STARTCOMMAND="worker llama-cpp-rpc"
fi fi
elif [ "$FEDERATED" = true ]; then elif [ "$FEDERATED" = true ]; then

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,39 +0,0 @@
---
name: "qwen3"
config_file: |
mmap: true
template:
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
{{ else if eq .RoleName "tool" -}}
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- '<|im_end|>'
- '<dummy32000>'
- '</s>'
- '<|endoftext|>'

27
go.mod
View File

@@ -6,7 +6,6 @@ toolchain go1.23.1
require ( require (
dario.cat/mergo v1.0.1 dario.cat/mergo v1.0.1
github.com/GeertJohan/go.rice v1.0.3
github.com/Masterminds/sprig/v3 v3.3.0 github.com/Masterminds/sprig/v3 v3.3.0
github.com/alecthomas/kong v0.9.0 github.com/alecthomas/kong v0.9.0
github.com/census-instrumentation/opencensus-proto v0.4.1 github.com/census-instrumentation/opencensus-proto v0.4.1
@@ -28,7 +27,6 @@ require (
github.com/golang/protobuf v1.5.4 github.com/golang/protobuf v1.5.4
github.com/google/go-containerregistry v0.19.2 github.com/google/go-containerregistry v0.19.2
github.com/google/uuid v1.6.0 github.com/google/uuid v1.6.0
github.com/gpustack/gguf-parser-go v0.17.0
github.com/grpc-ecosystem/grpc-gateway v1.5.0 github.com/grpc-ecosystem/grpc-gateway v1.5.0
github.com/hpcloud/tail v1.0.0 github.com/hpcloud/tail v1.0.0
github.com/ipfs/go-log v1.0.5 github.com/ipfs/go-log v1.0.5
@@ -44,6 +42,7 @@ require (
github.com/onsi/ginkgo/v2 v2.22.2 github.com/onsi/ginkgo/v2 v2.22.2
github.com/onsi/gomega v1.36.2 github.com/onsi/gomega v1.36.2
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e
github.com/ory/dockertest/v3 v3.10.0
github.com/otiai10/openaigo v1.7.0 github.com/otiai10/openaigo v1.7.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_golang v1.20.5
@@ -55,7 +54,7 @@ require (
github.com/streamer45/silero-vad-go v0.2.1 github.com/streamer45/silero-vad-go v0.2.1
github.com/stretchr/testify v1.10.0 github.com/stretchr/testify v1.10.0
github.com/swaggo/swag v1.16.3 github.com/swaggo/swag v1.16.3
github.com/testcontainers/testcontainers-go v0.35.0 github.com/thxcode/gguf-parser-go v0.1.0
github.com/tmc/langchaingo v0.1.12 github.com/tmc/langchaingo v0.1.12
github.com/valyala/fasthttp v1.55.0 github.com/valyala/fasthttp v1.55.0
go.opentelemetry.io/otel v1.34.0 go.opentelemetry.io/otel v1.34.0
@@ -75,26 +74,19 @@ require (
cloud.google.com/go/auth v0.4.1 // indirect cloud.google.com/go/auth v0.4.1 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
cloud.google.com/go/compute/metadata v0.5.0 // indirect cloud.google.com/go/compute/metadata v0.5.0 // indirect
github.com/containerd/platforms v0.2.1 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/daaku/go.zipexe v1.0.2 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
github.com/fasthttp/websocket v1.5.3 // indirect github.com/fasthttp/websocket v1.5.3 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/go-viper/mapstructure/v2 v2.0.0 // indirect
github.com/google/s2a-go v0.1.7 // indirect github.com/google/s2a-go v0.1.7 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.4 // indirect github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/patternmatcher v0.6.0 // indirect
github.com/moby/sys/user v0.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pion/datachannel v1.5.10 // indirect github.com/pion/datachannel v1.5.10 // indirect
github.com/pion/dtls/v2 v2.2.12 // indirect github.com/pion/dtls/v2 v2.2.12 // indirect
@@ -118,7 +110,6 @@ require (
github.com/pion/turn/v2 v2.1.6 // indirect github.com/pion/turn/v2 v2.1.6 // indirect
github.com/pion/turn/v4 v4.0.0 // indirect github.com/pion/turn/v4 v4.0.0 // indirect
github.com/pion/webrtc/v4 v4.0.9 // indirect github.com/pion/webrtc/v4 v4.0.9 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
github.com/shirou/gopsutil/v4 v4.24.7 // indirect github.com/shirou/gopsutil/v4 v4.24.7 // indirect
github.com/wlynxg/anet v0.0.5 // indirect github.com/wlynxg/anet v0.0.5 // indirect
@@ -137,6 +128,7 @@ require (
github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/Masterminds/semver/v3 v3.3.0 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/Microsoft/hcsshim v0.11.7 // indirect github.com/Microsoft/hcsshim v0.11.7 // indirect
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
github.com/StackExchange/wmi v1.2.1 // indirect github.com/StackExchange/wmi v1.2.1 // indirect
github.com/alecthomas/chroma/v2 v2.8.0 // indirect github.com/alecthomas/chroma/v2 v2.8.0 // indirect
github.com/andybalholm/brotli v1.1.0 // indirect github.com/andybalholm/brotli v1.1.0 // indirect
@@ -160,7 +152,7 @@ require (
github.com/dlclark/regexp2 v1.10.0 // indirect github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/docker/cli v27.0.3+incompatible // indirect github.com/docker/cli v27.0.3+incompatible // indirect
github.com/docker/distribution v2.8.2+incompatible // indirect github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/docker/docker v27.1.1+incompatible github.com/docker/docker v27.0.3+incompatible
github.com/docker/docker-credential-helpers v0.7.0 // indirect github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/docker/go-connections v0.5.0 // indirect github.com/docker/go-connections v0.5.0 // indirect
github.com/docker/go-units v0.5.0 // indirect github.com/docker/go-units v0.5.0 // indirect
@@ -189,13 +181,14 @@ require (
github.com/google/go-cmp v0.6.0 // indirect github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gopacket v1.1.19 // indirect github.com/google/gopacket v1.1.19 // indirect
github.com/google/pprof v0.0.0-20250208200701-d0013a598941 // indirect github.com/google/pprof v0.0.0-20250208200701-d0013a598941 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/gorilla/css v1.0.1 // indirect github.com/gorilla/css v1.0.1 // indirect
github.com/gorilla/websocket v1.5.3 // indirect github.com/gorilla/websocket v1.5.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/henvic/httpretty v0.1.4 // indirect github.com/henvic/httpretty v0.1.3 // indirect
github.com/huandu/xstrings v1.5.0 // indirect github.com/huandu/xstrings v1.5.0 // indirect
github.com/huin/goupnp v1.3.0 // indirect github.com/huin/goupnp v1.3.0 // indirect
github.com/ipfs/boxo v0.27.4 // indirect github.com/ipfs/boxo v0.27.4 // indirect
@@ -262,6 +255,7 @@ require (
github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0 github.com/opencontainers/image-spec v1.1.0
github.com/opencontainers/runc v1.1.12 // indirect
github.com/opencontainers/runtime-spec v1.2.0 // indirect github.com/opencontainers/runtime-spec v1.2.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
@@ -284,7 +278,7 @@ require (
github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/shopspring/decimal v1.4.0 // indirect github.com/shopspring/decimal v1.4.0 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect github.com/sirupsen/logrus v1.9.3 // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/cast v1.7.0 // indirect github.com/spf13/cast v1.7.0 // indirect
@@ -299,6 +293,9 @@ require (
github.com/vishvananda/netlink v1.3.0 // indirect github.com/vishvananda/netlink v1.3.0 // indirect
github.com/vishvananda/netns v0.0.5 // indirect github.com/vishvananda/netns v0.0.5 // indirect
github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/yuin/goldmark v1.5.4 // indirect github.com/yuin/goldmark v1.5.4 // indirect
github.com/yuin/goldmark-emoji v1.0.2 // indirect github.com/yuin/goldmark-emoji v1.0.2 // indirect

83
go.sum
View File

@@ -17,15 +17,10 @@ dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBr
dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4= dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4=
dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D6DFvNNtx+9ybjezNCa8XF0xaYcETyp6rHWU= dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D6DFvNNtx+9ybjezNCa8XF0xaYcETyp6rHWU=
git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0=
github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZSmI=
github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
@@ -40,9 +35,10 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ= github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ=
github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU= github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA= github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8= github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c=
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264= github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264=
@@ -97,16 +93,12 @@ github.com/containerd/errdefs v0.1.0 h1:m0wCRBiu1WJT/Fr+iOoQHMQS/eP5myQ8lCv4Dz5Z
github.com/containerd/errdefs v0.1.0/go.mod h1:YgWiiHtLmSeBrvpw+UfPijzbLaB77mEG1WwJTDETIV0= github.com/containerd/errdefs v0.1.0/go.mod h1:YgWiiHtLmSeBrvpw+UfPijzbLaB77mEG1WwJTDETIV0=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k= github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k=
github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o= github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o=
github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@@ -116,8 +108,6 @@ github.com/creachadair/otp v0.5.0 h1:q3Th7CXm2zlmCdBjw5tEPFOj4oWJMnVL5HXlq0sNKS0
github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA= github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/daaku/go.zipexe v1.0.2 h1:Zg55YLYTr7M9wjKn8SY/WcpuuEi+kR2u4E8RhvpyXmk=
github.com/daaku/go.zipexe v1.0.2/go.mod h1:5xWogtqlYnfBXkSB1o9xysukNP9GTvaNkqzUZbt3Bw8=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0= github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ= github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -129,16 +119,14 @@ github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5il
github.com/decred/dcrd/crypto/blake256 v1.0.1/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= github.com/decred/dcrd/crypto/blake256 v1.0.1/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo=
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg=
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/docker/cli v27.0.3+incompatible h1:usGs0/BoBW8MWxGeEtqPMkzOY56jZ6kYlSN5BLDioCQ= github.com/docker/cli v27.0.3+incompatible h1:usGs0/BoBW8MWxGeEtqPMkzOY56jZ6kYlSN5BLDioCQ=
github.com/docker/cli v27.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/cli v27.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8= github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v27.1.1+incompatible h1:hO/M4MtV36kzKldqnA37IWhebRA+LnqqcqDja6kVaKY= github.com/docker/docker v27.0.3+incompatible h1:aBGI9TeQ4MPlhquTQKq9XbK79rKFVwXNUAYz9aXyEBE=
github.com/docker/docker v27.1.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker v27.0.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A= github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0= github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
@@ -177,6 +165,8 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad h1:dQ93Vd6i25o+zH9vvnZ8mu7jtJQ6jT3D+zE3V8Q49n0=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
@@ -206,8 +196,12 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46 h1:lALhXzDkqtp12udlDLLg+ybXVMmL7Ox9tybqVLWxjPE= github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46 h1:lALhXzDkqtp12udlDLLg+ybXVMmL7Ox9tybqVLWxjPE=
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/go-viper/mapstructure/v2 v2.0.0 h1:dhn8MZ1gZ0mzeodTG3jt5Vj/o87xZKuNAprG2mQfMfc=
github.com/go-viper/mapstructure/v2 v2.0.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0=
github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -282,6 +276,8 @@ github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAx
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
@@ -299,13 +295,9 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI= github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
@@ -315,8 +307,8 @@ github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iP
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU= github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM= github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
@@ -395,6 +387,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394=
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8=
github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg=
github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c= github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c=
@@ -434,8 +428,6 @@ github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i
github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 h1:5RK988zAqB3/AN3opGfRpoQgAVqr6/A5+qRTi67VUZY= github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 h1:5RK988zAqB3/AN3opGfRpoQgAVqr6/A5+qRTi67VUZY=
github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k= github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k=
github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI= github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
@@ -482,14 +474,10 @@ github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zx
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc=
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg=
github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -498,13 +486,13 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mudler/edgevpn v0.30.1 h1:4yyhNFJX62NpRp50sxiyZE5E/sdAqEZX+aE5Mv7QS60= github.com/mudler/edgevpn v0.30.1 h1:4yyhNFJX62NpRp50sxiyZE5E/sdAqEZX+aE5Mv7QS60=
github.com/mudler/edgevpn v0.30.1/go.mod h1:IAJkkJ0oH3rwsSGOGTFT4UBYFqYuD/QyaKzTLB3P/eU= github.com/mudler/edgevpn v0.30.1/go.mod h1:IAJkkJ0oH3rwsSGOGTFT4UBYFqYuD/QyaKzTLB3P/eU=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU= github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU= github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU=
github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 h1:Qh6ghkMgTu6siFbTf7L3IszJmshMhXxNL4V+t7IIA6w= github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 h1:Qh6ghkMgTu6siFbTf7L3IszJmshMhXxNL4V+t7IIA6w=
@@ -541,7 +529,6 @@ github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJE
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY= github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY=
github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg= github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg=
github.com/nkovacs/streamquote v1.0.0/go.mod h1:BN+NaZ2CmdKqUuTUXUEm9j95B2TRbpOWpxbJYzzgUsc=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
@@ -558,6 +545,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
@@ -566,6 +555,8 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw= github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0= github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4=
github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg= github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg=
@@ -669,8 +660,6 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8=
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
@@ -723,8 +712,8 @@ github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY= github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0= github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs= github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8= github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
@@ -743,8 +732,6 @@ github.com/streamer45/silero-vad-go v0.2.1/go.mod h1:B+2FXs/5fZ6pzl6unUZYhZqkYdO
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
@@ -761,8 +748,8 @@ github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0J
github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo= github.com/thxcode/gguf-parser-go v0.1.0 h1:J4QruXyEQGjrAKeKZFlsD2na9l4XF5+bjR194d+wJS4=
github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4= github.com/thxcode/gguf-parser-go v0.1.0/go.mod h1:Tn1PsO/YDEtLIxm1+QDCjIIH9L/9Sr7+KpxZKm0sEuE=
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU=
@@ -781,7 +768,6 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8= github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM= github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck= github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
@@ -800,6 +786,13 @@ github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h
github.com/wlynxg/anet v0.0.3/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= github.com/wlynxg/anet v0.0.3/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA=
github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU=
github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -822,10 +815,6 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuH
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4=
go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng= go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng=
go.opentelemetry.io/otel/exporters/prometheus v0.50.0/go.mod h1:pMm5PkUo5YwbLiuEf7t2xg4wbP0/eSJrMxIMxKosynY= go.opentelemetry.io/otel/exporters/prometheus v0.50.0/go.mod h1:pMm5PkUo5YwbLiuEf7t2xg4wbP0/eSJrMxIMxKosynY=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
@@ -836,8 +825,6 @@ go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnC
go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg= go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/dig v1.18.0 h1:imUL1UiY0Mg4bqbFfsRQO5G4CGRBec/ZujWTvSVp3pw= go.uber.org/dig v1.18.0 h1:imUL1UiY0Mg4bqbFfsRQO5G4CGRBec/ZujWTvSVp3pw=
@@ -1107,8 +1094,8 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU= gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU= gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o= grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o=
gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259 h1:TbRPT0HtzFP3Cno1zZo7yPzEEnfu8EjLfl6IU9VfqkQ= gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259 h1:TbRPT0HtzFP3Cno1zZo7yPzEEnfu8EjLfl6IU9VfqkQ=
gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259/go.mod h1:AVgIgHMwK63XvmAzWG9vLQ41YnVHN0du0tEC46fI7yY= gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259/go.mod h1:AVgIgHMwK63XvmAzWG9vLQ41YnVHN0du0tEC46fI7yY=

View File

@@ -1,37 +1,35 @@
package assets package assets
import ( import (
"embed"
"fmt" "fmt"
"io/fs"
"os" "os"
"path/filepath" "path/filepath"
rice "github.com/GeertJohan/go.rice"
"github.com/mudler/LocalAI/pkg/library" "github.com/mudler/LocalAI/pkg/library"
) )
const backendAssetsDir = "backend-assets"
func ResolvePath(dir string, paths ...string) string { func ResolvePath(dir string, paths ...string) string {
return filepath.Join(append([]string{dir, backendAssetsDir}, paths...)...) return filepath.Join(append([]string{dir, "backend-assets"}, paths...)...)
} }
func ExtractFiles(content *rice.Box, extractDir string) error { func ExtractFiles(content embed.FS, extractDir string) error {
// Create the target directory with backend-assets subdirectory // Create the target directory if it doesn't exist
backendAssetsDir := filepath.Join(extractDir, backendAssetsDir) err := os.MkdirAll(extractDir, 0750)
err := os.MkdirAll(backendAssetsDir, 0750)
if err != nil { if err != nil {
return fmt.Errorf("failed to create directory: %v", err) return fmt.Errorf("failed to create directory: %v", err)
} }
// Walk through the rice box and extract files // Walk through the embedded FS and extract files
err = content.Walk("", func(path string, info os.FileInfo, err error) error { err = fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil { if err != nil {
return err return err
} }
// Reconstruct the directory structure in the target directory // Reconstruct the directory structure in the target directory
targetFile := filepath.Join(backendAssetsDir, path) targetFile := filepath.Join(extractDir, path)
if info.IsDir() { if d.IsDir() {
// Create the directory in the target directory // Create the directory in the target directory
err := os.MkdirAll(targetFile, 0750) err := os.MkdirAll(targetFile, 0750)
if err != nil { if err != nil {
@@ -40,8 +38,8 @@ func ExtractFiles(content *rice.Box, extractDir string) error {
return nil return nil
} }
// Read the file from the rice box // Read the file from the embedded FS
fileData, err := content.Bytes(path) fileData, err := content.ReadFile(path)
if err != nil { if err != nil {
return fmt.Errorf("failed to read file: %v", err) return fmt.Errorf("failed to read file: %v", err)
} }
@@ -58,7 +56,7 @@ func ExtractFiles(content *rice.Box, extractDir string) error {
// If there is a lib directory, set LD_LIBRARY_PATH to include it // If there is a lib directory, set LD_LIBRARY_PATH to include it
// we might use this mechanism to carry over e.g. Nvidia CUDA libraries // we might use this mechanism to carry over e.g. Nvidia CUDA libraries
// from the embedded FS to the target directory // from the embedded FS to the target directory
library.LoadExtractedLibs(backendAssetsDir) library.LoadExtractedLibs(extractDir)
return err return err
} }

View File

@@ -1,19 +1,19 @@
package assets package assets
import ( import (
"os" "embed"
"io/fs"
rice "github.com/GeertJohan/go.rice"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
func ListFiles(content *rice.Box) (files []string) { func ListFiles(content embed.FS) (files []string) {
err := content.Walk("", func(path string, info os.FileInfo, err error) error { err := fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil { if err != nil {
return err return err
} }
if info.IsDir() { if d.IsDir() {
return nil return nil
} }
@@ -21,7 +21,7 @@ func ListFiles(content *rice.Box) (files []string) {
return nil return nil
}) })
if err != nil { if err != nil {
log.Error().Err(err).Msg("error walking the rice box") log.Error().Err(err).Msg("error walking the embedded filesystem")
} }
return return
} }

View File

@@ -39,7 +39,6 @@ type Backend interface {
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error)
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)

View File

@@ -53,10 +53,6 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
return fmt.Errorf("unimplemented") return fmt.Errorf("unimplemented")
} }
func (llm *Base) GenerateVideo(*pb.GenerateVideoRequest) error {
return fmt.Errorf("unimplemented")
}
func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) { func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) {
return pb.TranscriptResult{}, fmt.Errorf("unimplemented") return pb.TranscriptResult{}, fmt.Errorf("unimplemented")
} }

View File

@@ -215,28 +215,6 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
return client.GenerateImage(ctx, in, opts...) return client.GenerateImage(ctx, in, opts...)
} }
func (c *Client) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
}
c.setBusy(true)
defer c.setBusy(false)
c.wdMark()
defer c.wdUnMark()
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithDefaultCallOptions(
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
))
if err != nil {
return nil, err
}
defer conn.Close()
client := pb.NewBackendClient(conn)
return client.GenerateVideo(ctx, in, opts...)
}
func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) { func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
if !c.parallel { if !c.parallel {
c.opMutex.Lock() c.opMutex.Lock()

View File

@@ -47,10 +47,6 @@ func (e *embedBackend) GenerateImage(ctx context.Context, in *pb.GenerateImageRe
return e.s.GenerateImage(ctx, in) return e.s.GenerateImage(ctx, in)
} }
func (e *embedBackend) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
return e.s.GenerateVideo(ctx, in)
}
func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) { func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
return e.s.TTS(ctx, in) return e.s.TTS(ctx, in)
} }

View File

@@ -14,7 +14,6 @@ type LLM interface {
Load(*pb.ModelOptions) error Load(*pb.ModelOptions) error
Embeddings(*pb.PredictOptions) ([]float32, error) Embeddings(*pb.PredictOptions) ([]float32, error)
GenerateImage(*pb.GenerateImageRequest) error GenerateImage(*pb.GenerateImageRequest) error
GenerateVideo(*pb.GenerateVideoRequest) error
AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error)
TTS(*pb.TTSRequest) error TTS(*pb.TTSRequest) error
SoundGeneration(*pb.SoundGenerationRequest) error SoundGeneration(*pb.SoundGenerationRequest) error

View File

@@ -75,18 +75,6 @@ func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest)
return &pb.Result{Message: "Image generated", Success: true}, nil return &pb.Result{Message: "Image generated", Success: true}, nil
} }
func (s *server) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest) (*pb.Result, error) {
if s.llm.Locking() {
s.llm.Lock()
defer s.llm.Unlock()
}
err := s.llm.GenerateVideo(in)
if err != nil {
return &pb.Result{Message: fmt.Sprintf("Error generating video: %s", err.Error()), Success: false}, err
}
return &pb.Result{Message: "Video generated", Success: true}, nil
}
func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) { func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) {
if s.llm.Locking() { if s.llm.Locking() {
s.llm.Lock() s.llm.Lock()

View File

@@ -25,7 +25,7 @@ func LoadExtractedLibs(dir string) error {
} }
var err error = nil var err error = nil
for _, libDir := range []string{filepath.Join(dir, "lib"), filepath.Join(dir, "lib")} { for _, libDir := range []string{filepath.Join(dir, "backend-assets", "lib"), filepath.Join(dir, "lib")} {
err = errors.Join(err, LoadExternal(libDir)) err = errors.Join(err, LoadExternal(libDir))
} }
return err return err

View File

@@ -181,6 +181,10 @@ func orderBackends(backends map[string][]string) ([]string, error) {
// selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities // selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
// Note: this is now relevant only for llama.cpp // Note: this is now relevant only for llama.cpp
func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string { func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
foundCUDA := false
foundAMDGPU := false
foundIntelGPU := false
var grpcProcess string
// Select backend now just for llama.cpp // Select backend now just for llama.cpp
if backend != LLamaCPP { if backend != LLamaCPP {
@@ -194,26 +198,50 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
} }
// Check for GPU-binaries that are shipped with single binary releases // Check for GPU-binaries that are shipped with single binary releases
gpuBinaries := map[string]string{ gpus, err := xsysinfo.GPUs()
"nvidia": LLamaCPPCUDA, if err == nil {
"amd": LLamaCPPHipblas, for _, gpu := range gpus {
"intel": LLamaCPPSycl16, if strings.Contains(gpu.String(), "nvidia") {
} p := backendPath(assetDir, LLamaCPPCUDA)
if _, err := os.Stat(p); err == nil {
if !f16 { log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
gpuBinaries["intel"] = LLamaCPPSycl32 grpcProcess = p
} foundCUDA = true
} else {
for vendor, binary := range gpuBinaries { log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
if xsysinfo.HasGPU(vendor) { }
p := backendPath(assetDir, binary) }
if _, err := os.Stat(p); err == nil { if strings.Contains(gpu.String(), "amd") {
log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor) p := backendPath(assetDir, LLamaCPPHipblas)
return p if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
grpcProcess = p
foundAMDGPU = true
} else {
log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
}
}
if strings.Contains(gpu.String(), "intel") {
backend := LLamaCPPSycl16
if !f16 {
backend = LLamaCPPSycl32
}
p := backendPath(assetDir, backend)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
grpcProcess = p
foundIntelGPU = true
} else {
log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
}
} }
} }
} }
if foundCUDA || foundAMDGPU || foundIntelGPU {
return grpcProcess
}
// No GPU found or no specific binaries found, try to load the CPU variant(s) // No GPU found or no specific binaries found, try to load the CPU variant(s)
// Select a binary based on availability/capability // Select a binary based on availability/capability

View File

@@ -21,8 +21,7 @@ type MultimodalContent struct {
ID int ID int
} }
// https://github.com/ggml-org/llama.cpp/blob/be1d4a13db26750fac702ceb3af88ae4f39dc9f4/tools/mtmd/mtmd.h#L42 const DefaultMultiModalTemplate = "{{ range .Audio }}[audio-{{.ID}}]{{end}}{{ range .Images }}[img-{{.ID}}]{{end}}{{ range .Video }}[vid-{{.ID}}]{{end}}{{.Text}}"
const DefaultMultiModalTemplate = "{{ range .Audio }}[audio-{{.ID}}]{{end}}{{ range .Images }}<__image__>{{end}}{{ range .Video }}[vid-{{.ID}}]{{end}}{{.Text}}"
func TemplateMultiModal(templateString string, opts MultiModalOptions, text string) (string, error) { func TemplateMultiModal(templateString string, opts MultiModalOptions, text string) (string, error) {
if templateString == "" { if templateString == "" {

View File

@@ -1,52 +0,0 @@
package xsysinfo
import (
"errors"
gguf "github.com/gpustack/gguf-parser-go"
)
type VRAMEstimate struct {
TotalVRAM uint64
AvailableVRAM uint64
ModelSize uint64
EstimatedLayers int
EstimatedVRAM uint64
IsFullOffload bool
}
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
// Get model metadata
m := f.Metadata()
a := f.Architecture()
// Calculate base model size
modelSize := uint64(m.Size)
if a.BlockCount == 0 {
return nil, errors.New("block count is 0")
}
// Estimate number of layers that can fit in VRAM
// Each layer typically requires about 1/32 of the model size
layerSize := modelSize / uint64(a.BlockCount)
estimatedLayers := int(availableVRAM / layerSize)
// If we can't fit even one layer, we need to do full offload
isFullOffload := estimatedLayers <= 0
if isFullOffload {
estimatedLayers = 0
}
// Calculate estimated VRAM usage
estimatedVRAM := uint64(estimatedLayers) * layerSize
return &VRAMEstimate{
TotalVRAM: availableVRAM,
AvailableVRAM: availableVRAM,
ModelSize: modelSize,
EstimatedLayers: estimatedLayers,
EstimatedVRAM: estimatedVRAM,
IsFullOffload: isFullOffload,
}, nil
}

View File

@@ -1,8 +1,6 @@
package xsysinfo package xsysinfo
import ( import (
"strings"
"github.com/jaypipes/ghw" "github.com/jaypipes/ghw"
"github.com/jaypipes/ghw/pkg/gpu" "github.com/jaypipes/ghw/pkg/gpu"
) )
@@ -15,37 +13,3 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
return gpu.GraphicsCards, nil return gpu.GraphicsCards, nil
} }
func TotalAvailableVRAM() (uint64, error) {
gpus, err := GPUs()
if err != nil {
return 0, err
}
var totalVRAM uint64
for _, gpu := range gpus {
if gpu != nil && gpu.Node != nil && gpu.Node.Memory != nil {
if gpu.Node.Memory.TotalUsableBytes > 0 {
totalVRAM += uint64(gpu.Node.Memory.TotalUsableBytes)
}
}
}
return totalVRAM, nil
}
func HasGPU(vendor string) bool {
gpus, err := GPUs()
if err != nil {
return false
}
if vendor == "" {
return len(gpus) > 0
}
for _, gpu := range gpus {
if strings.Contains(gpu.String(), vendor) {
return true
}
}
return false
}

View File

@@ -812,30 +812,6 @@ const docTemplate = `{
} }
} }
} }
},
"/video": {
"post": {
"summary": "Creates a video given a prompt.",
"parameters": [
{
"description": "query params",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.OpenAIRequest"
}
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.OpenAIResponse"
}
}
}
}
} }
}, },
"definitions": { "definitions": {
@@ -1660,6 +1636,7 @@ const docTemplate = `{
"type": "string" "type": "string"
}, },
"model_base_name": { "model_base_name": {
"description": "AutoGPTQ",
"type": "string" "type": "string"
}, },
"n": { "n": {
@@ -1743,6 +1720,10 @@ const docTemplate = `{
}, },
"typical_p": { "typical_p": {
"type": "number" "type": "number"
},
"use_fast_tokenizer": {
"description": "AutoGPTQ",
"type": "boolean"
} }
} }
}, },

View File

@@ -805,30 +805,6 @@
} }
} }
} }
},
"/video": {
"post": {
"summary": "Creates a video given a prompt.",
"parameters": [
{
"description": "query params",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.OpenAIRequest"
}
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.OpenAIResponse"
}
}
}
}
} }
}, },
"definitions": { "definitions": {
@@ -1653,6 +1629,7 @@
"type": "string" "type": "string"
}, },
"model_base_name": { "model_base_name": {
"description": "AutoGPTQ",
"type": "string" "type": "string"
}, },
"n": { "n": {
@@ -1736,6 +1713,10 @@
}, },
"typical_p": { "typical_p": {
"type": "number" "type": "number"
},
"use_fast_tokenizer": {
"description": "AutoGPTQ",
"type": "boolean"
} }
} }
}, },

View File

@@ -555,6 +555,7 @@ definitions:
model: model:
type: string type: string
model_base_name: model_base_name:
description: AutoGPTQ
type: string type: string
"n": "n":
description: Also part of the OpenAI official spec. use it for returning multiple description: Also part of the OpenAI official spec. use it for returning multiple
@@ -615,6 +616,9 @@ definitions:
type: boolean type: boolean
typical_p: typical_p:
type: number type: number
use_fast_tokenizer:
description: AutoGPTQ
type: boolean
required: required:
- file - file
type: object type: object
@@ -1248,21 +1252,6 @@ paths:
schema: schema:
$ref: '#/definitions/proto.VADResponse' $ref: '#/definitions/proto.VADResponse'
summary: Detect voice fragments in an audio stream summary: Detect voice fragments in an audio stream
/video:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: Creates a video given a prompt.
securityDefinitions: securityDefinitions:
BearerAuth: BearerAuth:
in: header in: header

View File

@@ -7,27 +7,24 @@ import (
"runtime" "runtime"
"testing" "testing"
"github.com/docker/go-connections/nat"
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"
"github.com/ory/dockertest/v3"
"github.com/ory/dockertest/v3/docker"
"github.com/sashabaranov/go-openai" "github.com/sashabaranov/go-openai"
"github.com/testcontainers/testcontainers-go"
"github.com/testcontainers/testcontainers-go/wait"
) )
var container testcontainers.Container var pool *dockertest.Pool
var resource *dockertest.Resource
var client *openai.Client var client *openai.Client
var containerImage = os.Getenv("LOCALAI_IMAGE") var containerImage = os.Getenv("LOCALAI_IMAGE")
var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG") var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
var modelsDir = os.Getenv("LOCALAI_MODELS_DIR") var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
var apiPort = os.Getenv("LOCALAI_API_PORT")
var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT") var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT")
var apiKey = os.Getenv("LOCALAI_API_KEY") var apiKey = os.Getenv("LOCALAI_API_KEY")
const (
defaultApiPort = "8080"
)
func TestLocalAI(t *testing.T) { func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail) RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI E2E test suite") RunSpecs(t, "LocalAI E2E test suite")
@@ -35,14 +32,15 @@ func TestLocalAI(t *testing.T) {
var _ = BeforeSuite(func() { var _ = BeforeSuite(func() {
if apiPort == "" {
apiPort = "8080"
}
var defaultConfig openai.ClientConfig var defaultConfig openai.ClientConfig
if apiEndpoint == "" { if apiEndpoint == "" {
startDockerImage() startDockerImage()
apiPort, err := container.MappedPort(context.Background(), nat.Port(defaultApiPort))
Expect(err).To(Not(HaveOccurred()))
defaultConfig = openai.DefaultConfig(apiKey) defaultConfig = openai.DefaultConfig(apiKey)
apiEndpoint = "http://localhost:" + apiPort.Port() + "/v1" // So that other tests can reference this value safely. apiEndpoint = "http://localhost:" + apiPort + "/v1" // So that other tests can reference this value safely.
defaultConfig.BaseURL = apiEndpoint defaultConfig.BaseURL = apiEndpoint
} else { } else {
GinkgoWriter.Printf("docker apiEndpoint set from env: %q\n", apiEndpoint) GinkgoWriter.Printf("docker apiEndpoint set from env: %q\n", apiEndpoint)
@@ -60,23 +58,26 @@ var _ = BeforeSuite(func() {
}) })
var _ = AfterSuite(func() { var _ = AfterSuite(func() {
if container != nil { if resource != nil {
Expect(container.Terminate(context.Background())).To(Succeed()) Expect(pool.Purge(resource)).To(Succeed())
} }
//dat, err := os.ReadFile(resource.Container.LogPath)
//Expect(err).To(Not(HaveOccurred()))
//Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
//fmt.Println(string(dat))
}) })
var _ = AfterEach(func() { var _ = AfterEach(func() {
// Add any cleanup needed after each test //Expect(dbClient.Clear()).To(Succeed())
}) })
type logConsumer struct {
}
func (l *logConsumer) Accept(log testcontainers.Log) {
GinkgoWriter.Write([]byte(log.Content))
}
func startDockerImage() { func startDockerImage() {
p, err := dockertest.NewPool("")
Expect(err).To(Not(HaveOccurred()))
Expect(p.Client.Ping()).To(Succeed())
pool = p
// get cwd // get cwd
cwd, err := os.Getwd() cwd, err := os.Getwd()
Expect(err).To(Not(HaveOccurred())) Expect(err).To(Not(HaveOccurred()))
@@ -87,43 +88,20 @@ func startDockerImage() {
} }
proc := runtime.NumCPU() proc := runtime.NumCPU()
options := &dockertest.RunOptions{
req := testcontainers.ContainerRequest{ Repository: containerImage,
Tag: containerImageTag,
Image: fmt.Sprintf("%s:%s", containerImage, containerImageTag), // Cmd: []string{"server", "/data"},
ExposedPorts: []string{defaultApiPort}, PortBindings: map[docker.Port][]docker.PortBinding{
LogConsumerCfg: &testcontainers.LogConsumerConfig{ "8080/tcp": []docker.PortBinding{{HostPort: apiPort}},
Consumers: []testcontainers.LogConsumer{
&logConsumer{},
},
}, },
Env: map[string]string{ Env: []string{"MODELS_PATH=/models", "DEBUG=true", "THREADS=" + fmt.Sprint(proc), "LOCALAI_SINGLE_ACTIVE_BACKEND=true"},
"MODELS_PATH": "/models", Mounts: []string{md + ":/models"},
"DEBUG": "true",
"THREADS": fmt.Sprint(proc),
"LOCALAI_SINGLE_ACTIVE_BACKEND": "true",
},
Files: []testcontainers.ContainerFile{
{
HostFilePath: md,
ContainerFilePath: "/models",
FileMode: 0o755,
},
},
WaitingFor: wait.ForAll(
wait.ForListeningPort(nat.Port(defaultApiPort)),
// wait.ForHTTP("/v1/models").WithPort(nat.Port(apiPort)).WithStartupTimeout(50*time.Minute),
),
} }
GinkgoWriter.Printf("Launching Docker Container %s:%s\n", containerImage, containerImageTag) GinkgoWriter.Printf("Launching Docker Container %q\n%+v\n", containerImageTag, options)
r, err := pool.RunWithOptions(options)
ctx := context.Background()
c, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
ContainerRequest: req,
Started: true,
})
Expect(err).To(Not(HaveOccurred())) Expect(err).To(Not(HaveOccurred()))
container = c resource = r
} }